diff --git a/modules/cannarithm/include/opencv2/acl_stream_accessor.hpp b/modules/cannarithm/include/opencv2/acl_stream_accessor.hpp deleted file mode 100644 index 27118d807e3..00000000000 --- a/modules/cannarithm/include/opencv2/acl_stream_accessor.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#ifndef OPENCV_CANN_STREAM_ACCESSOR_HPP -#define OPENCV_CANN_STREAM_ACCESSOR_HPP - -#include -#include "opencv2/cann.hpp" - -namespace cv -{ -namespace cann -{ - -//! @addtogroup cann_struct -//! @{ - -/** @brief Class that enables getting aclrtAclStream from cann::AclStream - */ -struct AclStreamAccessor -{ - CV_EXPORTS static aclrtStream getStream(const AclStream& stream); - CV_EXPORTS static AclStream wrapStream(aclrtStream stream); -}; - -/** @brief Class that enables getting aclrtAclEvent from cann::AclEvent - */ -struct AclEventAccessor -{ - CV_EXPORTS static aclrtEvent getEvent(const AclEvent& event); - CV_EXPORTS static AclEvent wrapEvent(aclrtEvent event); -}; - -//! @} cann_struct - -} // namespace cann -} // namespace cv - -#endif // OPENCV_CANN_STREAM_ACCESSOR_HPP diff --git a/modules/cannarithm/include/opencv2/cann.hpp b/modules/cannarithm/include/opencv2/cann.hpp deleted file mode 100644 index 6b79f045c0e..00000000000 --- a/modules/cannarithm/include/opencv2/cann.hpp +++ /dev/null @@ -1,335 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. 
- -#ifndef OPENCV_CANN_HPP -#define OPENCV_CANN_HPP - -#include "opencv2/core.hpp" - -/** - @defgroup cann Ascend-accelerated Computer Vision - @{ - @defgroup canncore Core part - @{ - @defgroup cann_struct Data Structures - @defgroup cann_init Initializeation and Information - @} - @} - */ - -namespace cv -{ -namespace cann -{ -class AclStream; - -//! @addtogroup cann_struct -//! @{ - -//=================================================================================== -// AclMat -//=================================================================================== - -/** @brief Base storage class for NPU memory with reference counting. - * AclMat class has a similar interface with Mat and AclMat, and work on [Ascend - * NPU](https://www.hiascend.com/) backend. - * @sa Mat cuda::GpuMat - */ - -class CV_EXPORTS_W AclMat -{ -public: - class CV_EXPORTS_W Allocator - { - public: - virtual ~Allocator() {} - - // allocator must fill data, step and refcount fields - virtual bool allocate(AclMat* mat, int rows, int cols, size_t elemSize) = 0; - virtual void free(AclMat* mat) = 0; - }; - - /** - * @brief Create default allocator for AclMat. This allocator alloc memory from device for - * specific size. - */ - CV_WRAP static AclMat::Allocator* defaultAllocator(); - - /** - * @brief Set allocator for AclMat. - * @param allocator - */ - CV_WRAP static void setDefaultAllocator(AclMat::Allocator* allocator); - - //! default constructor - CV_WRAP explicit AclMat(AclMat::Allocator* allocator_ = AclMat::defaultAllocator()); - - //! constructs AclMat of the specified size and type - CV_WRAP AclMat(int rows, int cols, int type, - AclMat::Allocator* allocator = AclMat::defaultAllocator()); - //! constructs AclMat of the specified size and type - CV_WRAP AclMat(Size size, int type, AclMat::Allocator* allocator = AclMat::defaultAllocator()); - - //! 
constructs AclMat and fills it with the specified value s - CV_WRAP AclMat(int rows, int cols, int type, Scalar& s, - AclMat::Allocator* allocator = AclMat::defaultAllocator()); - //! constructs AclMat and fills it with the specified value s - CV_WRAP AclMat(Size size, int type, Scalar& s, - AclMat::Allocator* allocator = AclMat::defaultAllocator()); - - //! copy constructor - CV_WRAP AclMat(const AclMat& m); - - //! constructor for AclMat headers pointing to user-allocated data - AclMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP); - //! constructor for AclMat headers pointing to user-allocated data - AclMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP); - - //! builds AclMat from host memory (Blocking call) - CV_WRAP explicit AclMat(InputArray arr, - AclMat::Allocator* allocator = AclMat::defaultAllocator()); - - //! assignment operators - AclMat& operator=(const AclMat& m); - - //! destructor - calls release() - ~AclMat(); - - //! sets some of the AclMat elements to s (Blocking call) - CV_WRAP AclMat& setTo(Scalar s); - //! sets some of the AclMat elements to s (Non-Blocking call) - CV_WRAP AclMat& setTo(Scalar s, AclStream& stream); - - //! swaps with other smart pointer - CV_WRAP void swap(AclMat& mat); - - //! allocates new AclMat data unless the AclMat already has specified size and type - CV_WRAP void create(int rows, int cols, int type); - - //! upload host memory data to AclMat (Blocking call) - CV_WRAP void upload(InputArray arr); - //! upload host memory data to AclMat (Non-Blocking call) - CV_WRAP void upload(InputArray arr, AclStream& stream); - - //! download data from AclMat to host (Blocking call) - CV_WRAP void download(OutputArray dst) const; - //! download data from AclMat to host (Non-Blocking call) - CV_WRAP void download(OutputArray dst, AclStream& stream) const; - - //! converts AclMat to another datatype (Blocking call) - CV_WRAP void convertTo(CV_OUT AclMat& dst, int rtype) const; - - //! 
converts AclMat to another datatype (Non-Blocking call) - CV_WRAP void convertTo(CV_OUT AclMat& dst, int rtype, AclStream& stream) const; - - //! decreases reference counter, deallocate the data when reference counter reaches 0 - CV_WRAP void release(); - - //! returns element size in bytes - CV_WRAP size_t elemSize() const; - - //! returns the size of element channel in bytes - CV_WRAP size_t elemSize1() const; - - //! returns element type - CV_WRAP int type() const; - - //! returns element type - CV_WRAP int depth() const; - - //! returns number of channels - CV_WRAP int channels() const; - - //! returns step/elemSize1() - CV_WRAP size_t step1() const; - - //! returns AclMat size : width == number of columns, height == number of rows - CV_WRAP Size size() const; - - //! returns true if AclMat data is NULL - CV_WRAP bool empty() const; - - //! internal use method: updates the continuity flag - CV_WRAP void updateContinuityFlag(); - - //! expand one channel mat to multi-channels (Blocking call) - //! @note, source mat must only have one channel, copy value to all channels. - CV_WRAP void expandTo(CV_OUT AclMat& dst, int channels) const; - - //! expand one channel mat to multi-channels (Non-Blocking call) - //! @note, source mat must only have one channel, copy value to all channels. - CV_WRAP void expandTo(CV_OUT AclMat& dst, int channels, AclStream& stream) const; - - /*! includes several bit-fields: - - the magic signature - - continuity flag - - depth - - number of channels - */ - int flags; - - //! the number of rows and columns - int rows, cols; - - //! a distance between successive rows in bytes; includes the gap if any - CV_PROP size_t step; - - //! pointer to the data - uchar* data; - - //! pointer to the reference counter; - //! when AclMat points to user-allocated data, the pointer is NULL - int* refcount; - - //! helper fields used in locateROI and adjustROI - uchar* datastart; - const uchar* dataend; - - //! 
allocator - Allocator* allocator; -}; - -class AclStream; -class AclStreamAccessor; -class AclEvent; -class AclEventAccessor; -class DefaultDeviceInitializer; - -//=================================================================================== -// AclStream -//=================================================================================== - -/** @brief In AscendCL Stream(AclStream) is a task queue. Stream is used to manage the parallelism - * of tasks. The tasks inside a Stream are executed sequentially, that is, the Stream executes - * sequentially according to the sent tasks; the tasks in different Streams are executed in - * parallel. - * - * All Non-blocking functions should pass parameter stream, These function returns immediately after - * the task is submitted. Caller should wait stream until completion. - * - * Blocking functions implicityly use the default stream, and synchronize stream before function - * return. - * @sa cuda::Stream - */ - -// TODO: Stream is defined in namespace cuda, and pybind code does not use a namespace of stream, -// change stream name to AclStream to avoid confilct. -class CV_EXPORTS_W AclStream -{ -public: - CV_WRAP AclStream(); - - //! blocks the current CPU thread until all operations in the stream are complete. - CV_WRAP void waitForCompletion(); - - //! blocks the current CPU thread until event trigger. - CV_WRAP void waitAclEvent(const cv::cann::AclEvent& event); - - /** - * @brief return default AclStream object for default Acl stream. - */ - CV_WRAP static AclStream& Null(); - - // acl symbols CANNOT used in any hpp files. Use a inner class to avoid acl symbols defined in - // hpp. - class Impl; - - // add temporary mat for async release. - void addToAsyncRelease(const AclMat& mat); - -private: - Ptr impl_; - AclStream(const Ptr& impl); - - friend class AclStreamAccessor; - friend class DefaultDeviceInitializer; -}; - -/** - * @brief AclEvent to synchronize between different streams. 
- */ -class CV_EXPORTS_W AclEvent -{ -public: - CV_WRAP AclEvent(); - - //! records an event - CV_WRAP void record(AclStream& stream = AclStream::Null()); - - //! waits for an event to complete - CV_WRAP void waitForComplete() const; - - class Impl; - -private: - Ptr impl_; - AclEvent(const Ptr& impl); - - friend class AclEventAccessor; -}; - -/** @brief Bindings overload to create a Stream object from the address stored in an existing CANN - * Runtime API stream pointer (aclrtStream). - * @param aclStreamAddress Memory address stored in a CANN Runtime API stream pointer - * (aclrtStream). The created Stream object does not perform any allocation or deallocation and simply - * wraps existing raw CANN Runtime API stream pointer. - * @note Overload for generation of bindings only, not exported or intended for use internally fro C++. - */ -CV_EXPORTS_W AclStream wrapStream(size_t aclStreamAddress); - -//! @} cann_struct - -//=================================================================================== -// Initialization & Info -//=================================================================================== - -//! @addtogroup cann_init -//! @{ - -//! Get Ascend matrix object from Input array, upload matrix memory if need. (Blocking call) -AclMat getInputMat(InputArray src); -//! Get Ascend matrix object from Input array, upload matrix memory if need. (Non-Blocking call) -AclMat getInputMat(InputArray src, AclStream& stream); - -//! Get Ascend matrix object from Output array, upload matrix memory if need. -AclMat getOutputMat(OutputArray dst, int rows, int cols, int type); - -//! Sync output matrix to Output array, download matrix memory if need. -void syncOutput(const AclMat& dst, OutputArray _dst); - -/** - * @brief Choose Ascend npu device. - */ -CV_EXPORTS_W void setDevice(int device); - -/** - * @brief Clear all context created in current Ascend device. - */ -CV_EXPORTS_W void resetDevice(); - -/** - * @brief Get current Ascend device. 
- */ -CV_EXPORTS_W int32_t getDevice(); - -/** - * @brief init AscendCL. - */ -CV_EXPORTS_W void initAcl(); - -/** - * @brief finalize AscendCL. - * @note finalizeAcl only can be called once for a process. Call this function after all AscendCL - * options finished. - */ -CV_EXPORTS_W void finalizeAcl(); - -//! @} cann_init - -} // namespace cann -} // namespace cv - -#include "opencv2/cann.inl.hpp" - -#endif /* OPENCV_CANN_HPP */ diff --git a/modules/cannarithm/include/opencv2/cann.inl.hpp b/modules/cannarithm/include/opencv2/cann.inl.hpp deleted file mode 100644 index 0c85e8dcc7a..00000000000 --- a/modules/cannarithm/include/opencv2/cann.inl.hpp +++ /dev/null @@ -1,111 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#ifndef OPENCV_CANNINL_HPP -#define OPENCV_CANNINL_HPP - -#include "opencv2/cann.hpp" - -namespace cv -{ -namespace cann -{ -inline AclMat::AclMat(AclMat::Allocator* allocator_) - : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), - allocator(allocator_) -{ -} - -inline AclMat::AclMat(int rows_, int cols_, int type_, AclMat::Allocator* allocator_) - : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), - allocator(allocator_) -{ - if (rows_ > 0 && cols_ > 0) - create(rows_, cols_, type_); -} - -inline AclMat::AclMat(Size size_, int type_, AclMat::Allocator* allocator_) - : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), - allocator(allocator_) -{ - if (size_.height > 0 && size_.width > 0) - create(size_.height, size_.width, type_); -} - -inline AclMat::AclMat(InputArray arr, AclMat::Allocator* allocator_) - : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), - allocator(allocator_) -{ - upload(arr); -} - -inline AclMat::AclMat(const AclMat& m) - : 
flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), - datastart(m.datastart), dataend(m.dataend), allocator(m.allocator) -{ - if (refcount) - CV_XADD(refcount, 1); -} - -inline AclMat::~AclMat() { release(); } - -inline AclMat& AclMat::operator=(const AclMat& m) -{ - if (this != &m) - { - AclMat temp(m); - swap(temp); - } - - return *this; -} - -inline void AclMat::swap(AclMat& b) -{ - std::swap(flags, b.flags); - std::swap(rows, b.rows); - std::swap(cols, b.cols); - std::swap(step, b.step); - std::swap(data, b.data); - std::swap(datastart, b.datastart); - std::swap(dataend, b.dataend); - std::swap(refcount, b.refcount); - std::swap(allocator, b.allocator); -} - -inline void AclMat::release() -{ - CV_DbgAssert(allocator != 0); - - if (refcount && CV_XADD(refcount, -1) == 1) - allocator->free(this); - - dataend = data = datastart = 0; - step = rows = cols = 0; - refcount = 0; -} - -inline size_t AclMat::elemSize() const { return CV_ELEM_SIZE(flags); } - -inline size_t AclMat::elemSize1() const { return CV_ELEM_SIZE1(flags); } - -inline int AclMat::type() const { return CV_MAT_TYPE(flags); } - -inline int AclMat::depth() const { return CV_MAT_DEPTH(flags); } - -inline int AclMat::channels() const { return CV_MAT_CN(flags); } - -inline size_t AclMat::step1() const { return step / elemSize1(); } - -inline Size AclMat::size() const { return Size(cols, rows); } - -inline bool AclMat::empty() const { return data == 0; } - -inline AclStream::AclStream(const Ptr& impl) : impl_(impl) {} - -inline AclEvent::AclEvent(const Ptr& impl) : impl_(impl) {} -} // namespace cann -} // namespace cv - -#endif // OPENCV_CANNINL_HPP diff --git a/modules/cannarithm/include/opencv2/cann_arithm.hpp b/modules/cannarithm/include/opencv2/cann_arithm.hpp deleted file mode 100644 index 9a0f3f1655f..00000000000 --- a/modules/cannarithm/include/opencv2/cann_arithm.hpp +++ /dev/null @@ -1,176 +0,0 @@ -// This file is part of OpenCV project. 
-// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#ifndef OPENCV_CANNARITHM_HPP -#define OPENCV_CANNARITHM_HPP - -#include "opencv2/cann.hpp" - -namespace cv -{ -namespace cann -{ - -/** - @addtogroup cann - @{ - @defgroup cannarithm Operations on Matrices - @{ - @defgroup cannarithm_elem Per-element Operations - @} - @} - */ - -//! @addtogroup cannarithm_elem -//! @{ - -/** @brief Computes a matrix-matrix or matrix-scalar sum. - * @param src1 First source matrix or scalar. - * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . - * @param dst Destination matrix that has the same size and number of channels as the input - * array(s). The depth is defined by dtype or src1 depth. - * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the - * destination array to be changed. The mask can be used only with single channel images. - * @param dtype Optional depth of the output array. - * @param stream AclStream for the asynchronous version. - * @sa cv::add cuda::add - */ -CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), int dtype = -1, - AclStream& stream = AclStream::Null()); -// This code should not be compiled nor analyzed by doxygen. This interface only for python binding -// code generation. add(InputArray, InputArray ...) can accept Scalar as its parametr.(Scalar -> Mat -// -> InputArray) -#ifdef NEVER_DEFINED -CV_EXPORTS_W void add(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), - int dtype = -1, AclStream& stream = AclStream::Null()); -CV_EXPORTS_W void add(Scalar src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), - int dtype = -1, AclStream& stream = AclStream::Null()); -#endif - -/** @brief Computes a matrix-matrix or matrix-scalar difference. 
- * @param src1 First source matrix or scalar. - * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . - * @param dst Destination matrix that has the same size and number of channels as the input - * array(s). The depth is defined by dtype or src1 depth. - * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the - * destination array to be changed. The mask can be used only with single channel images. - * @param dtype Optional depth of the output array. - * @param stream AclStream for the asynchronous version. - * @sa cv::subtract cuda::subtract - */ -CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), int dtype = -1, - AclStream& stream = AclStream::Null()); -#ifdef NEVER_DEFINED -CV_EXPORTS_W void subtract(InputArray src1, Scalar src2, OutputArray dst, - InputArray mask = noArray(), int dtype = -1, - AclStream& stream = AclStream::Null()); -CV_EXPORTS_W void subtract(Scalar src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), int dtype = -1, - AclStream& stream = AclStream::Null()); -#endif - -/** @brief Computes a matrix-matrix or matrix-scalar per-element product. - * @param src1 First source matrix or scalar. - * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . - * @param dst Destination matrix that has the same size and number of channels as the input - * array(s). The depth is defined by dtype or src1 depth. - * @param scale Optional scale factor. - * @param dtype Optional depth of the output array. - * @param stream AclStream for the asynchronous version. 
- * @sa cv::multiply cuda::multiply - */ -CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, OutputArray dst, float scale, - int dtype = -1, AclStream& stream = AclStream::Null()); -#ifdef NEVER_DEFINED -CV_EXPORTS_W void multiply(InputArray src1, Scalar src2, OutputArray dst, float scale, - int dtype = -1, AclStream& stream = AclStream::Null()); -CV_EXPORTS_W void multiply(Scalar src1, InputArray src2, OutputArray dst, float scale, - int dtype = -1, AclStream& stream = AclStream::Null()); -#endif - -/** @brief Computes a matrix-matrix or matrix-scalar division. - * @param src1 First source matrix or scalar. - * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . - * @param dst Destination matrix that has the same size and number of channels as the input - * array(s). The depth is defined by dtype or src1 depth. - * @param scale Optional scale factor. - * @param dtype Optional depth of the output array. - * @param stream AclStream for the asynchronous version. - * @sa cv::divide cuda::divide - */ -CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, float scale, - int dtype = -1, AclStream& stream = AclStream::Null()); -#ifdef NEVER_DEFINED -CV_EXPORTS_W void divide(InputArray src1, Scalar src2, OutputArray dst, float scale, int dtype = -1, - AclStream& stream = AclStream::Null()); -CV_EXPORTS_W void divide(Scalar src1, InputArray src2, OutputArray dst, float scale, int dtype = -1, - AclStream& stream = AclStream::Null()); -#endif - -/** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar). - * @param src1 First source matrix or scalar. - * @param src2 Second source matrix or scalar. - * @param dst Destination matrix that has the same size and number of channels as the input - * array(s). The depth is defined by dtype or src1 depth. 
- * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the - * destination array to be changed. The mask can be used only with single channel images. - * @param stream AclStream for the asynchronous version. - * @sa cv::bitwise_and cuda::bitwise_and - */ -CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -#ifdef NEVER_DEFINED -CV_EXPORTS_W void bitwise_and(InputArray src1, Scalar src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -CV_EXPORTS_W void bitwise_and(Scalar src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -#endif - -/** @brief Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar). - * @param src1 First source matrix or scalar. - * @param src2 Second source matrix or scalar. - * @param dst Destination matrix that has the same size and number of channels as the input - * array(s). The depth is defined by dtype or src1 depth. - * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the - * destination array to be changed. The mask can be used only with single channel images. - * @param stream AclStream for the asynchronous version. 
- * @sa cv::bitwise_or cuda::bitwise_or - */ -CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -#ifdef NEVER_DEFINED -CV_EXPORTS_W void bitwise_or(InputArray src1, Scalar src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -CV_EXPORTS_W void bitwise_or(Scalar src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -#endif - -/** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and - * scalar). - * @param src1 First source matrix or scalar. - * @param src2 Second source matrix or scalar. - * @param dst Destination matrix that has the same size and number of channels as the input - * array(s). The depth is defined by dtype or src1 depth. - * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the - * destination array to be changed. The mask can be used only with single channel images. - * @param stream AclStream for the asynchronous version. - * @sa cv::bitwise_xor cuda::bitwise_xor - */ -CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -#ifdef NEVER_DEFINED -CV_EXPORTS_W void bitwise_xor(InputArray src1, Scalar src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -CV_EXPORTS_W void bitwise_xor(Scalar src1, InputArray src2, OutputArray dst, - InputArray mask = noArray(), AclStream& stream = AclStream::Null()); -#endif - -//! 
@} cannarithm_elem - -} // namespace cann -} // namespace cv - -#endif /* OPENCV_CANNARITHM_HPP */ diff --git a/modules/cannarithm/include/opencv2/cann_call.hpp b/modules/cannarithm/include/opencv2/cann_call.hpp deleted file mode 100644 index 6afdd266a21..00000000000 --- a/modules/cannarithm/include/opencv2/cann_call.hpp +++ /dev/null @@ -1,52 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#ifndef OPENCV_CANNCALL_HPP -#define OPENCV_CANNCALL_HPP - -#include -#include -#include "opencv2/cann.hpp" - -namespace cv -{ -namespace cann -{ -struct AclAttribute -{ - virtual ~AclAttribute() = default; - virtual void addAttr(aclopAttr* opAttr) = 0; -}; - -#define DEFINE_ATTR(FUNC, TYPE) \ - class Acl##FUNC##Attribute : public AclAttribute \ - { \ - const char* name; \ - TYPE value; \ - \ - public: \ - Acl##FUNC##Attribute(const char* _name, TYPE _value) : name(_name), value(_value){}; \ - void addAttr(aclopAttr* opAttr) override \ - { \ - CV_ACL_SAFE_CALL(aclopSetAttr##FUNC(opAttr, name, value)); \ - } \ - } - -DEFINE_ATTR(Float, float); -DEFINE_ATTR(String, const char*); - -static std::vector emptyattr; -void aclOneInput(const AclMat& src, AclMat& dst, const char* op, - AclStream& stream = AclStream::Null(), - std::vector& attrs = emptyattr); - -void aclTwoInputs(const AclMat& src1, const AclMat& src2, AclMat& dst, const char* op, - AclStream& stream = AclStream::Null()); - -void transNCHWToNHWC(const AclMat& src, AclMat& dst, AclStream& stream = AclStream::Null()); - -} // namespace cann -} // namespace cv - -#endif // OPENCV_CANNCALL_HPP diff --git a/modules/cannarithm/include/opencv2/cann_common.hpp b/modules/cannarithm/include/opencv2/cann_common.hpp deleted file mode 100644 index ecff9f07589..00000000000 --- a/modules/cannarithm/include/opencv2/cann_common.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// This file is 
part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#ifndef OPENCV_CANN_COMMON_HPP -#define OPENCV_CANN_COMMON_HPP - -#include - -namespace cv -{ -namespace cann -{ -static inline void checkAclError(aclError err, const char* file, const int line, const char* func) -{ - if (ACL_SUCCESS != err) - { - const char* errMsg = aclGetRecentErrMsg(); - cv::error(cv::Error::AscendApiCallError, errMsg == nullptr ? "" : errMsg, func, file, line); - } -} - -static inline void checkAclPtr(void* ptr, const char* file, const int line, const char* func) -{ - if (nullptr == ptr) - { - const char* errMsg = aclGetRecentErrMsg(); - cv::error(cv::Error::AscendApiCallError, errMsg == nullptr ? "" : errMsg, func, file, line); - } -} - -} // namespace cann -} // namespace cv - -#define CV_ACL_SAFE_CALL(expr) cv::cann::checkAclError((expr), __FILE__, __LINE__, CV_Func) -#define CV_ACL_SAFE_CALL_PTR(expr) \ - ({ \ - auto ptr = (expr); \ - cv::cann::checkAclPtr(ptr, __FILE__, __LINE__, CV_Func); \ - ptr; \ - }) - -#endif // OPENCV_CANN_COMMON_HPP diff --git a/modules/cannarithm/include/opencv2/cann_prepare.hpp b/modules/cannarithm/include/opencv2/cann_prepare.hpp deleted file mode 100644 index cc1aba25618..00000000000 --- a/modules/cannarithm/include/opencv2/cann_prepare.hpp +++ /dev/null @@ -1,96 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. 
- -#ifndef OPENCV_CANNPREPARE_HPP -#define OPENCV_CANNPREPARE_HPP - -#include -#include -#include "opencv2/core.hpp" -#include "opencv2/cann_common.hpp" - -namespace cv -{ -namespace cann -{ -struct CannPreparation -{ - CannPreparation() { opAttr_ = CV_ACL_SAFE_CALL_PTR(aclopCreateAttr()); } - - virtual ~CannPreparation() - { - for (auto desc : inputDesc_) - { - aclDestroyTensorDesc(desc); - } - - for (auto desc : outputDesc_) - { - aclDestroyTensorDesc(desc); - } - - for (auto buf : inputBuffers_) - { - aclDestroyDataBuffer(buf); - } - - for (auto buf : outputBuffers_) - { - aclDestroyDataBuffer(buf); - } - - aclopDestroyAttr(opAttr_); - } - - std::vector inputBuffers_; - std::vector outputBuffers_; - std::vector inputDesc_; - std::vector outputDesc_; - aclopAttr* opAttr_; -}; - -#define CANN_PREPARE_ADD_ATTR(var, type, ...) \ - do \ - { \ - CV_ACL_SAFE_CALL(aclopSetAttr##type(var.opAttr_, __VA_ARGS__)); \ - } while (0) - -#define CANN_PREPARE_INPUTDESC(var, ...) \ - do \ - { \ - auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateTensorDesc(__VA_ARGS__)); \ - if (_rPtr != nullptr) \ - var.inputDesc_.push_back(_rPtr); \ - } while (0) - -#define CANN_PREPARE_OUTPUTDESC(var, ...) \ - do \ - { \ - auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateTensorDesc(__VA_ARGS__)); \ - if (_rPtr != nullptr) \ - var.outputDesc_.push_back(_rPtr); \ - } while (0) - -#define CANN_PREPARE_INPUTBUFFER(var, ...) \ - do \ - { \ - auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateDataBuffer(__VA_ARGS__)); \ - if (_rPtr != nullptr) \ - var.inputBuffers_.push_back(_rPtr); \ - } while (0) - -#define CANN_PREPARE_OUTPUTBUFFER(var, ...) 
\ - do \ - { \ - auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateDataBuffer(__VA_ARGS__)); \ - if (_rPtr != nullptr) \ - var.outputBuffers_.push_back(_rPtr); \ - } while (0) - -aclDataType getACLType(int opencvdepth); - -} // namespace cann -} // namespace cv - -#endif // OPENCV_CANNPREPARE_HPP diff --git a/modules/cannarithm/misc/python/pyopencv_cann.hpp b/modules/cannarithm/misc/python/pyopencv_cann.hpp deleted file mode 100644 index 61dc824c886..00000000000 --- a/modules/cannarithm/misc/python/pyopencv_cann.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#ifdef HAVE_OPENCV_CORE - -#include "opencv2/cann.hpp" - -typedef std::vector vector_AclMat; -typedef cann::AclMat::Allocator AclMat_Allocator; - -CV_PY_TO_CLASS(cann::AclMat); -CV_PY_TO_CLASS(cann::AclStream); - -CV_PY_TO_CLASS_PTR(cann::AclMat); -CV_PY_TO_CLASS_PTR(cann::AclMat::Allocator); - -CV_PY_FROM_CLASS(cann::AclMat); -CV_PY_FROM_CLASS(cann::AclStream); - -CV_PY_FROM_CLASS_PTR(cann::AclMat::Allocator); - -#endif diff --git a/modules/cannarithm/perf/perf_element_operations.cpp b/modules/cannarithm/perf/perf_element_operations.cpp deleted file mode 100644 index 5299f4b3c78..00000000000 --- a/modules/cannarithm/perf/perf_element_operations.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#include "perf_precomp.hpp" -#include "opencv2/cann_arithm.hpp" - -namespace opencv_test -{ -namespace -{ - -#define ARITHM_MAT_DEPTH Values(CV_32S, CV_32SC3) -#define TYPICAL_ACL_MAT_SIZES ::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz4320p -#define DEVICE_ID 0 -#define DEF_PARAM_TEST(name, ...) 
\ - typedef ::perf::TestBaseWithParam> name - -// NPU Perf Test -DEF_PARAM_TEST(NPU, cv::Size, perf::MatDepth); -#define TEST_NPU_OP_MAT(idx, op, ...) \ - PERF_TEST_P(NPU, MAT_##op##_MAT_##idx, \ - testing::Combine(testing::Values(TYPICAL_ACL_MAT_SIZES), ARITHM_MAT_DEPTH)) \ - { \ - Size size = GET_PARAM(0); \ - int depth = GET_PARAM(1); \ - \ - Mat src1(size, depth), src2(size, depth); \ - declare.in(src1, WARMUP_RNG); \ - declare.in(src2, WARMUP_RNG); \ - cv::cann::setDevice(DEVICE_ID); \ - \ - AclMat npu_src1, npu_src2, dst; \ - npu_src1.upload(src1); \ - npu_src2.upload(src2); \ - AclStream stream; \ - TEST_CYCLE() { cv::cann::op(npu_src1, npu_src2, dst, __VA_ARGS__); } \ - SANITY_CHECK_NOTHING(); \ - cv::cann::resetDevice(); \ - } - -// CPU Perf Test -DEF_PARAM_TEST(CPU, cv::Size, perf::MatDepth); -#define TEST_CPU_OP_MAT(idx, op, ...) \ - PERF_TEST_P(CPU, MAT_##op##_MAT_##idx, \ - testing::Combine(testing::Values(TYPICAL_ACL_MAT_SIZES), ARITHM_MAT_DEPTH)) \ - { \ - Size size = GET_PARAM(0); \ - int depth = GET_PARAM(1); \ - \ - Mat src1(size, depth), src2(size, depth), dst(size, depth); \ - declare.in(src1, WARMUP_RNG); \ - declare.in(src2, WARMUP_RNG); \ - \ - TEST_CYCLE() cv::op(src1, src2, dst, __VA_ARGS__); \ - SANITY_CHECK_NOTHING(); \ - } - -TEST_NPU_OP_MAT(1, add, noArray(), -1); -TEST_CPU_OP_MAT(1, add, noArray(), -1); - -TEST_NPU_OP_MAT(1, subtract, noArray(), -1); -TEST_CPU_OP_MAT(1, subtract, noArray(), -1); - -TEST_NPU_OP_MAT(1, multiply, 1, -1); -TEST_CPU_OP_MAT(1, multiply, 1, -1); - -TEST_NPU_OP_MAT(1, divide, 1, -1); -TEST_CPU_OP_MAT(1, divide, 1, -1); - -TEST_NPU_OP_MAT(1, bitwise_and, noArray()); -TEST_CPU_OP_MAT(1, bitwise_and, noArray()); - -TEST_NPU_OP_MAT(1, bitwise_or, noArray()); -TEST_CPU_OP_MAT(1, bitwise_or, noArray()); - -TEST_NPU_OP_MAT(1, bitwise_xor, noArray()); -TEST_CPU_OP_MAT(1, bitwise_xor, noArray()); - -} // namespace -} // namespace opencv_test diff --git a/modules/cannarithm/src/aclmat.cpp 
b/modules/cannarithm/src/aclmat.cpp deleted file mode 100644 index a7d0dced4d0..00000000000 --- a/modules/cannarithm/src/aclmat.cpp +++ /dev/null @@ -1,605 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#include "precomp.hpp" - -namespace -{ -/********************************************AclMat********************************************/ -class DefaultAllocator : public cv::cann::AclMat::Allocator -{ -public: - bool allocate(cv::cann::AclMat* mat, int rows, int cols, size_t elemSize) CV_OVERRIDE; - void free(cv::cann::AclMat* mat) CV_OVERRIDE; -}; - -bool DefaultAllocator::allocate(cv::cann::AclMat* mat, int rows, int cols, size_t elemSize) -{ - CV_ACL_SAFE_CALL( - aclrtMalloc((void**)(&mat->data), elemSize * cols * rows, ACL_MEM_MALLOC_HUGE_FIRST)); - - mat->step = cols * elemSize; - mat->refcount = (int*)cv::fastMalloc(sizeof(int)); - - return true; -} - -void DefaultAllocator::free(cv::cann::AclMat* mat) -{ - aclrtFree(mat->datastart); - cv::fastFree(mat->refcount); -} - -DefaultAllocator cannDefaultAllocator; -cv::cann::AclMat::Allocator* g_defaultAllocator = &cannDefaultAllocator; -} // namespace - -namespace cv -{ -namespace cann -{ -AclMat::Allocator* AclMat::defaultAllocator() { return g_defaultAllocator; } - -void AclMat::setDefaultAllocator(AclMat::Allocator* allocator) -{ - CV_Assert(allocator != 0); - g_defaultAllocator = allocator; -} - -// TODO: this function is copied from matrix.cpp, which is a local symbol there and can be -// refreneced. 
-static int updateContinuityFlag(int flags, int dims, const int* size, const size_t* step) -{ - int i, j; - for (i = 0; i < dims; i++) - { - if (size[i] > 1) - break; - } - - uint64 t = (uint64)size[std::min(i, dims - 1)] * CV_MAT_CN(flags); - for (j = dims - 1; j > i; j--) - { - t *= size[j]; - if (step[j] * size[j] < step[j - 1]) - break; - } - - if (j <= i && t == (uint64)(int)t) - return flags | Mat::CONTINUOUS_FLAG; - return flags & ~Mat::CONTINUOUS_FLAG; -} - -void AclMat::updateContinuityFlag() -{ - int sz[] = {rows, cols}; - size_t steps[] = {step, elemSize()}; - flags = cv::cann::updateContinuityFlag(flags, 2, sz, steps); -} - -AclMat::AclMat(int rows_, int cols_, int type_, void* data_, size_t step_) - : flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_), step(step_), - data((uchar*)data_), refcount(0), datastart((uchar*)data_), dataend((const uchar*)data_), - allocator(defaultAllocator()) -{ - size_t minstep = cols * elemSize(); - - if (step == Mat::AUTO_STEP) - { - step = minstep; - } - else - { - if (rows == 1) - step = minstep; - - CV_DbgAssert(step >= minstep); - } - - dataend += step * (rows - 1) + minstep; - updateContinuityFlag(); -} - -AclMat::AclMat(Size size_, int type_, void* data_, size_t step_) - : flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width), - step(step_), data((uchar*)data_), refcount(0), datastart((uchar*)data_), - dataend((const uchar*)data_), allocator(defaultAllocator()) -{ - size_t minstep = cols * elemSize(); - - if (step == Mat::AUTO_STEP) - { - step = minstep; - } - else - { - if (rows == 1) - step = minstep; - - CV_DbgAssert(step >= minstep); - } - - dataend += step * (rows - 1) + minstep; - updateContinuityFlag(); -} - -void AclMat::create(int _rows, int _cols, int _type) -{ - CV_DbgAssert(_rows >= 0 && _cols >= 0); - - _type &= Mat::TYPE_MASK; - - if (rows == _rows && cols == _cols && type() == _type && data) - return; - - if (data) - release(); - - if (_rows > 
0 && _cols > 0) - { - flags = Mat::MAGIC_VAL + _type; - rows = _rows; - cols = _cols; - - const size_t esz = elemSize(); - - bool allocSuccess = allocator->allocate(this, rows, cols, esz); - - if (!allocSuccess) - { - // custom allocator fails, try default allocator - allocator = defaultAllocator(); - allocSuccess = allocator->allocate(this, rows, cols, esz); - CV_Assert(allocSuccess); - } - - if (esz * cols == step) - flags |= Mat::CONTINUOUS_FLAG; - - datastart = data; - dataend = data + step * (rows - 1) + cols * esz; - - if (refcount) - *refcount = 1; - } -} - -void AclMat::upload(InputArray arr) -{ - Mat mat = arr.getMat(); - CV_DbgAssert(!mat.empty()); - create(mat.rows, mat.cols, mat.type()); - CV_ACL_SAFE_CALL(aclrtMemcpy2d(data, step, mat.data, mat.step[0], cols * elemSize(), rows, - ACL_MEMCPY_HOST_TO_DEVICE)); -} - -void AclMat::upload(InputArray arr, AclStream& _stream) -{ - Mat mat = arr.getMat(); - CV_DbgAssert(!mat.empty()); - create(mat.rows, mat.cols, mat.type()); - aclrtStream stream = AclStreamAccessor::getStream(_stream); - CV_ACL_SAFE_CALL(aclrtMemcpy2dAsync(data, step, mat.data, mat.step[0], cols * elemSize(), rows, - ACL_MEMCPY_HOST_TO_DEVICE, stream)); -} - -void AclMat::download(OutputArray _dst) const -{ - CV_DbgAssert(!empty()); - - _dst.create(size(), type()); - Mat dst = _dst.getMat(); - CV_ACL_SAFE_CALL(aclrtMemcpy2d(dst.data, dst.step[0], data, step, cols * elemSize(), rows, - ACL_MEMCPY_DEVICE_TO_HOST)); -} - -void AclMat::download(OutputArray _dst, AclStream& _stream) const -{ - CV_DbgAssert(!empty()); - - _dst.create(size(), type()); - Mat dst = _dst.getMat(); - aclrtStream stream = AclStreamAccessor::getStream(_stream); - CV_ACL_SAFE_CALL(aclrtMemcpy2dAsync(dst.data, dst.step[0], data, step, cols * elemSize(), rows, - ACL_MEMCPY_DEVICE_TO_HOST, stream)); -} - -AclMat::AclMat(int rows_, int cols_, int type_, Scalar& s_, AclMat::Allocator* allocator_) - : flags(0), rows(rows_), cols(cols_), step(0), data(0), refcount(0), 
datastart(0), dataend(0), - allocator(allocator_) -{ - create(rows_, cols_, type_); - setTo(s_); -} - -AclMat::AclMat(Size size_, int type_, Scalar& s_, AclMat::Allocator* allocator_) - : flags(0), rows(size_.height), cols(size_.width), step(0), data(0), refcount(0), datastart(0), - dataend(0), allocator(allocator_) -{ - create(size_.height, size_.width, type_); - setTo(s_); -} - -AclMat& AclMat::setTo(Scalar s_) { return setTo(s_, AclStream::Null()); } - -AclMat& AclMat::setTo(Scalar s_, AclStream& stream_) -{ - size_t totalBytes = (size_t)rows * cols * elemSize(); - if (totalBytes == 0) - return *this; - - CV_ACL_SAFE_CALL(aclrtMemset(data, totalBytes, 0, totalBytes)); - - Mat scMat(1, 1, type(), s_); - AclMat scAclMat; - scAclMat.upload(scMat); - - AclMat dst(rows, cols, type()); - // TODO use AssignAdd to avoid memcpy, or use broadcase. - aclTwoInputs(*this, scAclMat, dst, "Add", stream_); - swap(dst); - - return *this; -} - -void AclMat::convertTo(AclMat& dst, int rtype) const { convertTo(dst, rtype, AclStream::Null()); } - -void AclMat::convertTo(AclMat& dst, int _rtype, AclStream& _stream) const -{ - int cn = channels(); - dst.create(rows, cols, CV_MAKE_TYPE(_rtype, cn)); - aclOneInput(*this, dst, "Cast", _stream); -} - -void AclMat::expandTo(CV_OUT AclMat& dst, int chs) const { expandTo(dst, chs, AclStream::Null()); } - -void AclMat::expandTo(CV_OUT AclMat& dst, int chs, AclStream& stream) const -{ - CV_Assert(channels() == 1); - - // TODO use inplace expand. 
- AclMat NCHW_mat; - NCHW_mat.create(rows, cols, CV_MAKE_TYPE(depth(), chs)); - - aclrtStream rawStream = AclStreamAccessor::getStream(stream); - size_t expandsize = rows * step * chs; - uchar* dataptr = (uchar*)NCHW_mat.data; - for (int ch = 0; ch < chs; ch++) - { - if (rawStream == nullptr) - { - CV_ACL_SAFE_CALL( - aclrtMemcpy(dataptr, expandsize, data, rows * step, ACL_MEMCPY_DEVICE_TO_DEVICE)); - } - else - { - CV_ACL_SAFE_CALL(aclrtMemcpyAsync(dataptr, expandsize, data, rows * step, - ACL_MEMCPY_DEVICE_TO_DEVICE, rawStream)); - } - - dataptr += (step * rows); - } - - dst.create(rows, cols, CV_MAKE_TYPE(depth(), chs)); - - transNCHWToNHWC(NCHW_mat, dst, stream); -} - -AclStream wrapStream(size_t aclStreamAddress) -{ - return AclStreamAccessor::wrapStream(reinterpret_cast(aclStreamAddress)); -} - -static AclMat getAclMat(InputArray arr) -{ - _InputArray::KindFlag k = arr.kind(); - if (k == _InputArray::ACL_MAT) - { - const cann::AclMat* a_mat = (const cann::AclMat*)arr.getObj(); - return *a_mat; - } - - if (k == _InputArray::NONE) - return cann::AclMat(); - - CV_Error(cv::Error::StsNotImplemented, "getAclMat is available only for cann::AclMat"); -} - -AclMat getInputMat(InputArray _src) -{ - AclMat src; - if (_src.kind() == _InputArray::ACL_MAT) - { - src = getAclMat(_src); - } - else if (!_src.empty()) - { - src.upload(_src); - } - return src; -} - -AclMat getInputMat(InputArray _src, AclStream& stream) -{ - AclMat src; - if (_src.kind() == _InputArray::ACL_MAT) - { - src = getAclMat(_src); - } - else if (!_src.empty()) - { - aclrtStream rawStream = AclStreamAccessor::getStream(stream); - if (rawStream == nullptr) - { - src.upload(_src); - } - else - { - src.upload(_src, stream); - } - } - return src; -} - -AclMat getOutputMat(OutputArray _dst, int rows, int cols, int type) -{ - AclMat dst; - if (_dst.kind() == _InputArray::ACL_MAT) - { - ((cann::AclMat*)(_dst.getObj()))->create(rows, cols, type); - dst = getAclMat(_dst); - } - else - { - dst.create(rows, 
cols, type); - } - return dst; -} - -void syncOutput(const AclMat& dst, OutputArray _dst) -{ - if (_dst.kind() != _InputArray::ACL_MAT) - { - dst.download(_dst); - } -} - -/********************************************Device********************************************/ - -void setDevice(int device_id) -{ - aclrtContext context; - CV_ACL_SAFE_CALL(aclrtSetDevice(device_id)); - CV_ACL_SAFE_CALL(aclrtCreateContext(&context, device_id)); -} - -void resetDevice() { CV_ACL_SAFE_CALL(aclrtResetDevice(getDevice())); } - -int32_t getDevice() -{ - int32_t deviceId; - CV_ACL_SAFE_CALL(aclrtGetDevice(&deviceId)); - return deviceId; -} - -void initAcl() { CV_ACL_SAFE_CALL(aclInit(nullptr)); } - -void finalizeAcl() { CV_ACL_SAFE_CALL(aclFinalize()); } - -class DefaultDeviceInitializer -{ -public: - DefaultDeviceInitializer(); - ~DefaultDeviceInitializer(); - - AclStream& getNullAclStream(int deviceId); - -private: - std::vector> streams_; - Mutex streams_mtx_; -}; - -DefaultDeviceInitializer::DefaultDeviceInitializer() {} - -DefaultDeviceInitializer::~DefaultDeviceInitializer() { streams_.clear(); } - -AclStream& DefaultDeviceInitializer::getNullAclStream(int deviceId) -{ - AutoLock lock(streams_mtx_); - - if (streams_.empty()) - { - uint32_t deviceCount; - CV_ACL_SAFE_CALL(aclrtGetDeviceCount(&deviceCount)); - - if (deviceCount > 0) - streams_.resize(deviceCount); - } - - CV_DbgAssert(deviceId >= 0 && deviceId < static_cast(streams_.size())); - - if (streams_[deviceId].empty()) - { - aclrtStream stream = nullptr; - Ptr impl = makePtr(stream); - streams_[deviceId] = Ptr(new AclStream(impl)); - } - - return *streams_[deviceId]; -} - -DefaultDeviceInitializer initializer; - -/********************************************AclEvent********************************************/ -class AclEvent::Impl -{ -public: - aclrtEvent event; - bool ownEvent; - - Impl(); - explicit Impl(aclrtEvent event); - - ~Impl(); -}; - -AclEvent::Impl::Impl() : event(nullptr), ownEvent(true) -{ - 
CV_ACL_SAFE_CALL(aclrtCreateEvent(&event)); -} - -AclEvent::Impl::Impl(aclrtEvent e) : event(e), ownEvent(false) {} - -AclEvent::Impl::~Impl() -{ - if (event && ownEvent) - { - CV_ACL_SAFE_CALL(aclrtDestroyEvent(event)); - } -} - -aclrtEvent AclEventAccessor::getEvent(const AclEvent& event) { return event.impl_->event; } - -AclEvent AclEventAccessor::wrapEvent(aclrtEvent event) -{ - return AclEvent(makePtr(event)); -} - -AclEvent::AclEvent() { impl_ = makePtr(); } - -void AclEvent::record(AclStream& stream) -{ - CV_ACL_SAFE_CALL(aclrtRecordEvent(impl_->event, AclStreamAccessor::getStream(stream))); -} - -void AclEvent::waitForComplete() const { CV_ACL_SAFE_CALL(aclrtSynchronizeEvent(impl_->event)); } - -/******************************************AclStream********************************************/ -struct AsyncThdArgs -{ - bool isExit; - void* context; - pthread_mutex_t mutex; - AsyncThdArgs() : isExit(false), context(nullptr), mutex(PTHREAD_MUTEX_INITIALIZER) {} -}; - -class AclStream::Impl -{ -public: - aclrtStream stream; - bool ownStream; - AsyncThdArgs asyncThdArgs; - pthread_t asyncThdId; - - void bindThread(); - void addToAsyncRelease(const AclMat& mat); - - Impl(); - explicit Impl(aclrtStream stream); - - ~Impl(); -}; - -AclStream::Impl::Impl() : stream(nullptr), ownStream(true), asyncThdId(0) -{ - CV_ACL_SAFE_CALL(aclrtCreateStream(&stream)); -} - -AclStream::Impl::Impl(aclrtStream s) : stream(s), ownStream(false), asyncThdId(0) {} - -AclStream::Impl::~Impl() -{ - if (stream && ownStream) - { - aclrtSynchronizeStream(stream); - if (asyncThdId != 0) - { - asyncThdArgs.isExit = true; - CV_ACL_SAFE_CALL(aclrtUnSubscribeReport(asyncThdId, stream)); - (void)pthread_join(asyncThdId, nullptr); - } - CV_ACL_SAFE_CALL(aclrtDestroyStream(stream)); - } -} - -static void* processReportLoop(void* args_) -{ - AsyncThdArgs* args = (AsyncThdArgs*)args_; - CV_ACL_SAFE_CALL(aclrtSetCurrentContext(args->context)); - - // Wait for subscribe. 
- pthread_mutex_lock(&args->mutex); - pthread_mutex_unlock(&args->mutex); - - while (!args->isExit) - { - aclError ret = aclrtProcessReport(-1); - // Skip error check if exiting. aclrtProcessReport will report an timeout error when - // unsubscribing. - if (!args->isExit) - CV_ACL_SAFE_CALL(ret); - } - - return (nullptr); -} - -void AclStream::Impl::bindThread() -{ - // Only one thread will created. Lock for parallelling. - pthread_mutex_lock(&asyncThdArgs.mutex); - if (asyncThdId == 0) - { - CV_ACL_SAFE_CALL(aclrtGetCurrentContext(&asyncThdArgs.context)); - (void)pthread_create(&asyncThdId, nullptr, processReportLoop, &asyncThdArgs); - CV_ACL_SAFE_CALL(aclrtSubscribeReport(asyncThdId, stream)); - } - pthread_mutex_unlock(&asyncThdArgs.mutex); -} - -static void releaseAclMatCB(void* releaseHandle) -{ - if (releaseHandle == nullptr) - return; - AclMat* mat = (AclMat*)releaseHandle; - delete mat; -} - -void AclStream::Impl::addToAsyncRelease(const AclMat& mat) -{ - if (stream != nullptr) - { - if (asyncThdId == 0) - bindThread(); - AclMat* releaseHandle = new AclMat(mat); - CV_ACL_SAFE_CALL( - aclrtLaunchCallback(releaseAclMatCB, releaseHandle, ACL_CALLBACK_BLOCK, stream)); - } -} - -aclrtStream AclStreamAccessor::getStream(const AclStream& stream) { return stream.impl_->stream; } - -AclStream AclStreamAccessor::wrapStream(aclrtStream stream) -{ - return AclStream(makePtr(stream)); -} - -AclStream::AclStream() { impl_ = makePtr(); } - -void AclStream::waitForCompletion() { CV_ACL_SAFE_CALL(aclrtSynchronizeStream(impl_->stream)); } - -void AclStream::waitAclEvent(const AclEvent& event) -{ - CV_ACL_SAFE_CALL(aclrtStreamWaitEvent(impl_->stream, AclEventAccessor::getEvent(event))); -} - -AclStream& AclStream::Null() -{ - const uint32_t deviceId = getDevice(); - return initializer.getNullAclStream(deviceId); -} - -void AclStream::addToAsyncRelease(const AclMat& mat) { impl_->addToAsyncRelease(mat); } - -} // namespace cann -} // namespace cv diff --git 
a/modules/cannarithm/src/cann_call.cpp b/modules/cannarithm/src/cann_call.cpp deleted file mode 100644 index 0e9ad8036bb..00000000000 --- a/modules/cannarithm/src/cann_call.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#include "precomp.hpp" - -namespace cv -{ -namespace cann -{ -void aclOneInput(const AclMat& src, AclMat& dst, const char* op, AclStream& stream, - std::vector& attrs) -{ - CannPreparation prepare; - for (auto& attrIterator : attrs) - { - attrIterator->addAttr(prepare.opAttr_); - } - - int64_t dimSrc[] = {1, src.rows, src.cols, src.channels()}; - int64_t dimDst[] = {1, dst.rows, dst.cols, dst.channels()}; - CANN_PREPARE_INPUTDESC(prepare, getACLType(src.depth()), sizeof(dimSrc) / sizeof(dimSrc[0]), - dimSrc, ACL_FORMAT_NHWC); - CANN_PREPARE_OUTPUTDESC(prepare, getACLType(dst.depth()), sizeof(dimDst) / sizeof(dimDst[0]), - dimDst, ACL_FORMAT_NHWC); - - CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src.data), src.rows * src.step); - CANN_PREPARE_OUTPUTBUFFER(prepare, const_cast(dst.data), dst.rows * dst.step); - - aclrtStream rawStream = AclStreamAccessor::getStream(stream); - - CV_ACL_SAFE_CALL(aclopCompileAndExecute( - op, prepare.inputDesc_.size(), prepare.inputDesc_.data(), prepare.inputBuffers_.data(), - prepare.outputDesc_.size(), prepare.outputDesc_.data(), prepare.outputBuffers_.data(), - prepare.opAttr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, rawStream)); - if (rawStream == nullptr) - CV_ACL_SAFE_CALL(aclrtSynchronizeStream(rawStream)); - else - { - stream.addToAsyncRelease(src); - stream.addToAsyncRelease(dst); - } -} - -void aclTwoInputs(const AclMat& src1, const AclMat& src2, AclMat& dst, const char* op, - AclStream& stream) -{ - CannPreparation prepare; - aclrtStream rawStream = AclStreamAccessor::getStream(stream); - - int64_t dimSrc1[] = {1, src1.rows, 
src1.cols, src1.channels()}; - int64_t dimSrc2[] = {1, src2.rows, src2.cols, src2.channels()}; - - int64_t dimDst[] = {1, dst.rows, dst.cols, dst.channels()}; - - CANN_PREPARE_INPUTDESC(prepare, getACLType(src1.depth()), sizeof(dimSrc1) / sizeof(dimSrc1[0]), - dimSrc1, ACL_FORMAT_NHWC); - - CANN_PREPARE_INPUTDESC(prepare, getACLType(src2.depth()), sizeof(dimSrc2) / sizeof(dimSrc2[0]), - dimSrc2, ACL_FORMAT_NHWC); - - CANN_PREPARE_OUTPUTDESC(prepare, getACLType(dst.depth()), sizeof(dimDst) / sizeof(dimDst[0]), - dimDst, ACL_FORMAT_NHWC); - - CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src1.data), src1.rows * src1.step); - CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src2.data), src2.rows * src2.step); - CANN_PREPARE_OUTPUTBUFFER(prepare, const_cast(dst.data), dst.rows * dst.step); - - CV_ACL_SAFE_CALL(aclopCompileAndExecute( - op, prepare.inputDesc_.size(), prepare.inputDesc_.data(), prepare.inputBuffers_.data(), - prepare.outputDesc_.size(), prepare.outputDesc_.data(), prepare.outputBuffers_.data(), - prepare.opAttr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, rawStream)); - if (rawStream == nullptr) - CV_ACL_SAFE_CALL(aclrtSynchronizeStream(rawStream)); - else - { - stream.addToAsyncRelease(src1); - stream.addToAsyncRelease(src2); - stream.addToAsyncRelease(dst); - } -} - -void transNCHWToNHWC(const AclMat& src, AclMat& dst, AclStream& stream) -{ - CannPreparation prepare; - CANN_PREPARE_ADD_ATTR(prepare, String, "src_format", "NCHW"); - CANN_PREPARE_ADD_ATTR(prepare, String, "dst_format", "NHWC"); - - int64_t dimSrc[] = {1, src.channels(), src.rows, src.cols}; - int64_t dimDst[] = {1, dst.rows, dst.cols, dst.channels()}; - - CANN_PREPARE_INPUTDESC(prepare, getACLType(src.depth()), sizeof(dimSrc) / sizeof(dimSrc[0]), - dimSrc, ACL_FORMAT_NCHW); - CANN_PREPARE_OUTPUTDESC(prepare, getACLType(dst.depth()), sizeof(dimDst) / sizeof(dimDst[0]), - dimDst, ACL_FORMAT_NHWC); - - CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src.data), src.rows * src.step); - 
CANN_PREPARE_OUTPUTBUFFER(prepare, const_cast(dst.data), dst.rows * dst.step); - - aclrtStream rawStream = AclStreamAccessor::getStream(stream); - - CV_ACL_SAFE_CALL(aclopCompileAndExecute("TransData", prepare.inputDesc_.size(), - prepare.inputDesc_.data(), prepare.inputBuffers_.data(), - prepare.outputDesc_.size(), prepare.outputDesc_.data(), - prepare.outputBuffers_.data(), prepare.opAttr_, - ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, rawStream)); - if (rawStream == nullptr) - CV_ACL_SAFE_CALL(aclrtSynchronizeStream(rawStream)); - else - { - stream.addToAsyncRelease(src); - stream.addToAsyncRelease(dst); - } -} - -aclDataType getACLType(int opencvdepth) -{ - switch (opencvdepth) - { - case CV_8S: - return ACL_INT8; - case CV_16S: - return ACL_INT16; - case CV_8U: - return ACL_UINT8; - case CV_16U: - return ACL_UINT16; - case CV_32S: - return ACL_INT32; - case CV_64F: - return ACL_DOUBLE; - case CV_16F: - return ACL_FLOAT16; - default: - return ACL_DT_UNDEFINED; - } -} - -} // namespace cann -} // namespace cv diff --git a/modules/cannarithm/src/element_operations.cpp b/modules/cannarithm/src/element_operations.cpp deleted file mode 100644 index f23323bf384..00000000000 --- a/modules/cannarithm/src/element_operations.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. 
- -#include "precomp.hpp" -#include - -namespace cv -{ -namespace cann -{ -void opMatMat(AclMat&, AclMat&, AclMat&, const char*, AclStream& stream = AclStream::Null()); -void opMatMat(AclMat& src1, AclMat& src2, AclMat& dst, const char* op, AclStream& stream) -{ - aclTwoInputs(src1, src2, dst, op, stream); -} - -void opMatScalar(AclMat&, AclMat&, bool, Scalar, const char*, - AclStream& stream = AclStream::Null()); -void opMatScalar(AclMat& src, AclMat& dst, bool inv, Scalar s, const char* op, AclStream& stream) -{ - Mat scMat(1, 1, src.type(), s); - AclMat scAclMat; - scAclMat.upload(scMat); - if (inv) - aclTwoInputs(scAclMat, src, dst, op, stream); - else - aclTwoInputs(src, scAclMat, dst, op, stream); -} - -void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, float scale, int dtype, - const char* op, AclStream& stream = AclStream::Null()); -void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, float scale, int dtype, - const char* op, AclStream& stream) -{ - const int kind1 = _src1.kind(); - const int kind2 = _src2.kind(); - - const bool isScalar1 = (kind1 == _InputArray::MATX); - const bool isScalar2 = (kind2 == _InputArray::MATX); - - AclMat src1, src2; - - if (!isScalar1) - src1 = getInputMat(_src1, stream); - - if (!isScalar2) - src2 = getInputMat(_src2, stream); - - Mat scalar; - if (isScalar1) - scalar = _src1.getMat(); - else if (isScalar2) - scalar = _src2.getMat(); - - Scalar val; - if (!scalar.empty()) - { - CV_Assert(scalar.total() <= 4); - scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); - } - - const int sdepth = src1.empty() ? src2.depth() : src1.depth(); - const int cn = src1.empty() ? src2.channels() : src1.channels(); - const Size size = src1.empty() ? 
src2.size() : src1.size(); - - if (dtype < 0) - dtype = sdepth; - - const int ddepth = CV_MAT_DEPTH(dtype); - - CV_Assert(sdepth <= CV_64F && ddepth <= CV_64F); - CV_Assert(!scalar.empty() || (src2.depth() == src1.depth() && src2.size() == src1.size())); - - AclMat dst = getOutputMat(_dst, size.height, size.width, CV_MAKE_TYPE(ddepth, cn)); - - if (isScalar1) - opMatScalar(src2, dst, true, val, op, stream); - else if (isScalar2) - opMatScalar(src1, dst, false, val, op, stream); - else - opMatMat(src1, src2, dst, op, stream); - - // TODO implement emtpy for AclMat in InputArray - AclMat mask = getInputMat(_mask, stream); - if (!mask.empty()) - { - int mtype = mask.type(); - - CV_Assert((mtype == CV_8UC1 || mtype == CV_8SC1) && mask.size() == size); - // TODO use MaskSelect? - AclMat formatedMask; - if (mask.depth() != dst.depth()) - mask.convertTo(formatedMask, dst.depth()); - else - formatedMask = mask; - - AclMat expandedMask; - if (dst.channels() != 1) - formatedMask.expandTo(expandedMask, dst.channels()); - else - expandedMask = formatedMask; - - // TODO call DIV before expand? - AclMat divRet; - arithm_op(expandedMask, expandedMask, divRet, noArray(), 1, -1, "Div", stream); - AclMat dstCopy = dst; - // TODO dst memory and dskCopy mempry point to a same memory area, seems no harm yet. 
- arithm_op(dstCopy, divRet, dst, noArray(), 1, -1, "Mul", stream); - } - - if(scale != 1) - { - AclMat dstCpy = dst; - AclFloatAttribute scaleOP("value", scale); - std::vector attrs{&scaleOP}; - aclOneInput(dstCpy, dst, "Muls", stream, attrs); - } - - syncOutput(dst, _dst); -} - -void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, - AclStream& stream) -{ - arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream); -} - -void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, - AclStream& stream) -{ - arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream); -} - -void multiply(InputArray src1, InputArray src2, OutputArray dst, float scale, int dtype, AclStream& stream) -{ - arithm_op(src1, src2, dst, noArray(), scale, dtype, "Mul", stream); -} - -void divide(InputArray src1, InputArray src2, OutputArray dst, float scale, int dtype, AclStream& stream) -{ - arithm_op(src1, src2, dst, noArray(), scale, dtype, "Div", stream); -} - -void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, - AclStream& stream) -{ - arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream); -} - -void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, - AclStream& stream) -{ - arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseOr", stream); -} - -void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, - AclStream& stream) -{ - arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream); -} - - -} // namespace cann -} // namespace cv diff --git a/modules/cannarithm/test/test_cann.cpp b/modules/cannarithm/test/test_cann.cpp deleted file mode 100644 index 6c2e65beefe..00000000000 --- a/modules/cannarithm/test/test_cann.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// This file is part of OpenCV project. 
-// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#include "test_precomp.hpp" -#include - -namespace opencv_test -{ -namespace -{ - -class DummyAllocator : public AclMat::Allocator -{ -public: - bool allocate(cv::cann::AclMat* mat, int rows, int cols, size_t elemSize) CV_OVERRIDE - { - CV_UNUSED(rows); - CV_UNUSED(cols); - CV_UNUSED(elemSize); - mat->data = (uchar*)0x12345; - mat->refcount = (int*)cv::fastMalloc(sizeof(int)); - return true; - } - void free(cv::cann::AclMat* mat) CV_OVERRIDE - { - mat->data = (uchar*)0x54321; - cv::fastFree(mat->refcount); - } -}; - -TEST(AclMat, Construct) -{ - cv::cann::setDevice(0); - // 1 Default constructor. - AclMat defaultAclMat; - AclMat::Allocator* defaultAllocator = AclMat::defaultAllocator(); - ASSERT_EQ(defaultAclMat.allocator, defaultAllocator); - - // 2 get & set allocator. - DummyAllocator dummyAllocator; - AclMat::setDefaultAllocator(&dummyAllocator); - ASSERT_EQ(defaultAclMat.defaultAllocator(), &dummyAllocator); - AclMat::setDefaultAllocator(defaultAllocator); - - // 3 constructs AclMat of the specified size and type - AclMat specifiedSizeAclMat1(5, 6, CV_8UC3); - AclMat specifiedSizeAclMat2(Size(300, 200), CV_64F); - - ASSERT_EQ(specifiedSizeAclMat1.rows, 5); - ASSERT_EQ(specifiedSizeAclMat1.cols, 6); - ASSERT_EQ(specifiedSizeAclMat1.depth(), CV_8U); - ASSERT_EQ(specifiedSizeAclMat1.channels(), 3); - - ASSERT_EQ(specifiedSizeAclMat2.cols, 300); - ASSERT_EQ(specifiedSizeAclMat2.rows, 200); - ASSERT_EQ(specifiedSizeAclMat2.depth(), CV_64F); - ASSERT_EQ(specifiedSizeAclMat2.channels(), 1); - - // 4 constructs AclMat and fills it with the specified value s - srand((unsigned int)(time(NULL))); - Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); - - Mat scalarToMat(7, 8, CV_8UC3, sc); - AclMat scalarToAclMat1(7, 8, CV_8UC3, sc); - Mat scalarToMatChecker; - 
scalarToAclMat1.download(scalarToMatChecker); - - EXPECT_MAT_NEAR(scalarToMat, scalarToMatChecker, 0.0); - - AclMat scalarToAclMat2(Size(123, 345), CV_32S); - - ASSERT_EQ(scalarToAclMat1.rows, 7); - ASSERT_EQ(scalarToAclMat1.cols, 8); - ASSERT_EQ(scalarToAclMat1.depth(), CV_8U); - ASSERT_EQ(scalarToAclMat1.channels(), 3); - - ASSERT_EQ(scalarToAclMat2.cols, 123); - ASSERT_EQ(scalarToAclMat2.rows, 345); - ASSERT_EQ(scalarToAclMat2.depth(), CV_32S); - ASSERT_EQ(scalarToAclMat2.channels(), 1); - - // 5 constructor for AclMat headers pointing to user-allocated data - void* userAllocatedData = malloc(1); - AclMat userAllocatedAclMat1(9, 10, CV_16SC2, userAllocatedData); - AclMat userAllocatedAclMat2(Size(1024, 2048), CV_16F, userAllocatedData); - - ASSERT_EQ(userAllocatedAclMat1.rows, 9); - ASSERT_EQ(userAllocatedAclMat1.cols, 10); - ASSERT_EQ(userAllocatedAclMat1.depth(), CV_16S); - ASSERT_EQ(userAllocatedAclMat1.channels(), 2); - ASSERT_EQ(userAllocatedAclMat1.data, userAllocatedData); - - ASSERT_EQ(userAllocatedAclMat2.cols, 1024); - ASSERT_EQ(userAllocatedAclMat2.rows, 2048); - ASSERT_EQ(userAllocatedAclMat2.depth(), CV_16F); - ASSERT_EQ(userAllocatedAclMat2.channels(), 1); - ASSERT_EQ(userAllocatedAclMat1.data, userAllocatedData); - - // 6 builds AclMat from host memory - Scalar sc2(rand() % 256, rand() % 256, rand() % 256, rand() % 256); - Mat randomMat(7, 8, CV_8UC3, sc2); - InputArray arr = randomMat; - - AclMat fromInputArray(arr); - Mat randomMatChecker; - fromInputArray.download(randomMatChecker); - EXPECT_MAT_NEAR(randomMat, randomMatChecker, 0.0); - - cv::cann::resetDevice(); -} - -TEST(AclMat, RefCount) -{ - DummyAllocator dummyAllocator; - AclMat* mat = new AclMat(1, 1, CV_8U, &dummyAllocator); - ASSERT_EQ(*(mat->refcount), 1); - ASSERT_EQ(mat->data, (uchar*)0x12345); - - AclMat* copy1 = new AclMat(*mat); - ASSERT_EQ(mat->refcount, copy1->refcount); - ASSERT_EQ(*(copy1->refcount), 2); - - AclMat* copy2 = new AclMat(*copy1); - ASSERT_EQ(mat->refcount, 
copy2->refcount); - ASSERT_EQ(*(copy2->refcount), 3); - - delete copy1; - ASSERT_EQ(mat->data, (uchar*)0x12345); - ASSERT_EQ(*(mat->refcount), 2); - - delete copy2; - ASSERT_EQ(mat->data, (uchar*)0x12345); - ASSERT_EQ(*(mat->refcount), 1); - - delete mat; -} - -TEST(AclMat, Assignment) -{ - DummyAllocator dummyAllocator; - AclMat mat1; - AclMat mat2(3, 4, CV_8SC1, &dummyAllocator); - mat1 = mat2; - - ASSERT_EQ(mat1.rows, 3); - ASSERT_EQ(mat1.cols, 4); - ASSERT_EQ(mat1.depth(), CV_8S); - ASSERT_EQ(mat1.channels(), 1); - ASSERT_EQ(mat1.data, (uchar*)0x12345); -} - -TEST(AclMat, SetTo) -{ - cv::cann::setDevice(0); - - srand((unsigned int)(time(NULL))); - Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); - - AclMat aclMat(2, 2, CV_8UC4); - aclMat.setTo(sc); - Mat mat(2, 2, CV_8UC4, sc); - Mat checker; - aclMat.download(checker); - - EXPECT_MAT_NEAR(mat, checker, 0.0); - - cv::cann::resetDevice(); -} - -TEST(AclMat, ConvertTo) -{ - cv::cann::setDevice(0); - - srand((unsigned int)(time(NULL))); - Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); - - AclMat aclMat(2, 2, CV_8UC4, sc); - AclMat convertedAclMat; - aclMat.convertTo(convertedAclMat, CV_16S); - Mat mat(2, 2, CV_16SC4, sc); - Mat checker; - convertedAclMat.download(checker); - - EXPECT_MAT_NEAR(mat, checker, 0.0); - - cv::cann::resetDevice(); -} - -TEST(AclMat, ExpandTo) -{ - cv::cann::setDevice(0); - - Scalar sc1(1); - Scalar sc2(1, 1, 1); - AclMat aclMat(10, 10, CV_8UC1, sc1); - Mat mat(10, 10, CV_8UC3, sc2); - AclMat expandedAclMat; - aclMat.expandTo(expandedAclMat, 3); - Mat checker; - expandedAclMat.download(checker); - - EXPECT_MAT_NEAR(mat, checker, 0.0); - - cv::cann::resetDevice(); -} - -TEST(AclStream, AsyncProcess) -{ - cv::cann::setDevice(0); - - DummyAllocator dummyAllocator; - AclMat* mat = new AclMat(&dummyAllocator); - AclStream stream; - - stream.addToAsyncRelease(*mat); - stream.waitForCompletion(); - - // TODO: need sync point to check: - // 1. 
mat->data is not freed after it add to async release list even mat is deleted. - // 2. mat->data is freed after callback is called. - - cv::cann::resetDevice(); -} - -} // namespace -} // namespace opencv_test diff --git a/modules/cannarithm/test/test_element_operation.cpp b/modules/cannarithm/test/test_element_operation.cpp deleted file mode 100644 index db20321d43f..00000000000 --- a/modules/cannarithm/test/test_element_operation.cpp +++ /dev/null @@ -1,137 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#include "test_precomp.hpp" -#include "opencv2/ts/cuda_test.hpp" -#include "opencv2/cann_arithm.hpp" - -namespace opencv_test -{ -namespace -{ -// Random Generator -Mat randomMat(int w, int h, int dtype) -{ - Mat rnMat(w, h, dtype); - RNG rng; - rng.fill(rnMat, RNG::UNIFORM, 0.f, 1.f); - return rnMat; -} -cv::Scalar randomScalar() -{ - RNG rng; - return Scalar(rng, rng.next(), rng.next(), rng.next()); -} -float randomNum() -{ - RNG rng; - float rdnNum = float(rng.uniform(0.3, 3.0)); - return rdnNum; -} -Mat genMask() -{ - Mat mask = Mat::zeros(Size(10, 10), CV_8UC1); - rectangle(mask, cv::Rect(5, 5, 3, 3), Scalar(255), -1); - return mask; -} - -#define DEVICE_ID 0 - -/****************TEST CASE***************/ -// MAT & Mat -#define TEST_MAT_OP_MAT(idx, op, ...) 
\ - TEST(ELEMENTWISE_OP, MAT_##op##_MAT_##idx) \ - { \ - cv::cann::setDevice(DEVICE_ID); \ - \ - Mat cpuMat1 = randomMat(10, 10, CV_32SC3); \ - Mat cpuMat2 = randomMat(10, 10, CV_32SC3); \ - Mat cpuDst; \ - cv::op(cpuMat1, cpuMat2, cpuDst, __VA_ARGS__); \ - \ - AclMat mat1, mat2; \ - mat1.upload(cpuMat1); \ - mat2.upload(cpuMat2); \ - AclMat dst, dstS; \ - cv::cann::op(mat1, mat2, dst, __VA_ARGS__); \ - Mat npuDst, npuDstS; \ - dst.download(npuDst); \ - AclStream stream; \ - cv::cann::op(mat1, mat2, dstS, __VA_ARGS__, stream); \ - stream.waitForCompletion(); \ - dstS.download(npuDstS); \ - \ - EXPECT_MAT_NEAR(npuDst, cpuDst, 0.0); \ - EXPECT_MAT_NEAR(npuDst, npuDstS, 0.0); \ - cv::cann::resetDevice(); \ - } - -TEST_MAT_OP_MAT(1, add, noArray(), -1); -TEST_MAT_OP_MAT(1, subtract, noArray(), -1); -TEST_MAT_OP_MAT(1, multiply, 1, -1); -TEST_MAT_OP_MAT(1, divide, 1, -1); -TEST_MAT_OP_MAT(1, bitwise_and, noArray()); -TEST_MAT_OP_MAT(1, bitwise_or, noArray()); -TEST_MAT_OP_MAT(1, bitwise_xor, noArray()); - -TEST_MAT_OP_MAT(2, add, genMask(), CV_32SC3); -TEST_MAT_OP_MAT(2, subtract, genMask(), CV_32SC3); -TEST_MAT_OP_MAT(2, multiply, randomNum(), -1); -TEST_MAT_OP_MAT(2, divide, randomNum(), -1); -TEST_MAT_OP_MAT(2, bitwise_and, genMask()); -TEST_MAT_OP_MAT(2, bitwise_or, genMask()); -TEST_MAT_OP_MAT(2, bitwise_xor, genMask()); - -// SCALAR & MAT -#define TEST_MAT_OP_SCALAR(idx, op, ...) 
\ - TEST(ELEMENTWISE_OP, MAT_##op##_SCALAR_##idx) \ - { \ - Scalar cpuS1 = randomScalar(); \ - Scalar cpuS2 = randomScalar(); \ - Mat cpuMatS1(10, 10, CV_32SC3, cpuS1); \ - Mat cpuMatS2(10, 10, CV_32SC3, cpuS2); \ - Mat cpuDst, cpuDstC; \ - cv::op(cpuMatS1, cpuMatS2, cpuDst, __VA_ARGS__); \ - cv::op(cpuMatS2, cpuMatS1, cpuDstC, __VA_ARGS__); \ - cv::cann::setDevice(DEVICE_ID); \ - \ - AclMat mat; \ - mat.upload(cpuMatS2); \ - AclMat dst, dstS, dstC, dstCS; \ - cv::cann::op(cpuS1, cpuMatS2, dst, __VA_ARGS__); \ - cv::cann::op(cpuMatS2, cpuS1, dstC, __VA_ARGS__); \ - Mat npuDst, npuDstS, npuDstC, npuDstCS; \ - dst.download(npuDst); \ - dstC.download(npuDstC); \ - AclStream stream; \ - cv::cann::op(cpuS1, cpuMatS2, dstS, __VA_ARGS__, stream); \ - cv::cann::op(cpuMatS2, cpuS1, dstCS, __VA_ARGS__, stream); \ - stream.waitForCompletion(); \ - dstS.download(npuDstS); \ - dstCS.download(npuDstCS); \ - \ - EXPECT_MAT_NEAR(npuDst, npuDstS, 0.0); \ - EXPECT_MAT_NEAR(npuDst, cpuDst, 0.0); \ - EXPECT_MAT_NEAR(npuDstC, npuDstCS, 0.0); \ - EXPECT_MAT_NEAR(npuDstC, cpuDstC, 0.0); \ - \ - cv::cann::resetDevice(); \ - } -TEST_MAT_OP_SCALAR(1, add, noArray(), -1); -TEST_MAT_OP_SCALAR(1, subtract, noArray(), -1); -TEST_MAT_OP_SCALAR(1, multiply, 1, -1); -TEST_MAT_OP_SCALAR(1, divide, 1, -1); -TEST_MAT_OP_SCALAR(1, bitwise_and, noArray()); -TEST_MAT_OP_SCALAR(1, bitwise_or, noArray()); -TEST_MAT_OP_SCALAR(1, bitwise_xor, noArray()); - -TEST_MAT_OP_SCALAR(2, add, genMask(), CV_32SC3); -TEST_MAT_OP_SCALAR(2, subtract, genMask(), CV_32SC3); -TEST_MAT_OP_SCALAR(2, bitwise_and, genMask()); -TEST_MAT_OP_SCALAR(2, bitwise_or, genMask()); -TEST_MAT_OP_SCALAR(2, bitwise_xor, genMask()); -TEST_MAT_OP_SCALAR(2, multiply, randomNum(), -1); -TEST_MAT_OP_SCALAR(2, divide, randomNum(), -1); -} // namespace -} // namespace opencv_test diff --git a/modules/cannarithm/CMakeLists.txt b/modules/cannops/CMakeLists.txt similarity index 66% rename from modules/cannarithm/CMakeLists.txt rename to 
modules/cannops/CMakeLists.txt index 55bcc028510..016168d9359 100644 --- a/modules/cannarithm/CMakeLists.txt +++ b/modules/cannops/CMakeLists.txt @@ -1,10 +1,10 @@ if(IOS OR WINRT OR ANDROID OR APPLE OR WIN32 OR (NOT HAVE_CANN)) - ocv_module_disable(cannarithm) + ocv_module_disable(cannops) endif() set(the_description "Ascend-accelerated Operations on Matrices") -ocv_add_module(cannarithm opencv_core WRAP python) +ocv_add_module(cannops opencv_core WRAP python) ocv_module_include_directories(${CANN_INCLUDE_DIRS}) ocv_glob_module_sources() ocv_install_used_external_targets(${CANN_LIBRARIES}) @@ -12,5 +12,5 @@ ocv_create_module(${CANN_LIBRARIES}) ocv_include_directories(${CMAKE_SOURCE_DIR}/modules/ts/include) -ocv_add_accuracy_tests(DEPENDS_ON opencv_cannarithm) -ocv_add_perf_tests(DEPENDS_ON opencv_cannarithm) +ocv_add_accuracy_tests(DEPENDS_ON opencv_cannops) +ocv_add_perf_tests(DEPENDS_ON opencv_cannops) diff --git a/modules/cannops/include/opencv2/cann.hpp b/modules/cannops/include/opencv2/cann.hpp new file mode 100644 index 00000000000..3290e768d6e --- /dev/null +++ b/modules/cannops/include/opencv2/cann.hpp @@ -0,0 +1,324 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANNOPS_CANN_HPP +#define OPENCV_CANNOPS_CANN_HPP + +#include "opencv2/core.hpp" + +/** + @defgroup cann Ascend-accelerated Computer Vision + @{ + @defgroup canncore Core part + @{ + @defgroup cann_struct Data Structures + @defgroup cann_init Initialization and Information + @} + @} + */ + +namespace cv +{ +namespace cann +{ +class AscendStream; + +//! @addtogroup cann_struct +//! 
@{ + +//=================================================================================== +// NpuMat +//=================================================================================== + +/** @brief Base storage class for NPU memory with reference counting. + * NpuMat class has a similar interface with Mat and NpuMat, and work on [Ascend + * NPU](https://www.hiascend.com/) backend. + * @sa Mat cuda::GpuMat + */ +class AscendStream; +class CV_EXPORTS_W NpuMat +{ +public: + class CV_EXPORTS_W Allocator + { + public: + virtual ~Allocator() {} + // basic allocator + virtual std::shared_ptr allocate(size_t size) = 0; + // allocator must fill data, step and refcount fields + virtual bool allocate(NpuMat* mat, int rows, int cols, size_t elemSize) = 0; + }; + + /** + * @brief Create default allocator for NpuMat. This allocator alloc memory from device for + * specific size. + */ + CV_WRAP static NpuMat::Allocator* defaultAllocator(); + + /** + * @brief Set allocator for NpuMat. + * @param allocator + */ + CV_WRAP static void setDefaultAllocator(NpuMat::Allocator* allocator); + + //! default constructor + CV_WRAP explicit NpuMat(NpuMat::Allocator* allocator_ = NpuMat::defaultAllocator()); + + //! constructs NpuMat of the specified size and type + CV_WRAP NpuMat(int rows, int cols, int type, + NpuMat::Allocator* allocator = NpuMat::defaultAllocator()); + //! constructs NpuMat of the specified size and type + CV_WRAP NpuMat(Size size, int type, NpuMat::Allocator* allocator = NpuMat::defaultAllocator()); + + //! constructs NpuMat and fills it with the specified value s + CV_WRAP NpuMat(int rows, int cols, int type, Scalar& s, + NpuMat::Allocator* allocator = NpuMat::defaultAllocator()); + //! constructs NpuMat and fills it with the specified value s + CV_WRAP NpuMat(Size size, int type, Scalar& s, + NpuMat::Allocator* allocator = NpuMat::defaultAllocator()); + + //! copy constructor + CV_WRAP NpuMat(const NpuMat& m); + + //! 
constructs NpuMat by crop a certain area from another + CV_WRAP NpuMat(InputArray _m, const Rect& roi); + CV_WRAP NpuMat(InputArray _m, const Rect& roi, AscendStream& stream); + + //! builds NpuMat from host memory (Blocking call) + CV_WRAP explicit NpuMat(InputArray arr, AscendStream& stream, + NpuMat::Allocator* allocator = NpuMat::defaultAllocator()); + + //! assignment operators + NpuMat& operator=(const NpuMat& m); + + //! sets some of the NpuMat elements to s (Blocking call) + CV_WRAP NpuMat& setTo(const Scalar& s); + //! sets some of the NpuMat elements to s (Non-Blocking call) + CV_WRAP NpuMat& setTo(const Scalar& s, AscendStream& stream); + + //! sets all of the NpuMat elements to float (Blocking call) + CV_WRAP NpuMat& setTo(float sc); + + //! sets all of the NpuMat elements to float (Non-Blocking call) + CV_WRAP NpuMat& setTo(float sc, AscendStream& stream); + + //! swaps with other smart pointer + CV_WRAP void swap(NpuMat& mat); + + //! allocates new NpuMat data unless the NpuMat already has specified size and type + CV_WRAP void create(int rows, int cols, int type); + + //! upload host memory data to NpuMat (Blocking call) + CV_WRAP void upload(InputArray arr); + //! upload host memory data to NpuMat (Non-Blocking call) + CV_WRAP void upload(InputArray arr, AscendStream& stream); + + //! download data from NpuMat to host (Blocking call) + CV_WRAP void download(OutputArray dst) const; + //! download data from NpuMat to host (Non-Blocking call) + CV_WRAP void download(OutputArray dst, AscendStream& stream) const; + + //! converts NpuMat to another datatype (Blocking call) + CV_WRAP void convertTo(CV_OUT NpuMat& dst, int rtype) const; + + //! converts NpuMat to another datatype (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT NpuMat& dst, int rtype, AscendStream& stream) const; + + //! returns true iff the NpuMat data is continuous + //! (i.e. when there are no gaps between successive rows) + CV_WRAP bool isContinuous() const; + + //! 
returns element size in bytes + CV_WRAP size_t elemSize() const; + + //! returns the size of element channel in bytes + CV_WRAP size_t elemSize1() const; + + //! returns element type + CV_WRAP int type() const; + + //! returns element type + CV_WRAP int depth() const; + + //! returns number of channels + CV_WRAP int channels() const; + + //! returns step/elemSize1() + CV_WRAP size_t step1() const; + + //! returns NpuMat size : width == number of columns, height == number of rows + CV_WRAP Size size() const; + + //! returns true if NpuMat data is NULL + CV_WRAP bool empty() const; + + //! internal use method: updates the continuity flag + CV_WRAP void updateContinuityFlag(); + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + + //! the number of rows and columns + int rows, cols; + + //! a distance between successive rows in bytes; includes the gap if any + CV_PROP size_t step; + + //! pointer to the data + std::shared_ptr data; + + //! helper fields used in locateROI and adjustROI + uchar* datastart; + const uchar* dataend; + + //! allocator + Allocator* allocator; +}; + +class AscendStream; +class AscendStreamAccessor; +class AscendEvent; +class AscendEventAccessor; +class DefaultDeviceInitializer; + +//=================================================================================== +// AscendStream +//=================================================================================== + +/** @brief In AscendCL Stream(AscendStream) is a task queue. Stream is used to manage the + * parallelism of tasks. The tasks inside a Stream are executed sequentially, that is, the Stream + * executes sequentially according to the sent tasks; the tasks in different Streams are executed in + * parallel. + * + * All Non-blocking functions should pass parameter stream, These function returns immediately after + * the task is submitted. Caller should wait stream until completion. 
+ * + * Blocking functions implicitly use the default stream, and synchronize stream before function + * return. + * @sa cuda::Stream + */ + +// TODO: Stream is defined in namespace cuda, and pybind code does not use a namespace of stream, +// change stream name to AscendStream to avoid conflict. +class CV_EXPORTS_W AscendStream +{ +public: + CV_WRAP AscendStream(); + + //! blocks the current CPU thread until all operations in the stream are complete. + CV_WRAP void waitForCompletion(); + + //! blocks the current CPU thread until event trigger. + CV_WRAP void waitAscendEvent(const cv::cann::AscendEvent& event); + + /** + * @brief return default AscendStream object for default Acl stream. + */ + CV_WRAP static AscendStream& Null(); + + // acl symbols CANNOT be used in any hpp files. Use an inner class to avoid acl symbols defined in + // hpp. + class Impl; + + void addTensorHolder(const std::shared_ptr& holder); + +private: + Ptr impl_; + AscendStream(const Ptr& impl); + + friend class AscendStreamAccessor; + friend class DefaultDeviceInitializer; +}; + +/** + * @brief AscendEvent to synchronize between different streams. + */ +class CV_EXPORTS_W AscendEvent +{ +public: + CV_WRAP AscendEvent(); + + //! records an event + CV_WRAP void record(AscendStream& stream); + + //! waits for an event to complete + CV_WRAP void waitForComplete() const; + + class Impl; + +private: + Ptr impl_; + AscendEvent(const Ptr& impl); + + friend class AscendEventAccessor; +}; + +/** @brief Bindings overload to create a Stream object from the address stored in an existing CANN + * Runtime API stream pointer (aclrtStream). + * @param AscendStreamAddress Memory address stored in a CANN Runtime API stream pointer + * (aclrtStream). The created Stream object does not perform any allocation or deallocation and + * simply wraps existing raw CANN Runtime API stream pointer. + * @note Overload for generation of bindings only, not exported or intended for use internally from + * C++. 
+ */ +CV_EXPORTS_W AscendStream wrapStream(size_t AscendStreamAddress); + +//! @} cann_struct + +//=================================================================================== +// Initialization & Info +//=================================================================================== + +//! @addtogroup cann_init +//! @{ + +//! Get Ascend matrix object from Input array, upload matrix memory if need. (Non-Blocking call) +NpuMat getInputMat(InputArray src, AscendStream& stream); + +//! Get Ascend matrix object from Output array, upload matrix memory if need. +NpuMat getOutputMat(OutputArray dst, int rows, int cols, int type, AscendStream& stream); + +//! Sync output matrix to Output array, download matrix memory if need. +void syncOutput(const NpuMat& dst, OutputArray _dst, AscendStream& stream); + +/** + * @brief Choose Ascend npu device. + */ +CV_EXPORTS_W void setDevice(int device); + +/** + * @brief Clear all context created in current Ascend device. + */ +CV_EXPORTS_W void resetDevice(); + +/** + * @brief Get current Ascend device. + */ +CV_EXPORTS_W int32_t getDevice(); + +/** + * @brief init AscendCL. + */ +CV_EXPORTS_W void initAcl(); + +/** + * @brief finalize AscendCL. + * @note finalizeAcl only can be called once for a process. Call this function after all AscendCL + * options finished. + */ +CV_EXPORTS_W void finalizeAcl(); + +//! @} cann_init + +} // namespace cann +} // namespace cv + +#include "opencv2/cann.inl.hpp" + +#endif // OPENCV_CANNOPS_CANN_HPP diff --git a/modules/cannops/include/opencv2/cann.inl.hpp b/modules/cannops/include/opencv2/cann.inl.hpp new file mode 100644 index 00000000000..8529a51655b --- /dev/null +++ b/modules/cannops/include/opencv2/cann.inl.hpp @@ -0,0 +1,95 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_CANNOPS_CANN_INL_HPP +#define OPENCV_CANNOPS_CANN_INL_HPP + +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ +inline NpuMat::NpuMat(NpuMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), datastart(0), dataend(0), + allocator(allocator_) +{ +} + +inline NpuMat::NpuMat(int rows_, int cols_, int type_, NpuMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), datastart(0), dataend(0), + allocator(allocator_) +{ + if (rows_ > 0 && cols_ > 0) + create(rows_, cols_, type_); +} + +inline NpuMat::NpuMat(Size size_, int type_, NpuMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), datastart(0), dataend(0), + allocator(allocator_) +{ + if (size_.height > 0 && size_.width > 0) + create(size_.height, size_.width, type_); +} + +inline NpuMat::NpuMat(InputArray arr, AscendStream& stream, NpuMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), datastart(0), dataend(0), + allocator(allocator_) +{ + upload(arr, stream); +} + +inline NpuMat::NpuMat(const NpuMat& m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), + datastart(m.datastart), dataend(m.dataend), allocator(m.allocator) +{} + +inline NpuMat& NpuMat::operator=(const NpuMat& m) +{ + if (this != &m) + { + NpuMat temp(m); + swap(temp); + } + + return *this; +} + +inline void NpuMat::swap(NpuMat& b) +{ + std::swap(flags, b.flags); + std::swap(rows, b.rows); + std::swap(cols, b.cols); + std::swap(step, b.step); + std::swap(data, b.data); + std::swap(datastart, b.datastart); + std::swap(dataend, b.dataend); + std::swap(allocator, b.allocator); +} + +inline bool NpuMat::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; } + +inline size_t NpuMat::elemSize() const { return CV_ELEM_SIZE(flags); } + +inline size_t NpuMat::elemSize1() const { return CV_ELEM_SIZE1(flags); } + +inline int NpuMat::type() const { return CV_MAT_TYPE(flags); } + +inline int NpuMat::depth() const { return 
CV_MAT_DEPTH(flags); } + +inline int NpuMat::channels() const { return CV_MAT_CN(flags); } + +inline size_t NpuMat::step1() const { return step / elemSize1(); } + +inline Size NpuMat::size() const { return Size(cols, rows); } + +inline bool NpuMat::empty() const { return data == 0; } + +inline AscendStream::AscendStream(const Ptr& impl) : impl_(impl) {} + +inline AscendEvent::AscendEvent(const Ptr& impl) : impl_(impl) {} +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNOPS_CANN_INL_HPP diff --git a/modules/cannops/include/opencv2/cann_call.hpp b/modules/cannops/include/opencv2/cann_call.hpp new file mode 100644 index 00000000000..e13e4c5c72a --- /dev/null +++ b/modules/cannops/include/opencv2/cann_call.hpp @@ -0,0 +1,134 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANNOPS_CANN_CALL_HPP +#define OPENCV_CANNOPS_CANN_CALL_HPP + +#include +#include +#include +#include +#include "opencv2/cann.hpp" + +class aclopAttr; + +namespace cv +{ +namespace cann +{ +struct AclAttribute +{ + virtual ~AclAttribute() = default; + virtual void addAttr(aclopAttr* opAttr) = 0; +}; + +#define DEFINE_ATTR_DECLEAR(FUNC, TYPE) \ + class Acl##FUNC##Attribute : public AclAttribute \ + { \ + const char* name; \ + TYPE value; \ + \ + public: \ + Acl##FUNC##Attribute(const char* _name, TYPE _value) : name(_name), value(_value){}; \ + void addAttr(aclopAttr* opAttr) CV_OVERRIDE; \ + } + +#define DEFINE_ATTR_LIST_DECLEAR(FUNC, TYPE) \ + class AclList##FUNC##Attribute : public AclAttribute \ + { \ + const char* name; \ + TYPE value; \ + int num; \ + \ + public: \ + AclList##FUNC##Attribute(const char* _name, int _num, TYPE _value) \ + : name(_name), value(_value), num(_num){}; \ + void addAttr(aclopAttr* opAttr) CV_OVERRIDE; \ + } + +DEFINE_ATTR_DECLEAR(Float, float); +DEFINE_ATTR_DECLEAR(String, const char*); 
+DEFINE_ATTR_DECLEAR(Int, int); +DEFINE_ATTR_DECLEAR(Bool, bool); +DEFINE_ATTR_LIST_DECLEAR(Int, int64_t*); + +#undef DEFINE_ATTR_DECLEAR +#undef DEFINE_ATTR_LIST_DECLEAR + +class AscendStream::Impl +{ +public: + aclrtStream stream; + bool ownStream; + std::set> tensorHolders; + Impl(); + explicit Impl(aclrtStream stream); + void AddTensorHolder(const std::shared_ptr& tensorData); +}; +class AscendEvent::Impl +{ +public: + aclrtEvent event; + bool ownEvent; + + Impl(); + explicit Impl(aclrtEvent event); + ~Impl(); +}; +struct AscendTensor +{ + std::string name; + std::shared_ptr data; + size_t dataSize; + std::vector dims; + aclDataType dtype; + aclFormat format; + AscendTensor(){}; + AscendTensor(std::shared_ptr _data, size_t _dataSize, int64_t* _dims, size_t _dimSize, + aclDataType _dtype, std::string _name = "", aclFormat _format = ACL_FORMAT_ND); + AscendTensor(std::shared_ptr _data, size_t _dataSize, std::vector& _dims, + aclDataType _dtype, std::string _name = "", aclFormat _format = ACL_FORMAT_ND) + : name(_name), data(_data), dataSize(_dataSize), dims(_dims), dtype(_dtype), + format(_format){}; + AscendTensor(const NpuMat& npuMat, std::string _name = "", aclFormat format = ACL_FORMAT_ND); +}; +void aclrtMallocWarpper(void** data, size_t size); +void aclrtFreeWarpper(void* data); + +void aclrtMemcpyWarpper(std::shared_ptr& dst, size_t offset, const void* src, size_t size, + AscendStream& stream); +void aclrtMemcpyWarpper(void* dst, const std::shared_ptr& src, size_t offset, size_t size, + AscendStream& stream); +void aclrtMemcpyWarpper(std::shared_ptr& dst, size_t dstOffset, + const std::shared_ptr& src, size_t srcOffset, size_t size, + AscendStream& stream); +void aclrtMemcpy2dWarpper(std::shared_ptr& dst, size_t offset, size_t dpitch, + const void* src, size_t spitch, size_t width, size_t length, + AscendStream& stream); +void aclrtMemcpy2dWarpper(void* dst, size_t dpitch, const std::shared_ptr& src, + size_t offset, size_t spitch, size_t width, size_t 
length, + AscendStream& stream); +void aclrtMemsetWarpper(std::shared_ptr& ptr, int32_t value, size_t count, + AscendStream& stream); + +static std::vector emptyattr; +void callAscendOperator(const char* op, std::vector& srcs, + std::vector& dsts, AscendStream& stream, + std::vector& attrs = emptyattr); +void callAscendOperator(const NpuMat& src, NpuMat& dst, const char* op, AscendStream& stream, + std::vector& attrs = emptyattr); +void callAscendOperator(const NpuMat& src1, const NpuMat& src2, NpuMat& dst, const char* op, + AscendStream& stream, std::vector& attrs = emptyattr); +void callAscendOperator(const NpuMat* srcs, size_t srcCount, NpuMat& dst, const char* op, + AscendStream& stream, std::vector& attrs = emptyattr); +void callAscendOperator(const NpuMat& src, const Scalar& sc, bool inv, NpuMat& dst, const char* op, + AscendStream& stream, std::vector& attrs = emptyattr); +void callAscendOperator(const NpuMat& src, NpuMat* dsts, const size_t dstCount, const char* op, + AscendStream& stream, std::vector& attrs = emptyattr); +std::shared_ptr mallocAndUpload(void* data, size_t size, AscendStream& stream, + NpuMat::Allocator* allocator = NpuMat::defaultAllocator()); +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNOPS_CANN_CALL_HPP diff --git a/modules/cannops/include/opencv2/cann_interface.hpp b/modules/cannops/include/opencv2/cann_interface.hpp new file mode 100644 index 00000000000..5d3d75096f1 --- /dev/null +++ b/modules/cannops/include/opencv2/cann_interface.hpp @@ -0,0 +1,333 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANNOPS_CANN_INTERFACE_HPP +#define OPENCV_CANNOPS_CANN_INTERFACE_HPP + +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ + +/** + @addtogroup cann + @{ + @defgroup cannops Operations for Ascend Backend. 
+ @{ + @defgroup cannops_elem Per-element Operations + @defgroup cannops_core Core Operations on Matrices + @defgroup cannimgproc Image Processing + @} + @} + */ + +//! @addtogroup cannops_elem +//! @{ + +/** @brief Computes a matrix-matrix or matrix-scalar sum. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param dtype Optional depth of the output array. + * @param stream AscendStream for the asynchronous version. + * @sa cv::add cuda::add + */ +CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AscendStream& stream = AscendStream::Null()); +// This code should not be compiled nor analyzed by doxygen. This interface is only for python binding +// code generation. add(InputArray, InputArray ...) can accept Scalar as its parameter. (Scalar -> Mat +// -> InputArray) +#ifdef NEVER_DEFINED +CV_EXPORTS_W void add(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), + int dtype = -1, AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void add(Scalar src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), + int dtype = -1, AscendStream& stream = AscendStream::Null()); +#endif + +/** @brief Computes a matrix-matrix or matrix-scalar difference. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). 
The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param dtype Optional depth of the output array. + * @param stream AscendStream for the asynchronous version. + * @sa cv::subtract cuda::subtract + */ +CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AscendStream& stream = AscendStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void subtract(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void subtract(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AscendStream& stream = AscendStream::Null()); +#endif + +/** @brief Computes a matrix-matrix or matrix-scalar per-element product. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param scale Optional scale factor. + * @param dtype Optional depth of the output array. + * @param stream AscendStream for the asynchronous version. 
+ * @sa cv::multiply cuda::multiply + */ +CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, OutputArray dst, float scale = 1, + int dtype = -1, AscendStream& stream = AscendStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void multiply(InputArray src1, Scalar src2, OutputArray dst, float scale = 1, + int dtype = -1, AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void multiply(Scalar src1, InputArray src2, OutputArray dst, float scale = 1, + int dtype = -1, AscendStream& stream = AscendStream::Null()); +#endif + +/** @brief Computes a matrix-matrix or matrix-scalar division. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param scale Optional scale factor. + * @param dtype Optional depth of the output array. + * @param stream AscendStream for the asynchronous version. + * @sa cv::divide cuda::divide + */ +CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, float scale = 1, + int dtype = -1, AscendStream& stream = AscendStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void divide(InputArray src1, Scalar src2, OutputArray dst, float scale = 1, int dtype = -1, + AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void divide(Scalar src1, InputArray src2, OutputArray dst, float scale = 1, int dtype = -1, + AscendStream& stream = AscendStream::Null()); +#endif + +/** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar). + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. 
+ * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param stream AscendStream for the asynchronous version. + * @sa cv::bitwise_and cuda::bitwise_and + */ +CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void bitwise_and(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void bitwise_and(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +#endif + +/** @brief Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar). + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param stream AscendStream for the asynchronous version. 
+ * @sa cv::bitwise_or cuda::bitwise_or + */ +CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void bitwise_or(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void bitwise_or(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +#endif + +/** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and + * scalar). + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param stream AscendStream for the asynchronous version. + * @sa cv::bitwise_xor cuda::bitwise_xor + */ +CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void bitwise_xor(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void bitwise_xor(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); +#endif + +/** @brief Performs a per-element bitwise inversion. + * @param src First source matrix. + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. 
+ * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param stream AscendStream for the asynchronous version. + * @sa cv::bitwise_not cuda::bitwise_not + */ +CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), + AscendStream& stream = AscendStream::Null()); + +/** @brief Computes the weighted sum of two arrays. + +@param src1 First source array. +@param alpha Weight for the first array elements. +@param src2 Second source array of the same size and channel number as src1 . +@param beta Weight for the second array elements. +@param dst Destination array that has the same size and number of channels as the input arrays. +@param gamma Scalar added to each sum. +@param dtype Optional depth of the destination array. When both input arrays have the same depth, +dtype can be set to -1, which will be equivalent to src1.depth(). +@param stream Stream for the asynchronous version. + +The function addWeighted calculates the weighted sum of two arrays as follows: + +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* +\texttt{beta} + \texttt{gamma} )\f] + +where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each +channel is processed independently. + +@sa cv::addWeighted cv::cuda::addWeighted + */ +CV_EXPORTS_W void addWeighted(InputArray src1, double alpha, InputArray src2, double beta, + double gamma, OutputArray dst, int dtype = -1, + AscendStream& stream = AscendStream::Null()); + +/** @brief Applies a fixed-level threshold to each array element. + +@param src Source array (single-channel). +@param dst Destination array with the same size and type as src . +@param thresh Threshold value. +@param maxval Maximum value to use with THRESH_BINARY and THRESH_BINARY_INV threshold types. +@param type Threshold type. 
For details, see threshold . The THRESH_MASK, THRESH_OTSU and +THRESH_TRIANGLE threshold types are not supported. +@param stream AscendStream for the asynchronous version. + +@sa cv::threshold cv::cuda::threshold +*/ +CV_EXPORTS_W double threshold(InputArray src, OutputArray dst, double thresh, double maxval, + int type, AscendStream& stream = AscendStream::Null()); + +//! @} cannops_elem + +//! @addtogroup cannops_core +//! @{ + +/** @brief Makes a multi-channel matrix out of several single-channel matrices. + +@param src Array/vector of source matrices. +@param n Number of source matrices. +@param dst Destination matrix. +@param stream AscendStream for the asynchronous version. + +@sa cv::merge cv::cuda::merge + */ +CV_EXPORTS void merge(const NpuMat* src, size_t n, OutputArray dst, + AscendStream& stream = AscendStream::Null()); +/** @overload */ +CV_EXPORTS_W void merge(const std::vector& src, OutputArray dst, + AscendStream& stream = AscendStream::Null()); + +/** @brief Copies each plane of a multi-channel matrix into an array. + +@param src Source matrix. +@param dst Destination array/vector of single-channel matrices. +@param stream AscendStream for the asynchronous version. + +@sa cv::split cv::cuda::split + */ +CV_EXPORTS void split(InputArray src, NpuMat* dst, AscendStream& stream = AscendStream::Null()); +/** @overload */ +CV_EXPORTS_W void split(InputArray src, CV_OUT std::vector& dst, + AscendStream& stream = AscendStream::Null()); + +/** @brief Transposes a matrix. + +@param src Source matrix. +@param dst Destination matrix. +@param stream AscendStream for the asynchronous version. + +@sa cv::transpose cv::cuda::transpose + */ +CV_EXPORTS_W void transpose(InputArray src, OutputArray dst, + AscendStream& stream = AscendStream::Null()); + +/** @brief Flips a 2D matrix around vertical, horizontal, or both axes. + +@param src Source matrix. +@param dst Destination matrix. +@param flipCode Flip mode for the source: +- 0 Flips around x-axis. 
+- \> 0 Flips around y-axis. +- \< 0 Flips around both axes. +@param stream AscendStream for the asynchronous version. + +@sa cv::flip cv::cuda::flip + */ +CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode, + AscendStream& stream = AscendStream::Null()); + +/** @brief Rotates a 2D array in multiples of 90 degrees. +The function cv::rotate rotates the array in one of three different ways: +* Rotate by 90 degrees clockwise (rotateCode = ROTATE_90_CLOCKWISE). +* Rotate by 180 degrees clockwise (rotateCode = ROTATE_180). +* Rotate by 270 degrees clockwise (rotateCode = ROTATE_90_COUNTERCLOCKWISE). +@param src input array. +@param dst output array of the same type as src. The size is the same with ROTATE_180, +and the rows and cols are switched for ROTATE_90_CLOCKWISE and ROTATE_90_COUNTERCLOCKWISE. +@param rotateCode an enum to specify how to rotate the array; see the enum #RotateFlags +@param stream AscendStream for the asynchronous version. + +@sa cv::rotate +*/ +CV_EXPORTS_W void rotate(InputArray src, OutputArray dst, int rotateCode, + AscendStream& stream = AscendStream::Null()); +//! @} cannops_core + +//! @addtogroup cannimgproc +//! @{ + +/** @brief Converts an image from one color space to another. + +@param src Source image with CV_8U , CV_16U , or CV_32F depth and 1, 3, or 4 channels. +@param dst Destination image. +@param code Color space conversion code. For details, see cvtColor . +@param dstCn Number of channels in the destination image. If the parameter is 0, the number of the +channels is derived automatically from src and the code . +@param stream AscendStream for the asynchronous version. + +@sa cv::cvtColor cv::cuda::cvtColor + */ +CV_EXPORTS_W void cvtColor(InputArray src, OutputArray dst, int code, int dstCn = 0, + AscendStream& stream = AscendStream::Null()); + +//! 
@} cannimgproc + +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNOPS_CANN_INTERFACE_HPP diff --git a/modules/cannops/include/opencv2/cann_private.hpp b/modules/cannops/include/opencv2/cann_private.hpp new file mode 100644 index 00000000000..9609b0d883d --- /dev/null +++ b/modules/cannops/include/opencv2/cann_private.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANNOPS_CANN_PRIVATE_HPP +#define OPENCV_CANNOPS_CANN_PRIVATE_HPP +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ +void adds(const NpuMat& arr, float scalar, NpuMat& dst, AscendStream& stream); +void muls(const NpuMat& arr, float scalar, NpuMat& dst, AscendStream& stream); +void transData(const NpuMat& src, NpuMat& dst, const char* from, const char* to, + AscendStream& stream); +void transpose(const NpuMat& src, int64_t* perm, NpuMat& dst, AscendStream& stream); +void flip(const NpuMat& src, std::vector& asixs, NpuMat& dst, AscendStream& stream); +void merge(const NpuMat* src, size_t n, NpuMat& dst, AscendStream& stream); +void split(const NpuMat& src, NpuMat* dst, AscendStream& stream); + +double threshold(NpuMat& src, NpuMat& dst, double thresh, double maxval, int type, + AscendStream& stream); +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNOPS_CANN_PRIVATE_HPP \ No newline at end of file diff --git a/modules/cannops/include/opencv2/stream_accessor.hpp b/modules/cannops/include/opencv2/stream_accessor.hpp new file mode 100644 index 00000000000..ff64d7dcbc0 --- /dev/null +++ b/modules/cannops/include/opencv2/stream_accessor.hpp @@ -0,0 +1,39 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_CANNOPS_STREAM_ACCESSOR_HPP +#define OPENCV_CANNOPS_STREAM_ACCESSOR_HPP + +#include +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ +//! @addtogroup cann_struct +//! @{ + +/** @brief Class that enables getting an aclrtStream from a cann::AscendStream and wrapping an aclrtStream into a cann::AscendStream + */ +struct AscendStreamAccessor +{ + CV_EXPORTS static aclrtStream getStream(const AscendStream& stream); + CV_EXPORTS static AscendStream wrapStream(aclrtStream stream); +}; + +/** @brief Class that enables getting an aclrtEvent from a cann::AscendEvent and wrapping an aclrtEvent into a cann::AscendEvent + */ +struct AscendEventAccessor +{ + CV_EXPORTS static aclrtEvent getEvent(const AscendEvent& event); + CV_EXPORTS static AscendEvent wrapEvent(aclrtEvent event); +}; + +//! @} cann_struct + +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNOPS_STREAM_ACCESSOR_HPP diff --git a/modules/cannops/misc/python/pyopencv_cann.hpp b/modules/cannops/misc/python/pyopencv_cann.hpp new file mode 100644 index 00000000000..ed9a5cd08fc --- /dev/null +++ b/modules/cannops/misc/python/pyopencv_cann.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html.
+ +#ifndef OPENCV_CANNOPS_PYOPENCV_CANN_HPP +#define OPENCV_CANNOPS_PYOPENCV_CANN_HPP + +#ifdef HAVE_OPENCV_CORE + +#include "opencv2/cann.hpp" + +typedef std::vector vector_NpuMat; +typedef cann::NpuMat::Allocator NpuMat_Allocator; + +CV_PY_TO_CLASS(cann::NpuMat); +CV_PY_TO_CLASS(cann::AscendStream); + +CV_PY_TO_CLASS_PTR(cann::NpuMat); +CV_PY_TO_CLASS_PTR(cann::NpuMat::Allocator); + +CV_PY_FROM_CLASS(cann::NpuMat); +CV_PY_FROM_CLASS(cann::AscendStream); + +CV_PY_FROM_CLASS_PTR(cann::NpuMat::Allocator); + +#endif // HAVE_OPENCV_CORE + +#endif // OPENCV_CANNOPS_PYOPENCV_CANN_HPP diff --git a/modules/cannops/misc/python/test/test_cannops.py b/modules/cannops/misc/python/test/test_cannops.py new file mode 100644 index 00000000000..fc69ba3e166 --- /dev/null +++ b/modules/cannops/misc/python/test/test_cannops.py @@ -0,0 +1,154 @@ +# This file is part of OpenCV project. +# It is subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html. 
+ +from tests_common import NewOpenCVTests +import cv2 as cv +import numpy as np + + +def genMask(mask, listx, listy): + for row in range(mask.shape[0]): + for col in range(mask.shape[1]): + if (row in listx and col in listx) or (row in listy and col in listy): + mask[row][col] = 1 + mask = mask.astype(np.uint8) + return mask + + +mask = np.zeros((5, 5)) +listx = [0, 1] +listy = [1, 2] +mask = genMask(mask, listx, listy) + + +class cannop_test(NewOpenCVTests): + def test_ascend(self): + cv.cann.initAcl() + cv.cann.getDevice() + cv.cann.setDevice(0) + stream = cv.cann.AscendStream_Null() + cv.cann.wrapStream(id(stream)) + cv.cann.resetDevice() + + def test_arithmetic(self): + npMat1 = np.random.random((5, 5, 3)).astype(int) + npMat2 = np.random.random((5, 5, 3)).astype(int) + cv.cann.setDevice(0) + + self.assertTrue(np.allclose(cv.cann.add( + npMat1, npMat2), cv.add(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.subtract( + npMat1, npMat2), cv.subtract(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.multiply( + npMat1, npMat2, scale=2), cv.multiply(npMat1, npMat2, scale=2))) + self.assertTrue(np.allclose(cv.cann.divide( + npMat1, npMat2, scale=2), cv.divide(npMat1, npMat2, scale=2))) + + # mask + self.assertTrue(np.allclose(cv.cann.add( + npMat1, npMat2, mask=mask), cv.add(npMat1, npMat2, mask=mask))) + self.assertTrue(np.allclose(cv.cann.subtract( + npMat1, npMat2, mask=mask), cv.subtract(npMat1, npMat2, mask=mask))) + self.assertTrue(np.allclose(cv.cann.multiply(npMat1, npMat2, scale=2), + cv.multiply(npMat1, npMat2, scale=2))) + self.assertTrue(np.allclose(cv.cann.divide(npMat1, npMat2, scale=2), + cv.divide(npMat1, npMat2, scale=2))) + self.assertTrue(np.allclose(cv.cann.addWeighted(npMat1, 2, npMat2, 4, 3), + cv.addWeighted(npMat1, 2, npMat2, 4, 3))) + + # stream + stream = cv.cann.AscendStream() + matDst = cv.cann.add(npMat1, npMat2, stream=stream) + stream.waitForCompletion() + self.assertTrue(np.allclose(matDst, cv.add(npMat1, npMat2))) + 
matDst = cv.cann.add(npMat1, npMat2, mask=mask, stream=stream) + stream.waitForCompletion() + self.assertTrue(np.allclose(matDst, cv.add(npMat1, npMat2, mask=mask))) + matDst = cv.cann.subtract(npMat1, npMat2, mask=mask, stream=stream) + stream.waitForCompletion() + self.assertTrue(np.allclose( + matDst, cv.subtract(npMat1, npMat2, mask=mask))) + + cv.cann.resetDevice() + + def test_logical(self): + npMat1 = np.random.random((5, 5, 3)).astype(np.uint16) + npMat2 = np.random.random((5, 5, 3)).astype(np.uint16) + cv.cann.setDevice(0) + + self.assertTrue(np.allclose(cv.cann.bitwise_or(npMat1, npMat2), + cv.bitwise_or(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.bitwise_or( + npMat1, npMat2), cv.bitwise_or(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.bitwise_and(npMat1, npMat2), + cv.bitwise_and(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.bitwise_and( + npMat1, npMat2), cv.bitwise_and(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.bitwise_xor(npMat1, npMat2), + cv.bitwise_xor(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.bitwise_xor( + npMat1, npMat2), cv.bitwise_xor(npMat1, npMat2))) + self.assertTrue(np.allclose(cv.cann.bitwise_not(npMat1), + cv.bitwise_not(npMat1))) + self.assertTrue(np.allclose( + cv.cann.bitwise_not(npMat1), cv.bitwise_not(npMat1))) + self.assertTrue(np.allclose(cv.cann.bitwise_and(npMat1, npMat2, mask=mask), + cv.bitwise_and(npMat1, npMat2, mask=mask))) + self.assertTrue(np.allclose(cv.cann.bitwise_or(npMat1, npMat2, mask=mask), + cv.bitwise_or(npMat1, npMat2, mask=mask))) + self.assertTrue(np.allclose(cv.cann.bitwise_not(npMat1, mask=mask), + cv.bitwise_not(npMat1, mask=mask))) + self.assertTrue(np.allclose(cv.cann.bitwise_xor(npMat1, npMat2, mask=mask), + cv.bitwise_xor(npMat1, npMat2, mask=mask))) + cv.cann.resetDevice() + + def test_imgproc(self): + npMat = (np.random.random((128, 128, 3)) * 255).astype(np.uint8) + cv.cann.setDevice(0) + + self.assertTrue(np.allclose( + 
cv.cann.merge(cv.cann.split(npMat)), npMat)) + + self.assertTrue(np.allclose( + cv.cann.transpose(npMat), cv.transpose(npMat))) + + flipMode = [0, 1, -1] + for fMode in flipMode: + self.assertTrue(np.allclose(cv.cann.flip( + npMat, fMode), cv.flip(npMat, fMode))) + + rotateMode = [0, 1, 2] + for rMode in rotateMode: + self.assertTrue(np.allclose(cv.cann.rotate( + npMat, rMode), cv.rotate(npMat, rMode))) + + cvtModeC1 = [cv.COLOR_GRAY2BGR, cv.COLOR_GRAY2BGRA] + cvtModeC3 = [cv.COLOR_BGR2GRAY, cv.COLOR_BGRA2BGR, cv.COLOR_BGR2RGBA, cv.COLOR_RGBA2BGR, + cv.COLOR_BGR2RGB, cv.COLOR_BGRA2RGBA, cv.COLOR_RGB2GRAY, cv.COLOR_BGRA2GRAY, + cv.COLOR_RGBA2GRAY, cv.COLOR_BGR2BGRA, cv.COLOR_BGR2YUV, cv.COLOR_RGB2YUV, + cv.COLOR_YUV2BGR, cv.COLOR_YUV2RGB, cv.COLOR_BGR2YCrCb, cv.COLOR_RGB2YCrCb, + cv.COLOR_YCrCb2BGR, cv.COLOR_YCrCb2RGB, cv.COLOR_BGR2XYZ, cv.COLOR_RGB2XYZ, + cv.COLOR_XYZ2BGR, cv.COLOR_XYZ2RGB,] + for cvtM in cvtModeC3: + self.assertTrue(np.allclose(cv.cann.cvtColor( + npMat, cvtM), cv.cvtColor(npMat, cvtM), 1)) + npMatC1 = (np.random.random((128, 128, 1)) * 255).astype(np.uint8) + for cvtM in cvtModeC1: + self.assertTrue(np.allclose(cv.cann.cvtColor( + npMatC1, cvtM), cv.cvtColor(npMatC1, cvtM), 1)) + + threshType = [cv.THRESH_BINARY, cv.THRESH_BINARY_INV, + cv.THRESH_TRUNC, cv.THRESH_TOZERO, cv.THRESH_TOZERO_INV] + for tType in threshType: + cvRet, cvThresh = cv.threshold( + npMat.astype(np.uint8), 127, 255, tType) + cannRet, cannThresh = cv.cann.threshold( + npMat.astype(np.float32), 127, 255, tType) + self.assertTrue(np.allclose(cvThresh, cannThresh)) + self.assertTrue(np.allclose(cvRet, cannRet)) + cv.cann.resetDevice() + + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() diff --git a/modules/cannops/perf/perf_core.cpp b/modules/cannops/perf/perf_core.cpp new file mode 100644 index 00000000000..0280da36a22 --- /dev/null +++ b/modules/cannops/perf/perf_core.cpp @@ -0,0 +1,150 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "perf_precomp.hpp" +#include "opencv2/cann_interface.hpp" + +namespace opencv_test +{ +namespace +{ +#define TYPICAL_NPU_MAT_SIZES \ + Values(::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz4320p) +#define DEF_PARAM_TEST(name, ...) \ + typedef ::perf::TestBaseWithParam> name + +DEF_PARAM_TEST(NPU, Size); +DEF_PARAM_TEST(CPU, Size); + +PERF_TEST_P(NPU, MERGE, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC1); + Mat dst; + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + NpuMat npuMat[3]; + npuMat[0].upload(mat); + npuMat[1].upload(mat); + npuMat[2].upload(mat); + + TEST_CYCLE() { cv::cann::merge(&npuMat[0], 3, dst); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MERGE, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC1); + Mat dst; + declare.in(mat, WARMUP_RNG); + Mat mats[3] = {mat, mat, mat}; + TEST_CYCLE() { cv::merge(&mats[0], 3, dst); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, SPLIT, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + NpuMat npuMat[3]; + + TEST_CYCLE() { cv::cann::split(mat, &npuMat[0]); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, SPLIT, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + declare.in(mat, WARMUP_RNG); + Mat mats[3] = {mat, mat, mat}; + TEST_CYCLE() { cv::split(mat, &mats[0]); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, TRANSPOSE, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::transpose(mat, dst); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, TRANSPOSE, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + 
Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE() { cv::transpose(mat, dst); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, FLIP, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::flip(mat, dst, -1); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, FLIP, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE() { cv::flip(mat, dst, -1); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, ROTATE, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::rotate(mat, dst, 1); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, ROTATE, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE() { cv::rotate(mat, dst, 1); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, CROP, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + Rect b(1, 2, 4, 4); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { NpuMat cropped_cann(mat, b); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, CROP, TYPICAL_NPU_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + Rect b(1, 2, 4, 4); + TEST_CYCLE() { Mat cropped_cv(mat, b); } + SANITY_CHECK_NOTHING(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannops/perf/perf_cvtcolor.cpp b/modules/cannops/perf/perf_cvtcolor.cpp new file mode 100644 index 00000000000..dc10d71aa66 --- /dev/null +++ b/modules/cannops/perf/perf_cvtcolor.cpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "perf_precomp.hpp" +#include "opencv2/cann_interface.hpp" + +namespace opencv_test +{ +namespace +{ + +#define CVT_COLORS_3 \ + Values(COLOR_BGR2BGRA, COLOR_BGRA2BGR, COLOR_BGR2RGBA, COLOR_RGBA2BGR, COLOR_BGR2RGB, \ + COLOR_BGRA2RGBA, COLOR_BGR2GRAY, COLOR_BGRA2GRAY, COLOR_RGBA2GRAY, COLOR_BGR2XYZ, \ + COLOR_RGB2XYZ, COLOR_XYZ2BGR, COLOR_XYZ2RGB, COLOR_BGR2YCrCb, COLOR_RGB2YCrCb, \ + COLOR_YCrCb2BGR, COLOR_YCrCb2RGB, COLOR_BGR2YUV, COLOR_RGB2YUV, COLOR_YUV2BGR, \ + COLOR_YUV2RGB) +#define CVT_COLORS_1 Values(COLOR_GRAY2BGR, COLOR_GRAY2BGRA) +#define TYPICAL_NPU_MAT_SIZES \ + Values(::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz4320p) +#define DEF_PARAM_TEST(name, ...) \ + typedef ::perf::TestBaseWithParam> name + +DEF_PARAM_TEST(NPU, Size, ColorConversionCodes); +DEF_PARAM_TEST(CPU, Size, ColorConversionCodes); + +PERF_TEST_P(NPU, CVT_COLOR_3, testing::Combine(TYPICAL_NPU_MAT_SIZES, CVT_COLORS_3)) +{ + Mat mat(GET_PARAM(0), CV_32FC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::cvtColor(mat, dst, GET_PARAM(1)); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, CVT_COLOR_3, testing::Combine(TYPICAL_NPU_MAT_SIZES, CVT_COLORS_3)) +{ + Mat mat(GET_PARAM(0), CV_32FC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE() { cv::cvtColor(mat, dst, GET_PARAM(1)); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, CVT_COLOR_1, testing::Combine(TYPICAL_NPU_MAT_SIZES, CVT_COLORS_1)) +{ + Mat mat(GET_PARAM(0), CV_32FC1); + Mat dst; + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::cvtColor(mat, dst, GET_PARAM(1)); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, CVT_COLOR_1, testing::Combine(TYPICAL_NPU_MAT_SIZES, CVT_COLORS_1)) +{ + Mat 
mat(GET_PARAM(0), CV_32FC1); + Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE() { cv::cvtColor(mat, dst, GET_PARAM(1)); } + SANITY_CHECK_NOTHING(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannops/perf/perf_element_operations.cpp b/modules/cannops/perf/perf_element_operations.cpp new file mode 100644 index 00000000000..d7d5947d661 --- /dev/null +++ b/modules/cannops/perf/perf_element_operations.cpp @@ -0,0 +1,211 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "perf_precomp.hpp" +#include "opencv2/cann_interface.hpp" + +namespace opencv_test +{ +namespace +{ + +#define ARITHM_MAT_DEPTH Values(CV_32S, CV_32SC3) +#define TYPICAL_NPU_MAT_SIZES \ + Values(::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz4320p) +#define DEF_PARAM_TEST(name, ...) \ + typedef ::perf::TestBaseWithParam> name + +DEF_PARAM_TEST(NPU, Size, int); +DEF_PARAM_TEST(CPU, Size, int); + +PERF_TEST_P(NPU, MAT_ADD_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::add(mat1, mat2, dst, noArray(), -1); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MAT_ADD_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + TEST_CYCLE() { cv::add(mat1, mat2, dst, noArray(), -1); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, MAT_SUB_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat 
dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::subtract(mat1, mat2, dst, noArray(), -1); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MAT_SUB_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + TEST_CYCLE() { cv::subtract(mat1, mat2, dst, noArray(), -1); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, MAT_MUL_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::multiply(mat1, mat2, dst, 1, -1); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MAT_MUL_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + TEST_CYCLE() { cv::multiply(mat1, mat2, dst, 1, -1); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, MAT_DIV_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::divide(mat1, mat2, dst, 1, -1); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MAT_DIV_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + TEST_CYCLE() { cv::divide(mat1, 
mat2, dst, 1, -1); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, MAT_BITWISE_AND_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::bitwise_and(mat1, mat2, dst, noArray()); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MAT_BITWISE_AND_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + TEST_CYCLE() { cv::bitwise_and(mat1, mat2, dst, noArray()); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, MAT_BITWISE_OR_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::bitwise_or(mat1, mat2, dst, noArray()); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MAT_BITWISE_OR_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + TEST_CYCLE() { cv::bitwise_or(mat1, mat2, dst, noArray()); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, MAT_BITWISE_XOR_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::bitwise_xor(mat1, mat2, dst, noArray()); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); 
+} + +PERF_TEST_P(CPU, MAT_BITWISE_XOR_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat1(GET_PARAM(0), GET_PARAM(1)); + Mat mat2(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat1, WARMUP_RNG); + declare.in(mat2, WARMUP_RNG); + TEST_CYCLE() { cv::bitwise_xor(mat1, mat2, dst, noArray()); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, MAT_BITWISE_NOT_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat, WARMUP_RNG); + cv::cann::setDevice(DEVICE_ID); + TEST_CYCLE() { cv::cann::bitwise_not(mat, dst, noArray()); } + cv::cann::resetDevice(); + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, MAT_BITWISE_NOT_MAT, testing::Combine(TYPICAL_NPU_MAT_SIZES, ARITHM_MAT_DEPTH)) +{ + Mat mat(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE() { cv::bitwise_not(mat, dst, noArray()); } + SANITY_CHECK_NOTHING(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannarithm/perf/perf_main.cpp b/modules/cannops/perf/perf_main.cpp similarity index 51% rename from modules/cannarithm/perf/perf_main.cpp rename to modules/cannops/perf/perf_main.cpp index 13cde8f491e..33503ac4158 100644 --- a/modules/cannarithm/perf/perf_main.cpp +++ b/modules/cannops/perf/perf_main.cpp @@ -3,27 +3,14 @@ // of this distribution and at http://opencv.org/license.html. 
#include "perf_precomp.hpp" -#include "opencv2/cann_arithm.hpp" +#include "opencv2/cann_interface.hpp" using namespace perf; class CannEnvironment : public ::testing::Environment { public: virtual ~CannEnvironment() = default; - virtual void SetUp() CV_OVERRIDE { - cv::cann::initAcl(); - - // for device warmup - Scalar s1(1,2,3), s2(4,5,6); - Mat src1(10, 10, CV_32SC3, s1), src2(10, 10, CV_32SC3, s2); - cv::cann::setDevice(0); - - cv::cann::AclMat npu_src1, npu_src2, dst; - npu_src1.upload(src1); - npu_src2.upload(src2); - cv::cann::add(npu_src1, npu_src2, dst); - cv::cann::resetDevice(); - } + virtual void SetUp() CV_OVERRIDE { cv::cann::initAcl(); } virtual void TearDown() CV_OVERRIDE { cv::cann::finalizeAcl(); } }; @@ -33,4 +20,4 @@ static void initTests() ::testing::AddGlobalTestEnvironment(cannEnv); } -CV_PERF_TEST_MAIN("cannarithm", initTests()) +CV_PERF_TEST_MAIN("cannops", initTests()) diff --git a/modules/cannarithm/perf/perf_precomp.hpp b/modules/cannops/perf/perf_precomp.hpp similarity index 89% rename from modules/cannarithm/perf/perf_precomp.hpp rename to modules/cannops/perf/perf_precomp.hpp index d0ff9533235..59e2fa03d7b 100644 --- a/modules/cannarithm/perf/perf_precomp.hpp +++ b/modules/cannops/perf/perf_precomp.hpp @@ -9,12 +9,11 @@ #include "opencv2/ts/ts_perf.hpp" #include "opencv2/cann.hpp" -namespace opencv_test -{ +#define DEVICE_ID 0 + using namespace perf; using namespace testing; using namespace cv; using namespace cv::cann; -} // namespace opencv_test #endif diff --git a/modules/cannarithm/samples/sample.cpp b/modules/cannops/samples/sample.cpp similarity index 55% rename from modules/cannarithm/samples/sample.cpp rename to modules/cannops/samples/sample.cpp index 772ca96f54f..ddf9d45adac 100644 --- a/modules/cannarithm/samples/sample.cpp +++ b/modules/cannops/samples/sample.cpp @@ -2,12 +2,13 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at 
http://opencv.org/license.html. -//g++ -o sample sample.cpp -I opencv/include/opencv4/ -L opencv/build/install/lib/ -l opencv_cannarithm -l opencv_core -l opencv_imgcodecs +// g++ -o sample sample.cpp -I opencv/include/opencv4/ -L opencv/build/install/lib/ -l +// opencv_cannops -l opencv_core -l opencv_imgcodecs #include #include #include -#include +#include int main() { @@ -16,14 +17,14 @@ int main() cv::cann::initAcl(); cv::cann::setDevice(0); - cv::cann::AclMat aclMat = cv::cann::AclMat(); - aclMat.upload(img); + cv::cann::NpuMat npuMat; + npuMat.upload(img); - cv::cann::AclMat aclMatSum; - cv::cann::add(aclMat, aclMat, aclMatSum); + cv::cann::NpuMat npuMatSum; + cv::cann::add(npuMat, npuMat, npuMatSum); cv::Mat imgResult; - aclMatSum.download(imgResult); - std::cout< +#include +#include "precomp.hpp" +#include "opencv2/core/private.hpp" +namespace cv +{ +namespace cann +{ +/*******************************Acl Error Checker*****************************/ +static inline void checkAclError(aclError err, const char* file, const int line, const char* func) +{ + if (ACL_SUCCESS != err) + { + const char* errMsg = aclGetRecentErrMsg(); + cv::error(cv::Error::AscendApiCallError, errMsg == nullptr ? "" : errMsg, func, file, line); + } +} + +static inline void checkAclPtr(void* ptr, const char* file, const int line, const char* func) +{ + if (nullptr == ptr) + { + const char* errMsg = aclGetRecentErrMsg(); + cv::error(cv::Error::AscendApiCallError, errMsg == nullptr ? 
"" : errMsg, func, file, line); + } +} + +#define CV_ACL_SAFE_CALL(expr) checkAclError((expr), __FILE__, __LINE__, CV_Func) +#define CV_ACL_SAFE_CALL_PTR(expr) \ + ({ \ + auto ptr = (expr); \ + checkAclPtr(ptr, __FILE__, __LINE__, CV_Func); \ + ptr; \ + }) + +/*****************************Acl Operator Attribute**************************/ +#define DEFINE_ATTR_BODY(FUNC) \ + void Acl##FUNC##Attribute::addAttr(aclopAttr* opAttr) \ + { \ + CV_ACL_SAFE_CALL(aclopSetAttr##FUNC(opAttr, name, value)); \ + } + +#define DEFINE_ATTR_LIST_BODY(FUNC) \ + void AclList##FUNC##Attribute::addAttr(aclopAttr* opAttr) \ + { \ + CV_ACL_SAFE_CALL(aclopSetAttrList##FUNC(opAttr, name, num, value)); \ + } + +DEFINE_ATTR_BODY(Float); +DEFINE_ATTR_BODY(String); +DEFINE_ATTR_BODY(Int); +DEFINE_ATTR_BODY(Bool); +DEFINE_ATTR_LIST_BODY(Int); + +#undef DEFINE_ATTR_BODY +#undef DEFINE_ATTR_LIST_BODY + +/******************************Acl Runtime Warpper****************************/ +void aclrtMallocWarpper(void** data, size_t size) +{ + CV_ACL_SAFE_CALL(aclrtMalloc(data, size, ACL_MEM_MALLOC_HUGE_FIRST)); +} + +void aclrtFreeWarpper(void* data) { CV_ACL_SAFE_CALL(aclrtFree(data)); } +// TODO should define dstMax? 
+void aclrtMemcpyWarpper(std::shared_ptr& dst, size_t offset, const void* src, size_t size, + AscendStream& stream) +{ + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL( + aclrtMemcpy(dst.get() + offset, size, src, size, ACL_MEMCPY_HOST_TO_DEVICE)); + else + { + CV_ACL_SAFE_CALL(aclrtMemcpyAsync(dst.get() + offset, size, src, size, + ACL_MEMCPY_HOST_TO_DEVICE, rawStream)); + if (offset == 0) + stream.addTensorHolder(dst); + } +} + +void aclrtMemcpyWarpper(void* dst, const std::shared_ptr& src, size_t offset, size_t size, + AscendStream& stream) +{ + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL( + aclrtMemcpy(dst, size, src.get() + offset, size, ACL_MEMCPY_DEVICE_TO_HOST)); + else + { + CV_ACL_SAFE_CALL(aclrtMemcpyAsync(dst, size, src.get() + offset, size, + ACL_MEMCPY_DEVICE_TO_HOST, rawStream)); + if (offset == 0) + stream.addTensorHolder(src); + } +} + +void aclrtMemcpyWarpper(std::shared_ptr& dst, size_t dstOffset, + const std::shared_ptr& src, size_t srcOffset, size_t size, + AscendStream& stream) +{ + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtMemcpy(dst.get() + dstOffset, size, src.get() + srcOffset, size, + ACL_MEMCPY_DEVICE_TO_DEVICE)); + else + { + CV_ACL_SAFE_CALL(aclrtMemcpyAsync(dst.get() + dstOffset, size, src.get() + srcOffset, size, + ACL_MEMCPY_DEVICE_TO_DEVICE, rawStream)); + if (srcOffset == 0) + stream.addTensorHolder(src); + if (dstOffset == 0) + stream.addTensorHolder(dst); + } +} + +void aclrtMemcpy2dWarpper(std::shared_ptr& dst, size_t offset, size_t dpitch, + const void* src, size_t spitch, size_t width, size_t length, + AscendStream& stream) +{ + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtMemcpy2d(dst.get() + offset, dpitch, src, spitch, width, length, + 
ACL_MEMCPY_HOST_TO_DEVICE)); + else + { + CV_ACL_SAFE_CALL(aclrtMemcpy2dAsync(dst.get() + offset, dpitch, src, spitch, width, length, + ACL_MEMCPY_HOST_TO_DEVICE, rawStream)); + stream.addTensorHolder(dst); + } +} + +void aclrtMemcpy2dWarpper(void* dst, size_t dpitch, const std::shared_ptr& src, + size_t offset, size_t spitch, size_t width, size_t length, + AscendStream& stream) +{ + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtMemcpy2d(dst, dpitch, src.get() + offset, spitch, width, length, + ACL_MEMCPY_DEVICE_TO_HOST)); + else + { + CV_ACL_SAFE_CALL(aclrtMemcpy2dAsync(dst, dpitch, src.get() + offset, spitch, width, length, + ACL_MEMCPY_DEVICE_TO_HOST, rawStream)); + stream.addTensorHolder(src); + } +} + +void aclrtMemsetWarpper(std::shared_ptr& ptr, int32_t value, size_t count, + AscendStream& stream) +{ + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtMemset(ptr.get(), count, value, count)); + else + { + CV_ACL_SAFE_CALL(aclrtMemsetAsync(ptr.get(), count, value, count, rawStream)); + stream.addTensorHolder(ptr); + } +} + +/**************************Acl attribute preparation**************************/ +struct CannPreparation +{ + CannPreparation() { opAttr_ = CV_ACL_SAFE_CALL_PTR(aclopCreateAttr()); } + + virtual ~CannPreparation() + { + for (auto desc : inputDesc_) + { + aclDestroyTensorDesc(desc); + } + for (auto desc : outputDesc_) + { + aclDestroyTensorDesc(desc); + } + for (auto buf : inputBuffers_) + { + aclDestroyDataBuffer(buf); + } + for (auto buf : outputBuffers_) + { + aclDestroyDataBuffer(buf); + } + aclopDestroyAttr(opAttr_); + } + + std::vector inputBuffers_; + std::vector outputBuffers_; + std::vector inputDesc_; + std::vector outputDesc_; + aclopAttr* opAttr_; +}; + +#define CANN_PREPARE_INPUTDESC(var, name, ...) 
\ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateTensorDesc(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + { \ + if (name != nullptr and strlen(name) != 0) \ + aclSetTensorDescName(_rPtr, name); \ + var.inputDesc_.push_back(_rPtr); \ + } \ + } while (0) + +#define CANN_PREPARE_OUTPUTDESC(var, ...) \ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateTensorDesc(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + var.outputDesc_.push_back(_rPtr); \ + } while (0) + +#define CANN_PREPARE_INPUTBUFFER(var, ...) \ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateDataBuffer(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + var.inputBuffers_.push_back(_rPtr); \ + } while (0) + +#define CANN_PREPARE_OUTPUTBUFFER(var, ...) \ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateDataBuffer(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + var.outputBuffers_.push_back(_rPtr); \ + } while (0) + +/********************************Ascend Tensor********************************/ +static inline aclDataType getACLType(int opencvdepth) +{ + switch (opencvdepth) + { + case CV_8S: + return ACL_INT8; + case CV_16S: + return ACL_INT16; + case CV_8U: + return ACL_UINT8; + case CV_16U: + return ACL_UINT16; + case CV_32S: + return ACL_INT32; + case CV_32F: + return ACL_FLOAT; + case CV_64F: + return ACL_DOUBLE; + case CV_16F: + return ACL_FLOAT16; + default: + return ACL_DT_UNDEFINED; + } +} + +AscendTensor::AscendTensor(std::shared_ptr _data, size_t _dataSize, int64_t* _dims, + size_t _dimSize, aclDataType _dtype, std::string _name, + aclFormat _format) + : name(_name), data(_data), dataSize(_dataSize), dtype(_dtype), format(_format) +{ + dims.assign(_dims, _dims + _dimSize); +} + +AscendTensor::AscendTensor(const NpuMat& npuMat, std::string _name, aclFormat _format) + : name(_name), format(_format) +{ + data = npuMat.data; + // Ascend can't process with gaps in matrix. 
+ CV_Assert(npuMat.isContinuous()); + dataSize = npuMat.rows * npuMat.cols * npuMat.elemSize(); + + switch (_format) + { + case ACL_FORMAT_NHWC: + case ACL_FORMAT_ND: + dims.resize(4); + // Batch, default = 1. + dims[0] = 1; + // Default OpenCV image format = NHWC. + dims[1] = npuMat.rows; + dims[2] = npuMat.cols; + dims[3] = npuMat.channels(); + break; + case ACL_FORMAT_NCHW: + dims.resize(4); + dims[0] = 1; + dims[1] = npuMat.channels(); + dims[2] = npuMat.rows; + dims[3] = npuMat.cols; + break; + default: + CV_Error(Error::StsBadArg, "Unknown/unsupported matrix format"); + } + + dtype = getACLType(npuMat.depth()); +} + +/**********************************Device*************************************/ +void setDevice(int device_id) +{ + aclrtContext context; + CV_ACL_SAFE_CALL(aclrtSetDevice(device_id)); + CV_ACL_SAFE_CALL(aclrtCreateContext(&context, device_id)); +} + +void resetDevice() { CV_ACL_SAFE_CALL(aclrtResetDevice(getDevice())); } + +int32_t getDevice() +{ + int32_t deviceId; + CV_ACL_SAFE_CALL(aclrtGetDevice(&deviceId)); + return deviceId; +} + +void initAcl() { CV_ACL_SAFE_CALL(aclInit(nullptr)); } + +void finalizeAcl() { CV_ACL_SAFE_CALL(aclFinalize()); } + +class DefaultDeviceInitializer +{ +public: + DefaultDeviceInitializer(); + ~DefaultDeviceInitializer(); + + AscendStream& getNullAscendStream(int deviceId); + +private: + std::vector> streams_; + Mutex streams_mtx_; +}; + +DefaultDeviceInitializer::DefaultDeviceInitializer() {} + +DefaultDeviceInitializer::~DefaultDeviceInitializer() { streams_.clear(); } + +AscendStream& DefaultDeviceInitializer::getNullAscendStream(int deviceId) +{ + AutoLock lock(streams_mtx_); + + if (streams_.empty()) + { + uint32_t deviceCount; + CV_ACL_SAFE_CALL(aclrtGetDeviceCount(&deviceCount)); + + if (deviceCount > 0) + streams_.resize(deviceCount); + } + + CV_DbgAssert(deviceId >= 0 && deviceId < static_cast(streams_.size())); + + if (streams_[deviceId].empty()) + { + aclrtStream stream = nullptr; + Ptr impl = 
makePtr(stream); + streams_[deviceId] = Ptr(new AscendStream(impl)); + } + + return *streams_[deviceId]; +} + +DefaultDeviceInitializer initializer; + +/***********************************Event*************************************/ +AscendEvent::Impl::Impl() : event(nullptr), ownEvent(true) +{ + CV_ACL_SAFE_CALL(aclrtCreateEvent(&event)); +} + +AscendEvent::Impl::Impl(aclrtEvent e) : event(e), ownEvent(false) {} + +AscendEvent::Impl::~Impl() +{ + if (event && ownEvent) + { + CV_ACL_SAFE_CALL(aclrtDestroyEvent(event)); + } +} + +aclrtEvent AscendEventAccessor::getEvent(const AscendEvent& event) { return event.impl_->event; } + +AscendEvent AscendEventAccessor::wrapEvent(aclrtEvent event) +{ + return AscendEvent(makePtr(event)); +} + +AscendEvent::AscendEvent() { impl_ = makePtr(); } + +void AscendEvent::record(AscendStream& stream) +{ + CV_ACL_SAFE_CALL(aclrtRecordEvent(impl_->event, AscendStreamAccessor::getStream(stream))); +} + +void AscendEvent::waitForComplete() const { CV_ACL_SAFE_CALL(aclrtSynchronizeEvent(impl_->event)); } + +/************************************Stream***********************************/ +void AscendStream::Impl::AddTensorHolder(const std::shared_ptr& tensorData) +{ + tensorHolders.insert(tensorData); +} + +AscendStream::Impl::Impl() : stream(nullptr), ownStream(true) +{ + CV_ACL_SAFE_CALL(aclrtCreateStream(&stream)); +} + +AscendStream::Impl::Impl(aclrtStream s) : stream(s), ownStream(false) {} + +aclrtStream AscendStreamAccessor::getStream(const AscendStream& stream) +{ + return stream.impl_->stream; +} + +AscendStream AscendStreamAccessor::wrapStream(aclrtStream stream) +{ + return AscendStream(makePtr(stream)); +} + +AscendStream wrapStream(size_t AscendStreamAddress) +{ + return AscendStreamAccessor::wrapStream(reinterpret_cast(AscendStreamAddress)); +} + +AscendStream::AscendStream() { impl_ = makePtr(); } + +void AscendStream::waitForCompletion() +{ + CV_ACL_SAFE_CALL(aclrtSynchronizeStream(impl_->stream)); + 
impl_->tensorHolders.clear(); +} + +void AscendStream::waitAscendEvent(const AscendEvent& event) +{ + CV_ACL_SAFE_CALL(aclrtStreamWaitEvent(impl_->stream, AscendEventAccessor::getEvent(event))); +} + +AscendStream& AscendStream::Null() +{ + const uint32_t deviceId = getDevice(); + return initializer.getNullAscendStream(deviceId); +} + +void AscendStream::addTensorHolder(const std::shared_ptr& holder) +{ + impl_->AddTensorHolder(holder); +} + +/********************************Operator caller******************************/ +std::shared_ptr mallocAndUpload(void* data, size_t size, AscendStream& stream, + NpuMat::Allocator* allocator) +{ + std::shared_ptr ptr = allocator->allocate(size); + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtMemcpy(ptr.get(), size, data, size, ACL_MEMCPY_HOST_TO_DEVICE)); + else + CV_ACL_SAFE_CALL( + aclrtMemcpyAsync(ptr.get(), size, data, size, ACL_MEMCPY_HOST_TO_DEVICE, rawStream)); + return ptr; +} + +void callAscendOperator(const char* op, std::vector& srcs, + std::vector& dsts, AscendStream& stream, + std::vector& attrs) +{ + CannPreparation prepare; + for (AclAttribute* attr : attrs) + { + attr->addAttr(prepare.opAttr_); + } + + for (const AscendTensor& src : srcs) + { + CANN_PREPARE_INPUTDESC(prepare, src.name.c_str(), src.dtype, src.dims.size(), + &src.dims.at(0), src.format); + CANN_PREPARE_INPUTBUFFER(prepare, src.data.get(), src.dataSize); + } + + for (const AscendTensor& dst : dsts) + { + CANN_PREPARE_OUTPUTDESC(prepare, dst.dtype, dst.dims.size(), &dst.dims.at(0), dst.format); + CANN_PREPARE_OUTPUTBUFFER(prepare, dst.data.get(), dst.dataSize); + } + + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + + CV_ACL_SAFE_CALL(aclopCompileAndExecute( + op, prepare.inputDesc_.size(), prepare.inputDesc_.data(), prepare.inputBuffers_.data(), + prepare.outputDesc_.size(), prepare.outputDesc_.data(), prepare.outputBuffers_.data(), + prepare.opAttr_, 
ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, rawStream)); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtSynchronizeStream(rawStream)); + else + { + for (const AscendTensor& src : srcs) + { + stream.addTensorHolder(src.data); + } + for (const AscendTensor& dst : dsts) + { + stream.addTensorHolder(dst.data); + } + } +} + +void callAscendOperator(const NpuMat& src, NpuMat& dst, const char* op, AscendStream& stream, + std::vector& attrs) +{ + std::vector srcTensors, dstTensors; + srcTensors.emplace_back(src); + dstTensors.emplace_back(dst); + callAscendOperator(op, srcTensors, dstTensors, stream, attrs); +} + +void callAscendOperator(const NpuMat& src1, const NpuMat& src2, NpuMat& dst, const char* op, + AscendStream& stream, std::vector& attrs) +{ + std::vector srcTensors, dstTensors; + srcTensors.emplace_back(src1); + srcTensors.emplace_back(src2); + dstTensors.emplace_back(dst); + callAscendOperator(op, srcTensors, dstTensors, stream, attrs); +} + +void callAscendOperator(const NpuMat* srcs, const size_t srcCount, NpuMat& dst, const char* op, + AscendStream& stream, std::vector& attrs) +{ + std::vector srcTensors, dstTensors; + for (size_t i = 0; i < srcCount; i++) + { + srcTensors.emplace_back(srcs[i]); + } + dstTensors.emplace_back(dst); + callAscendOperator(op, srcTensors, dstTensors, stream, attrs); +} + +void callAscendOperator(const NpuMat& src, const Scalar& sc, bool inv, NpuMat& dst, const char* op, + AscendStream& stream, std::vector& attrs) +{ + uchar rawData[32]; + cv::scalarToRawData(sc, rawData, src.type(), 0); + std::shared_ptr scPtr = mallocAndUpload(rawData, src.elemSize(), stream); + + int64_t dims[] = {1, 1, 1, src.channels()}; + AscendTensor scalarTensor(scPtr, src.elemSize(), dims, sizeof(dims) / sizeof(dims[0]), + getACLType(src.depth())); + + std::vector srcTensors, dstTensors; + + srcTensors.emplace_back(src); + srcTensors.push_back(scalarTensor); + + if (inv) + std::swap(srcTensors[0], srcTensors[1]); + + dstTensors.emplace_back(dst); + 
callAscendOperator(op, srcTensors, dstTensors, stream, attrs); +} + +void callAscendOperator(const NpuMat& src, NpuMat* dsts, const size_t dstCount, const char* op, + AscendStream& stream, std::vector& attrs) +{ + std::vector srcTensors, dstTensors; + srcTensors.emplace_back(src); + for (size_t i = 0; i < dstCount; i++) + { + dstTensors.emplace_back(dsts[i]); + } + callAscendOperator(op, srcTensors, dstTensors, stream, attrs); +} + +} // namespace cann +} // namespace cv diff --git a/modules/cannops/src/color.cpp b/modules/cannops/src/color.cpp new file mode 100644 index 00000000000..0c6f9df74a5 --- /dev/null +++ b/modules/cannops/src/color.cpp @@ -0,0 +1,722 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +namespace cv +{ +namespace cann +{ +static void matAlphaSet(NpuMat& mat, int dtype, AscendStream& stream) +{ + if (dtype < 0) + dtype = mat.depth(); + + if (mat.depth() == CV_8U || mat.depth() == CV_16U) + { + size_t size = mat.rows * mat.step; + aclrtMemsetWarpper(mat.data, 255, size, stream); + } + else + { + if (dtype == CV_32F) + mat.setTo(1.0f, stream); + else + { + mat.setTo((dtype == CV_8U ? 
(1 << 8) : (1 << 16)) - 1, stream); + } + } +} + +inline void checkImg(const NpuMat& mat) +{ + int depth = mat.depth(); + CV_Assert(!mat.empty()); + CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F); +} + +inline void cvtBGRtoBGR(InputArray& _src, OutputArray& _dst, int dcn, bool swapBlue, + AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + checkImg(src); + CV_Assert(src.channels() == 3 || src.channels() == 4); + + NpuMat matChannels[4]; + split(src, matChannels, stream); + + if (swapBlue) + { + std::swap(matChannels[0], matChannels[2]); + } + + if (dcn == 4 && src.channels() != 4) + { + NpuMat& alpha = matChannels[3]; + alpha.create(src.rows, src.cols, CV_MAKE_TYPE(src.depth(), 1)); + matAlphaSet(alpha, -1, stream); + } + + merge(matChannels, dcn, _dst, stream); +} + +// TODO duplicated code +static const float B2YF = 0.114f; +static const float G2YF = 0.587f; +static const float R2YF = 0.299f; + +inline void cvtBGRtoGray(InputArray& _src, OutputArray& _dst, int, bool swapBlue, + AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + checkImg(src); + CV_Assert(src.channels() == 3 || src.channels() == 4); + + float coeffs[] = {B2YF, G2YF, R2YF}; + + NpuMat formatMat; + if (src.depth() != CV_32F) + { + src.convertTo(formatMat, CV_32F); + } + else + { + formatMat = src; + } + + // For RGB + if (swapBlue) + { + std::swap(coeffs[0], coeffs[2]); + } + + Scalar sc = {coeffs[0], coeffs[1], coeffs[2], 0}; + NpuMat grayRet; + multiply(formatMat, sc, grayRet, 1, -1, stream); + + NpuMat matChannels[4]; + split(grayRet, matChannels, stream); + + NpuMat dst = getOutputMat(_dst, src.rows, src.cols, CV_MAKE_TYPE(src.depth(), 1), stream); + + AclIntAttribute matSize("N", 3); + std::vector attrs{&matSize}; + + if (src.depth() != CV_32F) + { + formatMat.create(grayRet.rows, grayRet.cols, CV_MAKE_TYPE(grayRet.depth(), 1)); + callAscendOperator(matChannels, 3, formatMat, "AddN", stream, attrs); + + // do not use convertTo here, dst.data 
will overwrited. + callAscendOperator(formatMat, dst, "Cast", stream); + } + else + callAscendOperator(matChannels, 3, dst, "AddN", stream, attrs); + syncOutput(dst, _dst, stream); +} + +inline void cvtGraytoBGR(InputArray& _src, OutputArray& _dst, int dcn, bool, AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + checkImg(src); + CV_Assert(src.channels() == 1); + + NpuMat matChannels[4]; + for (int i = 0; i < 3; i++) + { + matChannels[i] = src; + } + + if (dcn == 4) + { + NpuMat& alpha = matChannels[3]; + alpha.create(src.rows, src.cols, CV_MAKE_TYPE(src.depth(), 1)); + matAlphaSet(alpha, -1, stream); + } + + merge(matChannels, dcn, _dst, stream); +} + +static const float RGB2XYZ_D65[] = {0.412453, 0.357580, 0.180423, 0.212671, 0.715160, + 0.072169, 0.019334, 0.119193, 0.950227}; + +static const float XYZ2RGB_D65[] = {3.240479, -1.53715, -0.498535, -0.969256, 1.875991, + 0.041556, 0.055648, -0.204043, 1.057311}; + +inline void matMulRGB(InputArray& _src, OutputArray& _dst, float* matrix, AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + checkImg(src); + CV_Assert(src.channels() == 3); + + NpuMat formatMat; + if (src.depth() != CV_32F) + { + src.convertTo(formatMat, CV_32F); + } + else + { + formatMat = src; + } + + // TODO async!!! + Mat transMat(1, 3, CV_32FC3, matrix); + NpuMat transNpuMat; + transNpuMat.upload(transMat, stream); + + AclBoolAttribute transposeX1("adj_x1", false); + AclBoolAttribute transposeX2("adj_x2", true); + std::vector matMulAttr{&transposeX1, &transposeX2}; + + NpuMat dst = getOutputMat(_dst, src.rows, src.cols, src.type(), stream); + + if (src.depth() != CV_32F) + { + NpuMat formatRet(formatMat.size(), formatMat.type()), + thresholdRet(formatMat.size(), formatMat.type()); + callAscendOperator(formatMat, transNpuMat, formatRet, "BatchMatMulV2", stream, matMulAttr); + uint16_t thresh = (src.depth() == CV_8U ? 
(1 << 8) : (1 << 16)) - 1; + threshold(formatRet, thresholdRet, thresh, 0, 2 /*THRESH_TRUNC*/, stream); + threshold(thresholdRet, formatRet, 0, 0, 3 /*THRESH_TOZERO*/, stream); + callAscendOperator(formatRet, dst, "Cast", stream); + } + else + callAscendOperator(formatMat, transNpuMat, dst, "BatchMatMulV2", stream, matMulAttr); + + syncOutput(dst, _dst, stream); +} + +// TODO should deal with overflow. set 255 instead of cut off. +inline void cvtBGRtoXYZ(InputArray& src, OutputArray& dst, int, bool swapBlue, AscendStream& stream) +{ + float coeffs[9]; + memcpy(coeffs, RGB2XYZ_D65, 9 * sizeof(float)); + if (!swapBlue) + { + std::swap(coeffs[0], coeffs[2]); + std::swap(coeffs[3], coeffs[5]); + std::swap(coeffs[6], coeffs[8]); + } + matMulRGB(src, dst, coeffs, stream); +} + +inline void cvtXYZtoBGR(InputArray& src, OutputArray& dst, int dcn, bool swapBlue, + AscendStream& stream) +{ + float coeffs[9]; + memcpy(coeffs, XYZ2RGB_D65, 9 * sizeof(float)); + if (!swapBlue) + { + std::swap(coeffs[0], coeffs[6]); + std::swap(coeffs[1], coeffs[7]); + std::swap(coeffs[2], coeffs[8]); + } + + if (dcn == 4) + { + NpuMat RGB[4], tempMat1; + matMulRGB(src, tempMat1, coeffs, stream); + + split(tempMat1, RGB, stream); + RGB[3].create(RGB[0].rows, RGB[1].cols, RGB[0].type()); + matAlphaSet(RGB[3], -1, stream); + merge(RGB, 4, dst, stream); + } + else + matMulRGB(src, dst, coeffs, stream); +} + +// TODO duplicated code +static const float YCRF = 0.713f; +static const float YCBF = 0.564f; +static const float R2VF = 0.877f; +static const float B2UF = 0.492f; +inline void cvtBGRtoYCrCb(InputArray& _src, OutputArray& _dst, float* coeffs, bool swapBlue, + bool yuvOrder, AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + checkImg(src); + CV_Assert(src.channels() == 3); + + int buleIdx = swapBlue ? 2 : 0; + int depth = src.depth(); + float delta = (depth == CV_8U) ? 128 : ((depth == CV_16U) ? 
32768 : 0.5); + + NpuMat formatMat; + if (src.depth() != CV_32F) + { + src.convertTo(formatMat, CV_32F); + } + else + { + formatMat = src; + } + + NpuMat YCrCb[3], RGB[3]; + split(formatMat, RGB, stream); + cvtBGRtoGray(formatMat, YCrCb[0], 1, swapBlue, stream); + YCrCb[1].create(YCrCb[0].rows, YCrCb[0].cols, YCrCb[0].type()); + YCrCb[2].create(YCrCb[0].rows, YCrCb[0].cols, YCrCb[0].type()); + + NpuMat tempMat1(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), 1)), + tempMat2(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), 1)); + + callAscendOperator(RGB[buleIdx ^ 2], YCrCb[0], tempMat1, "Sub", stream); + muls(tempMat1, coeffs[0], tempMat2, stream); + adds(tempMat2, delta, YCrCb[1], stream); + + callAscendOperator(RGB[buleIdx], YCrCb[0], tempMat1, "Sub", stream); + muls(tempMat1, coeffs[1], tempMat2, stream); + adds(tempMat2, delta, YCrCb[2], stream); + + if (yuvOrder) + { + std::swap(YCrCb[1], YCrCb[2]); + } + + if (src.depth() != CV_32F) + { + NpuMat formatRet(formatMat.size(), formatMat.type()), + thresholdRet(formatMat.size(), formatMat.type()); + merge(YCrCb, 3, formatRet, stream); + uint16_t thresh = (src.depth() == CV_8U ? 
(1 << 8) : (1 << 16)) - 1; + threshold(formatRet, thresholdRet, thresh, 0, 2 /*THRESH_TRUNC*/, stream); + threshold(thresholdRet, formatRet, 0, 0, 3 /*THRESH_TOZERO*/, stream); + NpuMat dst = getOutputMat(_dst, src.rows, src.cols, src.type(), stream); + callAscendOperator(formatRet, dst, "Cast", stream); + syncOutput(dst, _dst, stream); + } + else + merge(YCrCb, 3, _dst, stream); +} + +static const float CR2RF = 1.403f; +static const float CR2GF = -0.714f; +static const float CB2GF = -0.344f; +static const float CB2BF = 1.773f; + +static const float V2RF = 1.140f; +static const float V2GF = -0.581f; +static const float U2GF = -0.395f; +static const float U2BF = 2.032f; + +inline void cvtYCrCbtoBGR(InputArray& _src, OutputArray& _dst, int dcn, float* coeffs, + bool swapBlue, bool yuvOrder, AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + checkImg(src); + CV_Assert(src.channels() == 3); + + int buleIdx = swapBlue ? 2 : 0; + int depth = src.depth(); + float delta = (depth == CV_8U) ? 128 : ((depth == CV_16U) ? 
32768 : 0.5); + + NpuMat formatMat; + if (src.depth() != CV_32F) + { + src.convertTo(formatMat, CV_32F); + } + else + { + formatMat = src; + } + + NpuMat YCrCb[3], RGB[4]; + split(formatMat, YCrCb, stream); + if (yuvOrder) + { + std::swap(YCrCb[1], YCrCb[2]); + } + RGB[0].create(formatMat.rows, formatMat.cols, CV_MAKE_TYPE(formatMat.depth(), 1)); + RGB[1].create(formatMat.rows, formatMat.cols, CV_MAKE_TYPE(formatMat.depth(), 1)); + RGB[2].create(formatMat.rows, formatMat.cols, CV_MAKE_TYPE(formatMat.depth(), 1)); + NpuMat tempMat1(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), 1)), + tempMat2(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), 1)), + CbSubDelta(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), 1)), + CrSubDelta(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), 1)); + + adds(YCrCb[1], (0.0f - delta), CrSubDelta, stream); + adds(YCrCb[2], (0.0f - delta), CbSubDelta, stream); + + muls(CrSubDelta, coeffs[0], tempMat1, stream); + callAscendOperator(YCrCb[0], tempMat1, RGB[buleIdx ^ 2], "Add", stream); + + muls(CrSubDelta, coeffs[1], tempMat1, stream); + callAscendOperator(YCrCb[0], tempMat1, tempMat2, "Add", stream); + muls(CbSubDelta, coeffs[2], tempMat1, stream); + callAscendOperator(tempMat2, tempMat1, RGB[1], "Add", stream); + + muls(CbSubDelta, coeffs[3], tempMat1, stream); + callAscendOperator(YCrCb[0], tempMat1, RGB[buleIdx], "Add", stream); + + if (dcn == 4) + { + RGB[3].create(RGB[0].rows, RGB[0].cols, RGB[0].type()); + matAlphaSet(RGB[3], src.depth(), stream); + } + + if (src.depth() != CV_32F) + { + NpuMat formatRet(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), dcn)), + thresholdRet(formatMat.size(), CV_MAKE_TYPE(formatMat.depth(), dcn)); + merge(RGB, dcn, formatRet, stream); + uint16_t thresh = (src.depth() == CV_8U ? 
(1 << 8) : (1 << 16)) - 1; + threshold(formatRet, thresholdRet, thresh, 0, 2 /*THRESH_TRUNC*/, stream); + threshold(thresholdRet, formatRet, 0, 0, 3 /*THRESH_TOZERO*/, stream); + NpuMat dst = getOutputMat(_dst, src.rows, src.cols, CV_MAKE_TYPE(src.depth(), dcn), stream); + callAscendOperator(formatRet, dst, "Cast", stream); + syncOutput(dst, _dst, stream); + } + else + merge(RGB, dcn, _dst, stream); +} + +inline void BGR2BGRA(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoBGR(src, dst, 4, false, stream); +} + +inline void BGRA2BGR(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoBGR(src, dst, 3, false, stream); +} + +inline void BGR2RGBA(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoBGR(src, dst, 4, true, stream); +} + +inline void RGBA2BGR(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoBGR(src, dst, 3, true, stream); +} + +inline void BGR2RGB(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoBGR(src, dst, 3, true, stream); +} + +inline void BGRA2RGBA(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoBGR(src, dst, 4, true, stream); +} + +inline void BGR2GRAY(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoGray(src, dst, 1, false, stream); +} + +inline void RGB2GRAY(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoGray(src, dst, 1, true, stream); +} + +inline void GRAY2BGR(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtGraytoBGR(src, dst, 3, false, stream); +} + +inline void GRAY2BGRA(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtGraytoBGR(src, dst, 4, false, stream); +} + +inline void BGRA2GRAY(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoGray(src, dst, 1, false, stream); +} + +inline void RGBA2GRAY(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoGray(src, dst, 1, 
true, stream); +} + +inline void BGR2XYZ(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoXYZ(src, dst, 3, false, stream); +} + +inline void RGB2XYZ(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + cvtBGRtoXYZ(src, dst, 3, true, stream); +} + +inline void XYZ2BGR(InputArray src, OutputArray& dst, int dcn, AscendStream& stream) +{ + if (dcn <= 0) + dcn = 3; + cvtXYZtoBGR(src, dst, dcn, false, stream); +} + +inline void XYZ2RGB(InputArray src, OutputArray& dst, int dcn, AscendStream& stream) +{ + if (dcn <= 0) + dcn = 3; + cvtXYZtoBGR(src, dst, dcn, true, stream); +} + +inline void BGR2YCrCb(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + float coeffs[2]; + coeffs[0] = YCRF; + coeffs[1] = YCBF; + cvtBGRtoYCrCb(src, dst, coeffs, false, false, stream); +} + +inline void RGB2YCrCb(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + float coeffs[2]; + coeffs[0] = YCRF; + coeffs[1] = YCBF; + cvtBGRtoYCrCb(src, dst, coeffs, true, false, stream); +} + +inline void YCrCb2BGR(InputArray src, OutputArray& dst, int dcn, AscendStream& stream) +{ + float coeffs[4]; + coeffs[0] = CR2RF; + coeffs[1] = CR2GF; + coeffs[2] = CB2GF; + coeffs[3] = CB2BF; + if (dcn <= 0) + dcn = 3; + cvtYCrCbtoBGR(src, dst, dcn, coeffs, false, false, stream); +} + +inline void YCrCb2RGB(InputArray src, OutputArray& dst, int dcn, AscendStream& stream) +{ + float coeffs[4]; + coeffs[0] = CR2RF; + coeffs[1] = CR2GF; + coeffs[2] = CB2GF; + coeffs[3] = CB2BF; + if (dcn <= 0) + dcn = 3; + cvtYCrCbtoBGR(src, dst, dcn, coeffs, true, false, stream); +} + +inline void BGR2YUV(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + float coeffs[2]; + coeffs[0] = R2VF; + coeffs[1] = B2UF; + cvtBGRtoYCrCb(src, dst, coeffs, false, true, stream); +} + +inline void RGB2YUV(InputArray src, OutputArray& dst, int, AscendStream& stream) +{ + float coeffs[2]; + coeffs[0] = R2VF; + coeffs[1] = B2UF; + cvtBGRtoYCrCb(src, dst, coeffs, true, 
true, stream); +} + +inline void YUV2BGR(InputArray src, OutputArray& dst, int dcn, AscendStream& stream) +{ + float coeffs[4]; + coeffs[0] = V2RF; + coeffs[1] = V2GF; + coeffs[2] = U2GF; + coeffs[3] = U2BF; + if (dcn <= 0) + dcn = 3; + cvtYCrCbtoBGR(src, dst, dcn, coeffs, false, true, stream); +} + +inline void YUV2RGB(InputArray src, OutputArray& dst, int dcn, AscendStream& stream) +{ + float coeffs[4]; + coeffs[0] = V2RF; + coeffs[1] = V2GF; + coeffs[2] = U2GF; + coeffs[3] = U2BF; + if (dcn <= 0) + dcn = 3; + cvtYCrCbtoBGR(src, dst, dcn, coeffs, true, true, stream); +} + +void cvtColor(InputArray src, OutputArray dst, int code, int dcn, AscendStream& stream) +{ + typedef void (*func_t)(InputArray& src, OutputArray& dst, int dcn, AscendStream& stream); + static const func_t funcs[] = { + BGR2BGRA, // CV_BGR2BGRA =0 + BGRA2BGR, // CV_BGRA2BGR =1 + BGR2RGBA, // CV_BGR2RGBA =2 + RGBA2BGR, // CV_RGBA2BGR =3 + BGR2RGB, // CV_BGR2RGB =4 + BGRA2RGBA, // CV_BGRA2RGBA =5 + + BGR2GRAY, // CV_BGR2GRAY =6 + RGB2GRAY, // CV_RGB2GRAY =7 + GRAY2BGR, // CV_GRAY2BGR =8 + GRAY2BGRA, // CV_GRAY2BGRA =9 + BGRA2GRAY, // CV_BGRA2GRAY =10 + RGBA2GRAY, // CV_RGBA2GRAY =11 + + 0, // CV_BGR2BGR565 =12 + 0, // CV_RGB2BGR565 =13 + 0, // CV_BGR5652BGR =14 + 0, // CV_BGR5652RGB =15 + 0, // CV_BGRA2BGR565 =16 + 0, // CV_RGBA2BGR565 =17 + 0, // CV_BGR5652BGRA =18 + 0, // CV_BGR5652RGBA =19 + + 0, // CV_GRAY2BGR565 =20 + 0, // CV_BGR5652GRAY =21 + + 0, // CV_BGR2BGR555 =22 + 0, // CV_RGB2BGR555 =23 + 0, // CV_BGR5552BGR =24 + 0, // CV_BGR5552RGB =25 + 0, // CV_BGRA2BGR555 =26 + 0, // CV_RGBA2BGR555 =27 + 0, // CV_BGR5552BGRA =28 + 0, // CV_BGR5552RGBA =29 + + 0, // CV_GRAY2BGR555 =30 + 0, // CV_BGR5552GRAY =31 + + BGR2XYZ, // CV_BGR2XYZ =32 + RGB2XYZ, // CV_RGB2XYZ =33 + XYZ2BGR, // CV_XYZ2BGR =34 + XYZ2RGB, // CV_XYZ2RGB =35 + + BGR2YCrCb, // CV_BGR2YCrCb =36 + RGB2YCrCb, // CV_RGB2YCrCb =37 + YCrCb2BGR, // CV_YCrCb2BGR =38 + YCrCb2RGB, // CV_YCrCb2RGB =39 + + 0, // CV_BGR2HSV =40 + 0, // 
CV_RGB2HSV =41 + + 0, // =42 + 0, // =43 + + 0, // CV_BGR2Lab =44 + 0, // CV_RGB2Lab =45 + + 0, // CV_BayerBG2BGR =46 + 0, // CV_BayeRGB2BGR =47 + 0, // CV_BayerRG2BGR =48 + 0, // CV_BayerGR2BGR =49 + + 0, // CV_BGR2Luv =50 + 0, // CV_RGB2Luv =51 + + 0, // CV_BGR2HLS =52 + 0, // CV_RGB2HLS =53 + + 0, // CV_HSV2BGR =54 + 0, // CV_HSV2RGB =55 + + 0, // CV_Lab2BGR =56 + 0, // CV_Lab2RGB =57 + 0, // CV_Luv2BGR =58 + 0, // CV_Luv2RGB =59 + + 0, // CV_HLS2BGR =60 + 0, // CV_HLS2RGB =61 + + 0, // CV_BayerBG2BGR_VNG =62 + 0, // CV_BayeRGB2BGR_VNG =63 + 0, // CV_BayerRG2BGR_VNG =64 + 0, // CV_BayerGR2BGR_VNG =65 + + 0, // CV_BGR2HSV_FULL = 66 + 0, // CV_RGB2HSV_FULL = 67 + 0, // CV_BGR2HLS_FULL = 68 + 0, // CV_RGB2HLS_FULL = 69 + + 0, // CV_HSV2BGR_FULL = 70 + 0, // CV_HSV2RGB_FULL = 71 + 0, // CV_HLS2BGR_FULL = 72 + 0, // CV_HLS2RGB_FULL = 73 + + 0, // CV_LBGR2Lab = 74 + 0, // CV_LRGB2Lab = 75 + 0, // CV_LBGR2Luv = 76 + 0, // CV_LRGB2Luv = 77 + + 0, // CV_Lab2LBGR = 78 + 0, // CV_Lab2LRGB = 79 + 0, // CV_Luv2LBGR = 80 + 0, // CV_Luv2LRGB = 81 + + BGR2YUV, // CV_BGR2YUV = 82 + RGB2YUV, // CV_RGB2YUV = 83 + YUV2BGR, // CV_YUV2BGR = 84 + YUV2RGB, // CV_YUV2RGB = 85 + + 0, // CV_BayerBG2GRAY = 86 + 0, // CV_BayeRGB2GRAY = 87 + 0, // CV_BayerRG2GRAY = 88 + 0, // CV_BayerGR2GRAY = 89 + + // YUV 4:2:0 formats family + 0, // CV_YUV2RGB_NV12 = 90, + 0, // CV_YUV2BGR_NV12 = 91, + 0, // CV_YUV2RGB_NV21 = 92, + 0, // CV_YUV2BGR_NV21 = 93, + + 0, // CV_YUV2RGBA_NV12 = 94, + 0, // CV_YUV2BGRA_NV12 = 95, + 0, // CV_YUV2RGBA_NV21 = 96, + 0, // CV_YUV2BGRA_NV21 = 97, + + 0, // CV_YUV2RGB_YV12 = 98, + 0, // CV_YUV2BGR_YV12 = 99, + 0, // CV_YUV2RGB_IYUV = 100, + 0, // CV_YUV2BGR_IYUV = 101, + + 0, // CV_YUV2RGBA_YV12 = 102, + 0, // CV_YUV2BGRA_YV12 = 103, + 0, // CV_YUV2RGBA_IYUV = 104, + 0, // CV_YUV2BGRA_IYUV = 105, + + 0, // CV_YUV2GRAY_420 = 106, + + // YUV 4:2:2 formats family + 0, // CV_YUV2RGB_UYVY = 107, + 0, // CV_YUV2BGR_UYVY = 108, + 0, // //CV_YUV2RGB_VYUY = 109, + 0, // 
//CV_YUV2BGR_VYUY = 110, + + 0, // CV_YUV2RGBA_UYVY = 111, + 0, // CV_YUV2BGRA_UYVY = 112, + 0, // //CV_YUV2RGBA_VYUY = 113, + 0, // //CV_YUV2BGRA_VYUY = 114, + + 0, // CV_YUV2RGB_YUY2 = 115, + 0, // CV_YUV2BGR_YUY2 = 116, + 0, // CV_YUV2RGB_YVYU = 117, + 0, // CV_YUV2BGR_YVYU = 118, + + 0, // CV_YUV2RGBA_YUY2 = 119, + 0, // CV_YUV2BGRA_YUY2 = 120, + 0, // CV_YUV2RGBA_YVYU = 121, + 0, // CV_YUV2BGRA_YVYU = 122, + + 0, // CV_YUV2GRAY_UYVY = 123, + 0, // CV_YUV2GRAY_YUY2 = 124, + + // alpha premultiplication + 0, // CV_RGBA2mRGBA = 125, + 0, // CV_mRGBA2RGBA = 126, + + 0, // CV_COLORCVT_MAX = 127 + }; + + // code is used directly as an index into funcs[] (128 slots), so reject negative values + // as well as values past the end of the table. + CV_Assert(code >= 0 && code < 128); + + func_t func = funcs[code]; + + // A null slot marks a conversion code that has no implementation in this table. + if (func == 0) + CV_Error(Error::StsBadFlag, "Unknown/unsupported color conversion code"); + + func(src, dst, dcn, stream); +} + +} // namespace cann +} // namespace cv \ No newline at end of file diff --git a/modules/cannops/src/core.cpp b/modules/cannops/src/core.cpp new file mode 100644 index 00000000000..398394e6714 --- /dev/null +++ b/modules/cannops/src/core.cpp @@ -0,0 +1,205 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html.
+ +#include "precomp.hpp" + +namespace cv +{ +namespace cann +{ +static inline aclFormat getAclFormat(const char* type) +{ + if (strcmp(type, "NCHW") == 0) + { + return ACL_FORMAT_NCHW; + } + else if (strcmp(type, "NHWC") == 0) + { + return ACL_FORMAT_NHWC; + } + else + { + CV_Error(Error::StsBadArg, "Unknown/unsupported matrix format"); + } +} + +void transData(const NpuMat& src, NpuMat& dst, const char* from, const char* to, + AscendStream& stream) +{ + AclStringAttribute fromAttr("src_format", from); + AclStringAttribute toAttr("dst_format", to); + std::vector attrs{&fromAttr, &toAttr}; + + std::vector srcTensors, dstTensors; + srcTensors.emplace_back(src, "", getAclFormat(from)); + dstTensors.emplace_back(dst, "", getAclFormat(to)); + callAscendOperator("TransData", srcTensors, dstTensors, stream, attrs); +} + +void merge(const NpuMat* src, size_t n, NpuMat& dst, AscendStream& stream) +{ + if (src == nullptr || n < 2) + return; + + int depth = src->depth(); + int rows = src->rows; + int cols = src->cols; + + // all matrix must have same size and type + for (size_t i = 1; i < n; i++) + { + CV_Assert(src[i].depth() == depth && src[i].channels() == 1); + CV_Assert(src[i].rows == rows && src[i].cols == cols); + } + + AclIntAttribute concatDim("concat_dim", 3); + std::vector attrs{&concatDim}; + + std::vector srcTensors, dstTensors; + + for (size_t i = 0; i < n; i++) + { + srcTensors.emplace_back(src[i], "x" + std::to_string(i)); + } + dstTensors.emplace_back(dst); + + callAscendOperator("ConcatD", srcTensors, dstTensors, stream, attrs); +} + +void merge(const NpuMat* src, size_t n, OutputArray _dst, AscendStream& stream) +{ + NpuMat dst = getOutputMat(_dst, src->rows, src->cols, CV_MAKE_TYPE(src->depth(), n), stream); + merge(src, n, dst, stream); + syncOutput(dst, _dst, stream); +} + +void merge(const std::vector& src, OutputArray dst, AscendStream& stream) +{ + merge(&src[0], src.size(), dst, stream); +} + +void split(const NpuMat& src, NpuMat* dst, AscendStream& 
stream) +{ + if (src.empty() || dst == nullptr) + return; + + int cn = src.channels(); + AclIntAttribute splitDim("split_dim", 3); + AclIntAttribute numSplit("num_split", cn); + + for (int i = 0; i < cn; i++) + dst[i].create(src.rows, src.cols, CV_MAKE_TYPE(src.depth(), 1)); + + std::vector attrs{&splitDim, &numSplit}; + + callAscendOperator(src, dst, cn, "SplitD", stream, attrs); +} + +void split(InputArray _src, NpuMat* dst, AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + split(src, dst, stream); +} + +void split(InputArray _src, std::vector& dst, AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + dst.resize(src.channels()); + split(_src, &dst[0], stream); +} + +void transpose(const NpuMat& src, int64_t* perm, NpuMat& dst, AscendStream& stream) +{ + AclListIntAttribute permAttr("perm", 4, perm); + std::vector attrs{&permAttr}; + + std::vector srcTensors, dstTensors; + srcTensors.emplace_back(src); + dstTensors.emplace_back(dst); + callAscendOperator("TransposeD", srcTensors, dstTensors, stream, attrs); +} + +void transpose(InputArray _src, OutputArray _dst, AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + + NpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream); + + int64_t perm[] = {0, 2, 1, 3}; + transpose(src, perm, dst, stream); + syncOutput(dst, _dst, stream); +} + +void flip(const NpuMat& src, std::vector& asixs, NpuMat& dst, AscendStream& stream) +{ + size_t dataSize = asixs.size() * sizeof(int32_t); + std::shared_ptr axisPtr = mallocAndUpload(&asixs.at(0), dataSize, stream); + + int64_t dims[] = {(int64_t)asixs.size()}; + AscendTensor asixTensor(axisPtr, dataSize, dims, 1, ACL_INT32); + + std::vector srcTensors, dstTensors; + srcTensors.emplace_back(src); + srcTensors.push_back(std::move(asixTensor)); + dstTensors.emplace_back(dst); + callAscendOperator("ReverseV2", srcTensors, dstTensors, stream, emptyattr); +} + +void flip(InputArray _src, OutputArray _dst, int flipCode, 
AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + NpuMat dst = getOutputMat(_dst, src.rows, src.cols, src.type(), stream); + + std::vector asix; + if (flipCode == 0) + { + asix.push_back(1); + } + else if (flipCode > 0) + { + asix.push_back(2); + } + else + { + asix.push_back(1); + asix.push_back(2); + } + flip(src, asix, dst, stream); + syncOutput(dst, _dst, stream); +} + +void rotate(InputArray _src, OutputArray _dst, int rotateMode, AscendStream& stream) +{ + CV_Assert(_src.dims() <= 2); + NpuMat src = getInputMat(_src, stream), dst, tempMat; + switch (rotateMode) + { + case ROTATE_90_CLOCKWISE: + { + dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream); + transpose(src, tempMat, stream); + flip(tempMat, dst, 1, stream); + break; + } + case ROTATE_180: + { + dst = getOutputMat(_dst, src.rows, src.cols, src.type(), stream); + flip(src, dst, -1, stream); + break; + } + case ROTATE_90_COUNTERCLOCKWISE: + { + dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream); + transpose(_src, tempMat, stream); + flip(tempMat, dst, 0, stream); + break; + } + default: + break; + } + syncOutput(dst, _dst, stream); +} + +} // namespace cann +} // namespace cv diff --git a/modules/cannops/src/element_operations.cpp b/modules/cannops/src/element_operations.cpp new file mode 100644 index 00000000000..907c995ce45 --- /dev/null +++ b/modules/cannops/src/element_operations.cpp @@ -0,0 +1,240 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" +namespace cv +{ +namespace cann +{ +static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, + float scale, int dtype, const char* op, AscendStream& stream) +{ + const bool isScalar1 = (_src1.kind() == _InputArray::MATX); + const bool isScalar2 = (_src2.kind() == _InputArray::MATX); + + if (isScalar1 && isScalar2) + CV_Error(Error::StsBadArg, "At list one matrix parameter shoule be passwd."); + + NpuMat src1, src2; + Mat scalar; + + if (!isScalar1) + src1 = getInputMat(_src1, stream); + if (!isScalar2) + src2 = getInputMat(_src2, stream); + + if (isScalar1) + scalar = _src1.getMat(); + else if (isScalar2) + scalar = _src2.getMat(); + + const int sdepth = src1.empty() ? src2.depth() : src1.depth(); + const int cn = src1.empty() ? src2.channels() : src1.channels(); + const Size size = src1.empty() ? src2.size() : src1.size(); + + if (dtype < 0) + dtype = sdepth; + + const int ddepth = CV_MAT_DEPTH(dtype); + CV_Assert(sdepth <= CV_16F && ddepth <= CV_16F); + CV_Assert(!scalar.empty() || src2.empty() || + (src2.depth() == src1.depth() && src2.size() == src1.size())); + + Scalar val; + + if (!scalar.empty()) + { + CV_Assert(scalar.total() <= 4); + scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); + } + + NpuMat dst = getOutputMat(_dst, size.height, size.width, CV_MAKE_TYPE(ddepth, cn), stream); + + if (isScalar1) + callAscendOperator(src2, val, true, dst, op, stream); + else if (isScalar2) + callAscendOperator(src1, val, false, dst, op, stream); + else + { + if (src2.empty()) + callAscendOperator(src1, dst, op, stream); + else + callAscendOperator(src1, src2, dst, op, stream); + } + + NpuMat mask = getInputMat(_mask, stream); + if (!mask.empty()) + { + int mtype = mask.type(); + CV_Assert((mtype == CV_8UC1 || mtype == CV_8SC1) && mask.size() == size); + NpuMat onesMask, castedMask; + onesMask.create(mask.rows, mask.cols, mask.type()); + callAscendOperator(mask, mask, onesMask, "Div", 
stream); + onesMask.convertTo(castedMask, dst.depth(), stream); + callAscendOperator(dst, castedMask, dst, "Mul", stream); + } + + if (scale != 1) + { + muls(dst, scale, dst, stream); + } + + syncOutput(dst, _dst, stream); +} + +void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, + AscendStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream); +} + +void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, + AscendStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream); +} + +void multiply(InputArray src1, InputArray src2, OutputArray dst, float scale, int dtype, + AscendStream& stream) +{ + arithm_op(src1, src2, dst, noArray(), scale, dtype, "Mul", stream); +} + +void divide(InputArray src1, InputArray src2, OutputArray dst, float scale, int dtype, + AscendStream& stream) +{ + arithm_op(src1, src2, dst, noArray(), scale, dtype, "Div", stream); +} + +void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, + AscendStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream); +} + +void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, + AscendStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseOr", stream); +} + +void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, + AscendStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream); +} + +void bitwise_not(InputArray src, OutputArray dst, InputArray mask, AscendStream& stream) +{ + arithm_op(src, noArray(), dst, mask, 1, -1, "Invert", stream); +} + +void addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, + OutputArray _dst, int dtype, AscendStream& stream) +{ + NpuMat src1, src2; + src1 = getInputMat(_src1, stream); + src2 = getInputMat(_src2, stream); + + if (dtype < 0) + dtype = src1.depth(); + + 
CV_Assert(src2.depth() == src1.depth() && src2.size() == src1.size() && + src1.channels() == src2.channels()); + + int type = CV_MAKE_TYPE(dtype, src1.channels()); + NpuMat dst = getOutputMat(_dst, src1.rows, src1.cols, type, stream); + + // TODO Consider overflow, should extend type or not? + NpuMat src1Weighted(src1.size(), type), src2Weighted(src1.size(), type), + srcWeightedSumRet(src1.size(), type); + muls(src1, alpha, src1Weighted, stream); + muls(src2, beta, src2Weighted, stream); + callAscendOperator(src1Weighted, src2Weighted, srcWeightedSumRet, "Add", stream); + adds(srcWeightedSumRet, gamma, dst, stream); + + syncOutput(dst, _dst, stream); +} + +double threshold(NpuMat& src, NpuMat& dst, double thresh, double maxval, int type, + AscendStream& stream) +{ + // ThresholdTypes is defined in opencv2/imgproc, This type is the only Symbol we need. + // Add imgproc to dependence is too heavy, use magic number instead. + CV_Assert(type <= 4 /*THRESH_TOZERO_INV*/); + + NpuMat threshMat(src.size(), src.type()); + + AclFloatAttribute attr("threshold", (float)thresh); + std::vector attrs{&attr}; + callAscendOperator(src, threshMat, "Threshold", stream, attrs); + + // THRESH_*_INV, THRESH_TRUNC need a inverse threshMat. 
+ // THRESH_BINARY_INV = 1, THRESH_TRUNC = 2, THRESH_TOZERO_INV = 4, + if (type == 1 || type == 2 || type == 4) + { + NpuMat threshInvMat(src.size(), src.type()); + NpuMat ones(src.size(), src.type()); + Scalar s(1, 1, 1, 1); + ones.setTo(s, stream); + callAscendOperator(ones, threshMat, threshInvMat, "Sub", stream); + + if (type == 1) + { + muls(threshInvMat, maxval, dst, stream); + } + else if (type == 2) + { + NpuMat ToZeroInvMat(src.size(), src.type()); + NpuMat TruncMat(src.size(), src.type()); + callAscendOperator(threshInvMat, src, ToZeroInvMat, "Mul", stream); + muls(threshMat, thresh, TruncMat, stream); + callAscendOperator(ToZeroInvMat, TruncMat, dst, "Add", stream); + } + else + { + callAscendOperator(threshInvMat, src, dst, "Mul", stream); + } + } + else + { + if (type == 0) /* THRESH_BINARY = 0 */ + { + muls(threshMat, maxval, dst, stream); + } + else if (type == 3) /* THRESH_TOZERO = 3 */ + { + callAscendOperator(threshMat, src, dst, "Mul", stream); + } + else + { + CV_Error(Error::AscendApiCallError, "Unknown/unsupported threshold type"); + } + } + return thresh; +} + +double threshold(InputArray _src, OutputArray _dst, double thresh, double maxval, int type, + AscendStream& stream) +{ + NpuMat src = getInputMat(_src, stream); + NpuMat dst = getOutputMat(_dst, src.rows, src.cols, src.type(), stream); + double ret = threshold(src, dst, thresh, maxval, type, stream); + syncOutput(dst, _dst, stream); + return ret; +} + +#define OpScalar(name, op) \ + void name(const NpuMat& arr, float scalar, NpuMat& dst, AscendStream& stream) \ + { \ + AclFloatAttribute attr("value", scalar); \ + std::vector attrs{&attr}; \ + callAscendOperator(arr, dst, #op, stream, attrs); \ + } + +OpScalar(muls, Muls); +OpScalar(adds, Adds); + +} // namespace cann +} // namespace cv diff --git a/modules/cannops/src/npumat.cpp b/modules/cannops/src/npumat.cpp new file mode 100644 index 00000000000..0332b891ec9 --- /dev/null +++ b/modules/cannops/src/npumat.cpp @@ -0,0 +1,276 @@ +// 
This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" +#include + +namespace +{ +class DefaultAllocator : public cv::cann::NpuMat::Allocator +{ +public: + std::shared_ptr allocate(size_t size) CV_OVERRIDE; + bool allocate(cv::cann::NpuMat* mat, int rows, int cols, size_t elemSize) CV_OVERRIDE; +}; + +std::shared_ptr DefaultAllocator::allocate(size_t size) +{ + uchar* data; + cv::cann::aclrtMallocWarpper((void**)(&data), size); + return std::shared_ptr(data, [](void* ptr) { cv::cann::aclrtFreeWarpper(ptr); }); +} + +bool DefaultAllocator::allocate(cv::cann::NpuMat* mat, int rows, int cols, size_t elemSize) +{ + mat->data = allocate(elemSize * cols * rows); + mat->step = cols * elemSize; + + return true; +} + +DefaultAllocator cannDefaultAllocator; +cv::cann::NpuMat::Allocator* g_defaultAllocator = &cannDefaultAllocator; +} // namespace + +namespace cv +{ +namespace cann +{ +NpuMat::Allocator* NpuMat::defaultAllocator() { return g_defaultAllocator; } + +void NpuMat::setDefaultAllocator(NpuMat::Allocator* allocator) +{ + CV_Assert(allocator != 0); + g_defaultAllocator = allocator; +} + +// TODO: this function is copied from matrix.cpp, which is a local symbol there and can be +// refreneced. 
+static int updateContinuityFlag(int flags, int dims, const int* size, const size_t* step) +{ + int i, j; + for (i = 0; i < dims; i++) + { + if (size[i] > 1) + break; + } + + uint64 t = (uint64)size[std::min(i, dims - 1)] * CV_MAT_CN(flags); + for (j = dims - 1; j > i; j--) + { + t *= size[j]; + if (step[j] * size[j] < step[j - 1]) + break; + } + + if (j <= i && t == (uint64)(int)t) + return flags | Mat::CONTINUOUS_FLAG; + return flags & ~Mat::CONTINUOUS_FLAG; +} + +void NpuMat::updateContinuityFlag() +{ + int sz[] = {rows, cols}; + size_t steps[] = {step, elemSize()}; + flags = cv::cann::updateContinuityFlag(flags, 2, sz, steps); +} + +void NpuMat::create(int _rows, int _cols, int _type) +{ + CV_DbgAssert(_rows >= 0 && _cols >= 0); + + _type &= Mat::TYPE_MASK; + + if (rows == _rows && cols == _cols && type() == _type && data) + return; + + if (_rows > 0 && _cols > 0) + { + flags = Mat::MAGIC_VAL + _type; + rows = _rows; + cols = _cols; + + const size_t esz = elemSize(); + + bool allocSuccess = allocator->allocate(this, rows, cols, esz); + + if (!allocSuccess) + { + // custom allocator fails, try default allocator + allocator = defaultAllocator(); + allocSuccess = allocator->allocate(this, rows, cols, esz); + CV_Assert(allocSuccess); + } + + if (esz * cols == step) + flags |= Mat::CONTINUOUS_FLAG; + + datastart = data.get(); + dataend = data.get() + step * (rows - 1) + cols * esz; + } +} + +void NpuMat::upload(InputArray arr) { upload(arr, AscendStream::Null()); } + +void NpuMat::upload(InputArray arr, AscendStream& stream) +{ + Mat mat = arr.getMat(); + CV_DbgAssert(!mat.empty()); + create(mat.rows, mat.cols, mat.type()); + aclrtMemcpy2dWarpper(data, 0, step, mat.data, mat.step[0], cols * elemSize(), rows, stream); +} + +void NpuMat::download(OutputArray dst) const { download(dst, AscendStream::Null()); } + +void NpuMat::download(OutputArray _dst, AscendStream& stream) const +{ + CV_DbgAssert(!empty()); + + _dst.create(size(), type()); + Mat dst = _dst.getMat(); + 
aclrtMemcpy2dWarpper(dst.data, dst.step[0], data, 0, step, cols * elemSize(), rows, stream); +} + +NpuMat::NpuMat(int rows_, int cols_, int type_, Scalar& s_, NpuMat::Allocator* allocator_) + : flags(0), rows(rows_), cols(cols_), step(0), datastart(0), dataend(0), allocator(allocator_) +{ + create(rows_, cols_, type_); + setTo(s_); +} + +NpuMat::NpuMat(Size size_, int type_, Scalar& s_, NpuMat::Allocator* allocator_) + : flags(0), rows(size_.height), cols(size_.width), step(0), datastart(0), dataend(0), + allocator(allocator_) +{ + create(size_.height, size_.width, type_); + setTo(s_); +} + +NpuMat::NpuMat(InputArray _m, const Rect& roi) : NpuMat(_m, roi, AscendStream::Null()) {} + +NpuMat::NpuMat(InputArray _m, const Rect& roi, AscendStream& stream) + : rows(roi.height), cols(roi.width), allocator(defaultAllocator()) +{ + NpuMat m = getInputMat(_m, stream); + step = m.step; + data = m.data; + flags = m.flags; + CV_Assert(0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && + 0 <= roi.height && roi.y + roi.height <= m.rows); + size_t esz = CV_ELEM_SIZE(flags); + size_t sizeMem = esz * roi.width * roi.height * m.channels(); + size_t offset = roi.y * m.step + roi.x * esz; + + void* dst = malloc(sizeMem); + size_t dpitch = roi.width * esz; + std::shared_ptr dstDevice = allocator->allocate(sizeMem); + aclrtMemcpy2dWarpper(dst, dpitch, data, offset, step, dpitch, roi.height, stream); + aclrtMemcpy2dWarpper(dstDevice, 0, dpitch, dst, dpitch, dpitch, roi.height, stream); + data = dstDevice; + step = dpitch; + free(dst); + updateContinuityFlag(); +} + +NpuMat& NpuMat::setTo(const Scalar& sc) { return setTo(sc, AscendStream::Null()); } + +NpuMat& NpuMat::setTo(const Scalar& sc, AscendStream& stream) +{ + size_t totalBytes = (size_t)rows * cols * elemSize(); + if (totalBytes == 0) + return *this; + + aclrtMemsetWarpper(data, 0, totalBytes, stream); + + NpuMat dst(rows, cols, type()); + // TODO use AssignAdd to avoid memcpy, or use broadcase. 
+ callAscendOperator(*this, sc, false, dst, "Add", stream); + swap(dst); + + return *this; +} + +NpuMat& NpuMat::setTo(float sc) { return setTo(sc, AscendStream::Null()); } + +NpuMat& NpuMat::setTo(float sc, AscendStream& stream) +{ + size_t totalBytes = (size_t)rows * cols * elemSize(); + if (totalBytes == 0) + return *this; + + aclrtMemsetWarpper(data, 0, totalBytes, stream); + + NpuMat dst(rows, cols, type()); + adds(*this, sc, dst, stream); + swap(dst); + + return *this; +} + +void NpuMat::convertTo(NpuMat& dst, int rtype) const +{ + convertTo(dst, rtype, AscendStream::Null()); +} + +void NpuMat::convertTo(NpuMat& dst, int _rtype, AscendStream& _stream) const +{ + int cn = channels(); + dst.create(rows, cols, CV_MAKE_TYPE(_rtype, cn)); + callAscendOperator(*this, dst, "Cast", _stream); +} + +static NpuMat getNpuMat(InputArray arr) +{ + _InputArray::KindFlag k = arr.kind(); + if (k == _InputArray::NPU_MAT) + { + const cann::NpuMat* n_mat = (const cann::NpuMat*)arr.getObj(); + return *n_mat; + } + + if (k == _InputArray::NONE) + return cann::NpuMat(); + + CV_Error(cv::Error::StsNotImplemented, "getNpuMat is available only for cann::NpuMat"); +} + +NpuMat getInputMat(InputArray _src, AscendStream& stream) +{ + NpuMat src; + if (_src.kind() == _InputArray::NPU_MAT) + { + src = getNpuMat(_src); + } + else if (!_src.empty()) + { + src.upload(_src, stream); + } + return src; +} + +NpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, AscendStream& stream) +{ + CV_UNUSED(stream); + NpuMat dst; + if (_dst.kind() == _InputArray::NPU_MAT) + { + ((cann::NpuMat*)(_dst.getObj()))->create(rows, cols, type); + dst = getNpuMat(_dst); + } + else + { + dst.create(rows, cols, type); + } + return dst; +} + +void syncOutput(const NpuMat& dst, OutputArray _dst, AscendStream& stream) +{ + if (_dst.kind() != _InputArray::NPU_MAT) + { + dst.download(_dst, stream); + } +} +} // namespace cann +} // namespace cv diff --git a/modules/cannarithm/src/precomp.hpp 
b/modules/cannops/src/precomp.hpp similarity index 67% rename from modules/cannarithm/src/precomp.hpp rename to modules/cannops/src/precomp.hpp index 1541ec80a69..8411cc40407 100644 --- a/modules/cannarithm/src/precomp.hpp +++ b/modules/cannops/src/precomp.hpp @@ -5,12 +5,10 @@ #ifndef __OPENCV_PRECOMP_H__ #define __OPENCV_PRECOMP_H__ -#include -#include #include "opencv2/cann.hpp" -#include "opencv2/cann_prepare.hpp" -#include "opencv2/acl_stream_accessor.hpp" +#include "opencv2/stream_accessor.hpp" #include "opencv2/cann_call.hpp" -#include "opencv2/cann_arithm.hpp" +#include "opencv2/cann_interface.hpp" +#include "opencv2/cann_private.hpp" #endif /* __OPENCV_PRECOMP_H__ */ diff --git a/modules/cannops/test/test_core.cpp b/modules/cannops/test/test_core.cpp new file mode 100644 index 00000000000..fca24133ca5 --- /dev/null +++ b/modules/cannops/test/test_core.cpp @@ -0,0 +1,135 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "test_precomp.hpp" +#include + +namespace opencv_test +{ +namespace +{ +TEST(IMGPROC, MERGE) +{ + Mat m1 = (Mat_(2, 2) << 1, 4, 7, 10); + Mat m2 = (Mat_(2, 2) << 2, 5, 8, 11); + Mat m3 = (Mat_(2, 2) << 3, 6, 9, 12); + Mat channels[3] = {m1, m2, m3}; + Mat m; + cv::merge(channels, 3, m); + + cv::cann::setDevice(0); + + NpuMat a1, a2, a3; + a1.upload(m1); + a2.upload(m2); + a3.upload(m3); + NpuMat aclChannels[3] = {a1, a2, a3}; + std::vector aclChannelsVector; + aclChannelsVector.push_back(a1); + aclChannelsVector.push_back(a2); + aclChannelsVector.push_back(a3); + + Mat checker1, checker2; + cv::cann::merge(aclChannels, 3, checker1); + cv::cann::merge(aclChannelsVector, checker2); + + EXPECT_MAT_NEAR(m, checker1, 0.0); + EXPECT_MAT_NEAR(m, checker2, 0.0); + + cv::cann::resetDevice(); +} + +TEST(IMGPROC, SPLIT) +{ + char d[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Mat m(2, 2, CV_8UC3, d); + Mat channels[3]; + cv::split(m, channels); + + cv::cann::setDevice(0); + + NpuMat aclChannels[3]; + std::vector aclChannelsVector; + + cv::cann::split(m, aclChannels); + cv::cann::split(m, aclChannelsVector); + + Mat checker1[3], checker2[3]; + aclChannels[0].download(checker1[0]); + aclChannels[1].download(checker1[1]); + aclChannels[2].download(checker1[2]); + + aclChannelsVector[0].download(checker2[0]); + aclChannelsVector[1].download(checker2[1]); + aclChannelsVector[2].download(checker2[2]); + + EXPECT_MAT_NEAR(channels[0], checker1[0], 0.0); + EXPECT_MAT_NEAR(channels[1], checker1[1], 0.0); + EXPECT_MAT_NEAR(channels[2], checker1[2], 0.0); + + EXPECT_MAT_NEAR(channels[0], checker2[0], 0.0); + EXPECT_MAT_NEAR(channels[1], checker2[1], 0.0); + EXPECT_MAT_NEAR(channels[2], checker2[2], 0.0); + + cv::cann::resetDevice(); +} + +TEST(IMGPROC, TRANSPOSE) +{ + Mat cpuMat = randomMat(10, 10, CV_32SC3), cpuRetMat, checker; + cv::transpose(cpuMat, cpuRetMat); + cv::cann::transpose(cpuMat, checker); + + EXPECT_MAT_NEAR(cpuRetMat, checker, 0.0); +} + +TEST(IMGPROC, 
FLIP) +{ + Mat cpuMat = randomMat(10, 10, CV_32SC3), cpuRetMat, checker; + + cv::flip(cpuMat, cpuRetMat, 0); + cv::cann::flip(cpuMat, checker, 0); + EXPECT_MAT_NEAR(cpuRetMat, checker, 0.0); + + cv::flip(cpuMat, cpuRetMat, 1); + cv::cann::flip(cpuMat, checker, 1); + EXPECT_MAT_NEAR(cpuRetMat, checker, 0.0); + + cv::flip(cpuMat, cpuRetMat, -1); + cv::cann::flip(cpuMat, checker, -1); + EXPECT_MAT_NEAR(cpuRetMat, checker, 0.0); +} + +TEST(IMGPROC, ROTATE) +{ + Mat cpuRetMat, checker, cpuMat = randomMat(3, 5, CV_16S, 0.0, 255.0); + + int rotateMode = 0; + cv::rotate(cpuMat, cpuRetMat, rotateMode); + cv::cann::rotate(cpuMat, checker, rotateMode); + EXPECT_MAT_NEAR(cpuRetMat, checker, 0.0); + + rotateMode = 1; + cv::rotate(cpuMat, cpuRetMat, rotateMode); + cv::cann::rotate(cpuMat, checker, rotateMode); + EXPECT_MAT_NEAR(cpuRetMat, checker, 0.0); + + rotateMode = 2; + cv::rotate(cpuMat, cpuRetMat, rotateMode); + cv::cann::rotate(cpuMat, checker, rotateMode); + EXPECT_MAT_NEAR(cpuRetMat, checker, 0.0); +} + +TEST(CORE, CROP) +{ + Mat cpuOpRet, checker, cpuMat = randomMat(6, 6, CV_32SC3, 0.0, 255.0); + Rect b(1, 2, 4, 4); + Mat cropped_cv(cpuMat, b); + NpuMat cropped_cann(cpuMat, b); + cropped_cann.download(checker); + EXPECT_MAT_NEAR(cropped_cv, checker, 1e-10); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannops/test/test_cvtcolor.cpp b/modules/cannops/test/test_cvtcolor.cpp new file mode 100644 index 00000000000..70dc11f297f --- /dev/null +++ b/modules/cannops/test/test_cvtcolor.cpp @@ -0,0 +1,72 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "test_precomp.hpp" + +namespace opencv_test +{ +namespace +{ + +void cvtColorTest(int code, int cn, int dcn = 3, float diff = 0.0f) +{ + cv::cann::setDevice(DEVICE_ID); + Mat cpuRet, npuRet; + + Mat img8U = randomMat(512, 512, CV_MAKETYPE(CV_8U, cn), 0.0f, 255.0f); + Mat img16U = randomMat(512, 512, CV_MAKETYPE(CV_16U, cn), 0.0f, 65535.0f); + Mat img32F = randomMat(512, 512, CV_MAKETYPE(CV_32F, cn), 0.0f, 65535.0f); + + cv::cvtColor(img8U, cpuRet, code, dcn); + cv::cann::cvtColor(img8U, npuRet, code, dcn); + EXPECT_MAT_NEAR(cpuRet, npuRet, diff); + + cv::cvtColor(img16U, cpuRet, code, dcn); + cv::cann::cvtColor(img16U, npuRet, code, dcn); + EXPECT_MAT_NEAR(cpuRet, npuRet, diff); + + cv::cvtColor(img32F, cpuRet, code, dcn); + cv::cann::cvtColor(img32F, npuRet, code, dcn); + EXPECT_MAT_NEAR(cpuRet, npuRet, diff); + cv::cann::resetDevice(); +} + +TEST(CVT_COLOR, BGR2BGRA) { cvtColorTest(COLOR_BGR2BGRA, 3, 4); } +TEST(CVT_COLOR, BGRA2BGR) { cvtColorTest(COLOR_BGRA2BGR, 4); } +TEST(CVT_COLOR, BGR2RGBA) { cvtColorTest(COLOR_BGR2RGBA, 3, 4); } +TEST(CVT_COLOR, RGBA2BGR) { cvtColorTest(COLOR_RGBA2BGR, 4); } +TEST(CVT_COLOR, BGR2RGB) { cvtColorTest(COLOR_BGR2RGB, 3); } +TEST(CVT_COLOR, BGRA2RGBA) { cvtColorTest(COLOR_BGRA2RGBA, 4, 4); } + +// Due to parameter accuracy issues, the calculation results have certain accuracy differences. 
+TEST(CVT_COLOR, BGR2GRAY) { cvtColorTest(COLOR_BGR2GRAY, 3, 1, 10.0f); } +// RGB2GRAY must pass COLOR_RGB2GRAY; with COLOR_BGR2GRAY it only repeated the test above. +TEST(CVT_COLOR, RGB2GRAY) { cvtColorTest(COLOR_RGB2GRAY, 3, 1, 10.0f); } +TEST(CVT_COLOR, GRAY2BGR) { cvtColorTest(COLOR_GRAY2BGR, 1); } +TEST(CVT_COLOR, GRAY2BGRA) { cvtColorTest(COLOR_GRAY2BGRA, 1, 4); } +TEST(CVT_COLOR, BGRA2GRAY) { cvtColorTest(COLOR_BGRA2GRAY, 4, 1, 10.0f); } +TEST(CVT_COLOR, RGBA2GRAY) { cvtColorTest(COLOR_RGBA2GRAY, 4, 1, 10.0f); } + +TEST(CVT_COLOR, BGR2XYZ) { cvtColorTest(COLOR_BGR2XYZ, 3, 3, 50.0f); } +TEST(CVT_COLOR, RGB2XYZ) { cvtColorTest(COLOR_RGB2XYZ, 3, 3, 50.0f); } +TEST(CVT_COLOR, XYZ2BGR) { cvtColorTest(COLOR_XYZ2BGR, 3, 3, 150.0f); } +TEST(CVT_COLOR, XYZ2RGB) { cvtColorTest(COLOR_XYZ2RGB, 3, 3, 150.0f); } +TEST(CVT_COLOR, XYZ2BGR_DC4) { cvtColorTest(COLOR_XYZ2BGR, 3, 4, 150.0f); } +TEST(CVT_COLOR, XYZ2RGB_DC4) { cvtColorTest(COLOR_XYZ2RGB, 3, 4, 150.0f); } + +TEST(CVT_COLOR, BGR2YCrCb) { cvtColorTest(COLOR_BGR2YCrCb, 3, 3, 10.0f); } +TEST(CVT_COLOR, RGB2YCrCb) { cvtColorTest(COLOR_RGB2YCrCb, 3, 3, 10.0f); } +TEST(CVT_COLOR, YCrCb2BGR) { cvtColorTest(COLOR_YCrCb2BGR, 3, 3, 10.0f); } +TEST(CVT_COLOR, YCrCb2RGB) { cvtColorTest(COLOR_YCrCb2RGB, 3, 3, 10.0f); } +TEST(CVT_COLOR, YCrCb2BGR_DC4) { cvtColorTest(COLOR_YCrCb2BGR, 3, 4, 10.0f); } +TEST(CVT_COLOR, YCrCb2RGB_DC4) { cvtColorTest(COLOR_YCrCb2RGB, 3, 4, 10.0f); } + +TEST(CVT_COLOR, BGR2YUV) { cvtColorTest(COLOR_BGR2YUV, 3, 3, 10.0f); } +TEST(CVT_COLOR, RGB2YUV) { cvtColorTest(COLOR_RGB2YUV, 3, 3, 10.0f); } +TEST(CVT_COLOR, YUV2BGR) { cvtColorTest(COLOR_YUV2BGR, 3, 3, 10.0f); } +TEST(CVT_COLOR, YUV2RGB) { cvtColorTest(COLOR_YUV2RGB, 3, 3, 10.0f); } +TEST(CVT_COLOR, YUV2BGR_DC4) { cvtColorTest(COLOR_YUV2BGR, 3, 4, 10.0f); } +TEST(CVT_COLOR, YUV2RGB_DC4) { cvtColorTest(COLOR_YUV2RGB, 3, 4, 10.0f); } + +} // namespace +} // namespace opencv_test \ No newline at end of file diff --git a/modules/cannops/test/test_element_operations.cpp b/modules/cannops/test/test_element_operations.cpp new file mode 100644
index 00000000000..4364a1ebe23 --- /dev/null +++ b/modules/cannops/test/test_element_operations.cpp @@ -0,0 +1,259 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" +#include + +namespace opencv_test +{ +namespace +{ +template +void testMatOpMat(FCV cvFunc, FCANN cannFunc, PARAMS... param) +{ + cv::cann::setDevice(DEVICE_ID); + Mat mat1 = randomMat(10, 10, CV_32SC3); + Mat mat2 = randomMat(10, 10, CV_32SC3); + Mat cpuDst, check; + + cvFunc(mat1, mat2, cpuDst, param...); + cannFunc(mat1, mat2, check, param..., AscendStream::Null()); + EXPECT_MAT_NEAR(cpuDst, check, 0.0); + + AscendStream stream; + cannFunc(mat1, mat2, check, param..., stream); + stream.waitForCompletion(); + EXPECT_MAT_NEAR(cpuDst, check, 0.0); + + cv::cann::resetDevice(); +} + +TEST(ELEMENTWISE_OP, MAT_ADD_MAT) { testMatOpMat(cv::add, cv::cann::add, noArray(), -1); } + +TEST(ELEMENTWISE_OP, MAT_SUB_MAT) { testMatOpMat(cv::subtract, cv::cann::subtract, noArray(), -1); } + +TEST(ELEMENTWISE_OP, MAT_MUL_MAT) { testMatOpMat(cv::multiply, cv::cann::multiply, 1, -1); } + +/* + * TODO cv::divide will round each element by cvRound while Ascend DIV op will floor each element. + * In order to pass the testcase, using interger for all matrix and scalar, fixme after Ascend + * support round element. 
+ */ +/* +TEST(ELEMENTWISE_OP, MAT_DIV_MAT) +{ + + testMatOpMat([](const cv::Mat& src1, const cv::Mat& src2, cv::Mat& dst, double scale, int dtype) + { cv::divide(src1, src2, dst, scale, dtype); }, + cv::cann::divide, 1, -1); +} +*/ + +TEST(ELEMENTWISE_OP, MAT_BITWISE_AND_MAT) +{ + testMatOpMat(cv::bitwise_and, cv::cann::bitwise_and, noArray()); +} + +TEST(ELEMENTWISE_OP, MAT_BITWISE_OR_MAT) +{ + testMatOpMat(cv::bitwise_or, cv::cann::bitwise_or, noArray()); +} + +TEST(ELEMENTWISE_OP, MAT_BITWISE_XOR_MAT) +{ + testMatOpMat(cv::bitwise_xor, cv::cann::bitwise_xor, noArray()); +} + +TEST(ELEMENTWISE_OP, MAT_ADD_MAT_WITH_MASK_AND_DTYPE) +{ + testMatOpMat(cv::add, cv::cann::add, genMask(), CV_32SC3); +} + +TEST(ELEMENTWISE_OP, MAT_SUB_MAT_WITH_MASK_AND_DTYPE) +{ + testMatOpMat(cv::subtract, cv::cann::subtract, genMask(), CV_32SC3); +} + +TEST(ELEMENTWISE_OP, MAT_BITWISE_AND_MAT_WITH_MASK) +{ + testMatOpMat(cv::bitwise_and, cv::cann::bitwise_and, genMask()); +} + +TEST(ELEMENTWISE_OP, MAT_BITWISE_OR_MAT_WITH_MASK) +{ + testMatOpMat(cv::bitwise_or, cv::cann::bitwise_or, genMask()); +} + +TEST(ELEMENTWISE_OP, MAT_BITWISE_XOR_MAT_WITH_MASK) +{ + testMatOpMat(cv::bitwise_xor, cv::cann::bitwise_xor, genMask()); +} + +/* Ascend Mul will case scale to interger first if matrix dtype is interger. + * Result is not match, fixme after Ascend Op updated. + */ +float randomScale = randomInterger(); +TEST(ELEMENTWISE_OP, MAT_MUL_MAT_WITH_SCALE) +{ + testMatOpMat(cv::multiply, cv::cann::multiply, randomScale, -1); +} + +/* +TEST(ELEMENTWISE_OP, MAT_DIV_MAT_WITH_SCALE) +{ + testMatOpMat([](const cv::Mat& src1, const cv::Mat& src2, cv::Mat& dst, double scale, int dtype) + { cv::divide(src1, src2, dst, scale, dtype); }, + cv::cann::divide, randomScale, -1); +} +*/ + +template +void testMatOpScalar(FCV cvFunc, FCANN cannFunc, PARAMS... 
param)
+{
+    // Compares cvFunc against cannFunc for both operand orders (scalar op mat, mat op scalar),
+    // first on AscendStream::Null() and then on a dedicated stream that is waited on.
+    Scalar scalar = randomScalar();
+    Mat mat(10, 10, CV_32SC3, randomScalar());
+    // Each CANN run gets its own outputs so the stream run cannot pass on results left behind
+    // by the Null-stream run.
+    Mat cpuDst1, cpuDst2, checker1, checker2, streamChecker1, streamChecker2;
+
+    // The CPU reference has no scalar-first form here, so the scalar is expanded to a full Mat.
+    cvFunc(Mat(10, 10, CV_32SC3, scalar), mat, cpuDst1, param...);
+    cvFunc(mat, Mat(10, 10, CV_32SC3, scalar), cpuDst2, param...);
+    cv::cann::setDevice(DEVICE_ID);
+
+    cannFunc(scalar, mat, checker1, param..., AscendStream::Null());
+    cannFunc(mat, scalar, checker2, param..., AscendStream::Null());
+    EXPECT_MAT_NEAR(cpuDst1, checker1, 0.0);
+    EXPECT_MAT_NEAR(cpuDst2, checker2, 0.0);
+
+    AscendStream stream;
+    cannFunc(scalar, mat, streamChecker1, param..., stream);
+    cannFunc(mat, scalar, streamChecker2, param..., stream);
+    stream.waitForCompletion();
+    EXPECT_MAT_NEAR(cpuDst1, streamChecker1, 0.0);
+    EXPECT_MAT_NEAR(cpuDst2, streamChecker2, 0.0);
+
+    cv::cann::resetDevice();
+}
+
+TEST(ELEMENTWISE_OP, MAT_ADD_SCALAR) { testMatOpScalar(cv::add, cv::cann::add, noArray(), -1); }
+
+TEST(ELEMENTWISE_OP, MAT_SUB_SCALAR)
+{
+    testMatOpScalar(cv::subtract, cv::cann::subtract, noArray(), -1);
+}
+
+TEST(ELEMENTWISE_OP, MAT_MUL_SCALAR) { testMatOpScalar(cv::multiply, cv::cann::multiply, 1, -1); }
+
+/*
+TEST(ELEMENTWISE_OP, MAT_DIV_SCALAR)
+{
+    testMatOpScalar([](const cv::Mat& src1, const cv::Mat& src2, cv::Mat& dst, double scale,
+                       int dtype) { cv::divide(src1, src2, dst, scale, dtype); },
+                    cv::cann::divide, 1, -1);
+}
+*/
+
+TEST(ELEMENTWISE_OP, MAT_BITWISE_AND_SCALAR)
+{
+    testMatOpScalar(cv::bitwise_and, cv::cann::bitwise_and, noArray());
+}
+
+TEST(ELEMENTWISE_OP, MAT_BITWISE_OR_SCALAR)
+{
+    testMatOpScalar(cv::bitwise_or, cv::cann::bitwise_or, noArray());
+}
+
+TEST(ELEMENTWISE_OP, MAT_BITWISE_XOR_SCALAR)
+{
+    testMatOpScalar(cv::bitwise_xor, cv::cann::bitwise_xor, noArray());
+}
+
+TEST(ELEMENTWISE_OP, MAT_ADD_SCALAR_WITH_MASK_AND_DETYPE)
+{
+    testMatOpScalar(cv::add, cv::cann::add, genMask(), CV_32SC3);
+}
+
+TEST(ELEMENTWISE_OP, MAT_SUB_SCALAR_WITH_MASK_AND_DETYPE)
+{
+    testMatOpScalar(cv::subtract, cv::cann::subtract, genMask(), CV_32SC3);
+}
+
+TEST(ELEMENTWISE_OP, MAT_BITWISE_AND_SCALAR_WITH_MASK) +{ + testMatOpScalar(cv::bitwise_and, cv::cann::bitwise_and, genMask()); +} + +TEST(ELEMENTWISE_OP, MAT_BITWISE_OR_SCALAR_WITH_MASK) +{ + testMatOpScalar(cv::bitwise_or, cv::cann::bitwise_or, genMask()); +} + +TEST(ELEMENTWISE_OP, MAT_BITWISE_XOR_SCALAR_WITH_MASK) +{ + testMatOpScalar(cv::bitwise_xor, cv::cann::bitwise_xor, genMask()); +} + +TEST(ELEMENTWISE_OP, MAT_MUL_SCALAR_WITH_SCALE) +{ + testMatOpScalar(cv::multiply, cv::cann::multiply, randomScale, -1); +} + +/* +TEST(ELEMENTWISE_OP, MAT_DIV_SCALAR_WITH_SCALE) +{ + testMatOpScalar([](const cv::Mat& src1, const cv::Mat& src2, cv::Mat& dst, double scale, + int dtype) { cv::divide(src1, src2, dst, scale, dtype); }, + cv::cann::divide, randomScale, -1); +} +*/ + +TEST(ELEMENTWISE_OP, MAT_BITWISE_NOT_1) +{ + Mat cpuOpRet, checker, cpuMat = randomMat(10, 10, CV_32SC3); + + cv::cann::setDevice(DEVICE_ID); + + cv::bitwise_not(cpuMat, cpuOpRet); + cv::cann::bitwise_not(cpuMat, checker); + EXPECT_MAT_NEAR(cpuOpRet, checker, 0.0); + + cv::cann::resetDevice(); +} + +// TODO random test matrix +TEST(ELEMENTWISE_OP, MAT_ADD_WEIGHTED_1) +{ + Mat cpuOpRet, checker, cpuMat1 = Mat::ones(5, 5, CV_32S), cpuMat2 = Mat::ones(5, 5, CV_32S); + + cv::cann::setDevice(DEVICE_ID); + + cv::addWeighted(cpuMat1, 2, cpuMat2, 3, 5, cpuOpRet); + cv::cann::addWeighted(cpuMat1, 2, cpuMat2, 3, 5, checker); + EXPECT_MAT_NEAR(cpuOpRet, checker, 0.0); + + cv::cann::resetDevice(); +} + +TEST(ELEMENTWISE_OP, MAT_THRESHOLD_1) +{ + Mat cpuOpRet, checker, cpuMat = randomMat(10, 10, CV_16SC3, 0.0, 255.0); + + NpuMat npuMat, npuMat16F, aclOpRet, aclOpRet16S; + cv::cann::setDevice(DEVICE_ID); + npuMat.upload(cpuMat); + npuMat.convertTo(npuMat16F, CV_16F); + + for (int i = 0; i <= 4; i++) + { + cv::threshold(cpuMat, cpuOpRet, 128, 250, i); + cv::cann::threshold(npuMat16F, aclOpRet, 128, 250, i); + aclOpRet.convertTo(aclOpRet16S, CV_16S); + aclOpRet16S.download(checker); + + EXPECT_MAT_NEAR(cpuOpRet, 
checker, 1e-10); + } + + cv::cann::resetDevice(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannarithm/test/test_main.cpp b/modules/cannops/test/test_main.cpp similarity index 93% rename from modules/cannarithm/test/test_main.cpp rename to modules/cannops/test/test_main.cpp index 14bd66005ec..202c6af27ee 100644 --- a/modules/cannarithm/test/test_main.cpp +++ b/modules/cannops/test/test_main.cpp @@ -18,4 +18,4 @@ static void initTests() ::testing::AddGlobalTestEnvironment(cannEnv); } -CV_TEST_MAIN("cannarithm", initTests()); +CV_TEST_MAIN("cannops", initTests()); diff --git a/modules/cannops/test/test_npumat.cpp b/modules/cannops/test/test_npumat.cpp new file mode 100644 index 00000000000..7e40afda184 --- /dev/null +++ b/modules/cannops/test/test_npumat.cpp @@ -0,0 +1,146 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +namespace opencv_test +{ +namespace +{ + +class DummyAllocator : public NpuMat::Allocator +{ +public: + std::shared_ptr allocate(size_t size) CV_OVERRIDE + { + CV_UNUSED(size); + return std::shared_ptr(); + } + bool allocate(cv::cann::NpuMat* mat, int rows, int cols, size_t elemSize) CV_OVERRIDE + { + CV_UNUSED(rows); + CV_UNUSED(cols); + CV_UNUSED(elemSize); + mat->data = std::shared_ptr((uchar*)0x12345, [](void* ptr) { CV_UNUSED(ptr); }); + return true; + } +}; + +TEST(NpuMat, Construct) +{ + cv::cann::setDevice(0); + // 1 Default constructor. + NpuMat defaultNpuMat; + NpuMat::Allocator* defaultAllocator = NpuMat::defaultAllocator(); + ASSERT_EQ(defaultNpuMat.allocator, defaultAllocator); + + // 2 get & set allocator. 
+ DummyAllocator dummyAllocator; + NpuMat::setDefaultAllocator(&dummyAllocator); + ASSERT_EQ(defaultNpuMat.defaultAllocator(), &dummyAllocator); + NpuMat::setDefaultAllocator(defaultAllocator); + + // 3 constructs NpuMat of the specified size and type + NpuMat specifiedSizeNpuMat1(5, 6, CV_8UC3); + NpuMat specifiedSizeNpuMat2(Size(300, 200), CV_64F); + + ASSERT_EQ(specifiedSizeNpuMat1.rows, 5); + ASSERT_EQ(specifiedSizeNpuMat1.cols, 6); + ASSERT_EQ(specifiedSizeNpuMat1.depth(), CV_8U); + ASSERT_EQ(specifiedSizeNpuMat1.channels(), 3); + + ASSERT_EQ(specifiedSizeNpuMat2.cols, 300); + ASSERT_EQ(specifiedSizeNpuMat2.rows, 200); + ASSERT_EQ(specifiedSizeNpuMat2.depth(), CV_64F); + ASSERT_EQ(specifiedSizeNpuMat2.channels(), 1); + + // 4 constructs NpuMat and fills it with the specified value s + srand((unsigned int)(time(NULL))); + Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + + Mat scalarToMat(7, 8, CV_8UC3, sc); + NpuMat scalarToNpuMat1(7, 8, CV_8UC3, sc); + Mat scalarToMatChecker; + scalarToNpuMat1.download(scalarToMatChecker); + + EXPECT_MAT_NEAR(scalarToMat, scalarToMatChecker, 0.0); + + NpuMat scalarToNpuMat2(Size(123, 345), CV_32S); + + ASSERT_EQ(scalarToNpuMat1.rows, 7); + ASSERT_EQ(scalarToNpuMat1.cols, 8); + ASSERT_EQ(scalarToNpuMat1.depth(), CV_8U); + ASSERT_EQ(scalarToNpuMat1.channels(), 3); + + ASSERT_EQ(scalarToNpuMat2.cols, 123); + ASSERT_EQ(scalarToNpuMat2.rows, 345); + ASSERT_EQ(scalarToNpuMat2.depth(), CV_32S); + ASSERT_EQ(scalarToNpuMat2.channels(), 1); + + // 6 builds NpuMat from host memory + Scalar sc2(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + Mat randomMat(7, 8, CV_8UC3, sc2); + InputArray arr = randomMat; + + NpuMat fromInputArray(arr, AscendStream::Null()); + Mat randomMatChecker; + fromInputArray.download(randomMatChecker); + EXPECT_MAT_NEAR(randomMat, randomMatChecker, 0.0); + + cv::cann::resetDevice(); +} + +TEST(NpuMat, Assignment) +{ + DummyAllocator dummyAllocator; + NpuMat mat1; + NpuMat mat2(3, 4, 
CV_8SC1, &dummyAllocator); + mat1 = mat2; + + ASSERT_EQ(mat1.rows, 3); + ASSERT_EQ(mat1.cols, 4); + ASSERT_EQ(mat1.depth(), CV_8S); + ASSERT_EQ(mat1.channels(), 1); + ASSERT_EQ(mat1.data.get(), (uchar*)0x12345); +} + +TEST(NpuMat, SetTo) +{ + cv::cann::setDevice(0); + + srand((unsigned int)(time(NULL))); + Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + + NpuMat npuMat(2, 2, CV_8UC4); + npuMat.setTo(sc); + Mat mat(2, 2, CV_8UC4, sc); + Mat checker; + npuMat.download(checker); + + EXPECT_MAT_NEAR(mat, checker, 0.0); + + cv::cann::resetDevice(); +} + +TEST(NpuMat, ConvertTo) +{ + cv::cann::setDevice(0); + + srand((unsigned int)(time(NULL))); + Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + + NpuMat npuMat(2, 2, CV_8UC4, sc); + NpuMat convertedNpuMat; + npuMat.convertTo(convertedNpuMat, CV_16S); + Mat mat(2, 2, CV_16SC4, sc); + Mat checker; + convertedNpuMat.download(checker); + + EXPECT_MAT_NEAR(mat, checker, 0.0); + + cv::cann::resetDevice(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannarithm/test/test_precomp.hpp b/modules/cannops/test/test_precomp.hpp similarity index 64% rename from modules/cannarithm/test/test_precomp.hpp rename to modules/cannops/test/test_precomp.hpp index e95abb86e1c..439ad70af0d 100644 --- a/modules/cannarithm/test/test_precomp.hpp +++ b/modules/cannops/test/test_precomp.hpp @@ -7,10 +7,21 @@ #include "opencv2/ts.hpp" #include "opencv2/cann.hpp" +#include "opencv2/ts/cuda_test.hpp" +#include "opencv2/cann_interface.hpp" +using namespace cv; using namespace cv::cann; #undef EXPECT_MAT_NEAR #define EXPECT_MAT_NEAR(m1, m2, eps) EXPECT_PRED_FORMAT3(cvtest::assertMatNear, m1, m2, eps) #define ASSERT_MAT_NEAR(m1, m2, eps) ASSERT_PRED_FORMAT3(cvtest::assertMatNear, m1, m2, eps) -#endif +#define DEVICE_ID 0 + +Mat randomMat(int w, int h, int dtype, float min = 1.0f, float max = 10.0f); +Scalar randomScalar(); +float randomNum(); +int randomInterger(); +Mat genMask(); + +#endif 
//__OPENCV_TEST_PRECOMP_HPP__ diff --git a/modules/cannops/test/test_utils.cpp b/modules/cannops/test/test_utils.cpp new file mode 100644 index 00000000000..2141ebf906a --- /dev/null +++ b/modules/cannops/test/test_utils.cpp @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +// Random Generator +Mat randomMat(int w, int h, int dtype, float min, float max) +{ + Mat rnMat(w, h, dtype); + RNG rng(getTickCount()); + rng.fill(rnMat, RNG::UNIFORM, min, max); + return rnMat; +} +Scalar randomScalar() +{ + RNG rng(getTickCount()); + Scalar sc; + rng.fill(sc, RNG::UNIFORM, 1.0, 5.0); + return sc; +} +float randomNum() +{ + RNG rng(getTickCount()); + float rdnNum = float(rng.uniform(1.0, 5.0)); + return rdnNum; +} + +int randomInterger() +{ + RNG rng(getTickCount()); + float rdnNum = float(rng.uniform(1, 5)); + return rdnNum; +} + +Mat genMask() +{ + Mat mask = Mat::zeros(Size(10, 10), CV_8UC1); + rectangle(mask, cv::Rect(5, 5, 3, 3), Scalar(255), -1); + return mask; +}