diff --git a/modules/cannarithm/CMakeLists.txt b/modules/cannarithm/CMakeLists.txt new file mode 100644 index 00000000000..55bcc028510 --- /dev/null +++ b/modules/cannarithm/CMakeLists.txt @@ -0,0 +1,16 @@ + if(IOS OR WINRT OR ANDROID OR APPLE OR WIN32 OR (NOT HAVE_CANN)) + ocv_module_disable(cannarithm) + endif() + +set(the_description "Ascend-accelerated Operations on Matrices") + +ocv_add_module(cannarithm opencv_core WRAP python) +ocv_module_include_directories(${CANN_INCLUDE_DIRS}) +ocv_glob_module_sources() +ocv_install_used_external_targets(${CANN_LIBRARIES}) +ocv_create_module(${CANN_LIBRARIES}) + +ocv_include_directories(${CMAKE_SOURCE_DIR}/modules/ts/include) + +ocv_add_accuracy_tests(DEPENDS_ON opencv_cannarithm) +ocv_add_perf_tests(DEPENDS_ON opencv_cannarithm) diff --git a/modules/cannarithm/include/opencv2/acl_stream_accessor.hpp b/modules/cannarithm/include/opencv2/acl_stream_accessor.hpp new file mode 100644 index 00000000000..27118d807e3 --- /dev/null +++ b/modules/cannarithm/include/opencv2/acl_stream_accessor.hpp @@ -0,0 +1,40 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANN_STREAM_ACCESSOR_HPP +#define OPENCV_CANN_STREAM_ACCESSOR_HPP + +#include +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ + +//! @addtogroup cann_struct +//! @{ + +/** @brief Class that enables getting aclrtStream from cann::AclStream + */ +struct AclStreamAccessor +{ + CV_EXPORTS static aclrtStream getStream(const AclStream& stream); + CV_EXPORTS static AclStream wrapStream(aclrtStream stream); +}; + +/** @brief Class that enables getting aclrtEvent from cann::AclEvent + */ +struct AclEventAccessor +{ + CV_EXPORTS static aclrtEvent getEvent(const AclEvent& event); + CV_EXPORTS static AclEvent wrapEvent(aclrtEvent event); +}; + +//! 
@} cann_struct + +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANN_STREAM_ACCESSOR_HPP diff --git a/modules/cannarithm/include/opencv2/cann.hpp b/modules/cannarithm/include/opencv2/cann.hpp new file mode 100644 index 00000000000..6b79f045c0e --- /dev/null +++ b/modules/cannarithm/include/opencv2/cann.hpp @@ -0,0 +1,335 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANN_HPP +#define OPENCV_CANN_HPP + +#include "opencv2/core.hpp" + +/** + @defgroup cann Ascend-accelerated Computer Vision + @{ + @defgroup canncore Core part + @{ + @defgroup cann_struct Data Structures + @defgroup cann_init Initialization and Information + @} + @} + */ + +namespace cv +{ +namespace cann +{ +class AclStream; + +//! @addtogroup cann_struct +//! @{ + +//=================================================================================== +// AclMat +//=================================================================================== + +/** @brief Base storage class for NPU memory with reference counting. + * AclMat class has a similar interface to Mat and cuda::GpuMat, and works on [Ascend + * NPU](https://www.hiascend.com/) backend. + * @sa Mat cuda::GpuMat + */ + +class CV_EXPORTS_W AclMat +{ +public: + class CV_EXPORTS_W Allocator + { + public: + virtual ~Allocator() {} + + // allocator must fill data, step and refcount fields + virtual bool allocate(AclMat* mat, int rows, int cols, size_t elemSize) = 0; + virtual void free(AclMat* mat) = 0; + }; + + /** + * @brief Create default allocator for AclMat. This allocator allocates memory from the device for + * a specific size. + */ + CV_WRAP static AclMat::Allocator* defaultAllocator(); + + /** + * @brief Set allocator for AclMat. + * @param allocator + */ + CV_WRAP static void setDefaultAllocator(AclMat::Allocator* allocator); + + //! 
default constructor + CV_WRAP explicit AclMat(AclMat::Allocator* allocator_ = AclMat::defaultAllocator()); + + //! constructs AclMat of the specified size and type + CV_WRAP AclMat(int rows, int cols, int type, + AclMat::Allocator* allocator = AclMat::defaultAllocator()); + //! constructs AclMat of the specified size and type + CV_WRAP AclMat(Size size, int type, AclMat::Allocator* allocator = AclMat::defaultAllocator()); + + //! constructs AclMat and fills it with the specified value s + CV_WRAP AclMat(int rows, int cols, int type, Scalar& s, + AclMat::Allocator* allocator = AclMat::defaultAllocator()); + //! constructs AclMat and fills it with the specified value s + CV_WRAP AclMat(Size size, int type, Scalar& s, + AclMat::Allocator* allocator = AclMat::defaultAllocator()); + + //! copy constructor + CV_WRAP AclMat(const AclMat& m); + + //! constructor for AclMat headers pointing to user-allocated data + AclMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP); + //! constructor for AclMat headers pointing to user-allocated data + AclMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP); + + //! builds AclMat from host memory (Blocking call) + CV_WRAP explicit AclMat(InputArray arr, + AclMat::Allocator* allocator = AclMat::defaultAllocator()); + + //! assignment operators + AclMat& operator=(const AclMat& m); + + //! destructor - calls release() + ~AclMat(); + + //! sets some of the AclMat elements to s (Blocking call) + CV_WRAP AclMat& setTo(Scalar s); + //! sets some of the AclMat elements to s (Non-Blocking call) + CV_WRAP AclMat& setTo(Scalar s, AclStream& stream); + + //! swaps with other smart pointer + CV_WRAP void swap(AclMat& mat); + + //! allocates new AclMat data unless the AclMat already has specified size and type + CV_WRAP void create(int rows, int cols, int type); + + //! upload host memory data to AclMat (Blocking call) + CV_WRAP void upload(InputArray arr); + //! 
upload host memory data to AclMat (Non-Blocking call) + CV_WRAP void upload(InputArray arr, AclStream& stream); + + //! download data from AclMat to host (Blocking call) + CV_WRAP void download(OutputArray dst) const; + //! download data from AclMat to host (Non-Blocking call) + CV_WRAP void download(OutputArray dst, AclStream& stream) const; + + //! converts AclMat to another datatype (Blocking call) + CV_WRAP void convertTo(CV_OUT AclMat& dst, int rtype) const; + + //! converts AclMat to another datatype (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT AclMat& dst, int rtype, AclStream& stream) const; + + //! decreases reference counter, deallocate the data when reference counter reaches 0 + CV_WRAP void release(); + + //! returns element size in bytes + CV_WRAP size_t elemSize() const; + + //! returns the size of element channel in bytes + CV_WRAP size_t elemSize1() const; + + //! returns element type + CV_WRAP int type() const; + + //! returns element depth + CV_WRAP int depth() const; + + //! returns number of channels + CV_WRAP int channels() const; + + //! returns step/elemSize1() + CV_WRAP size_t step1() const; + + //! returns AclMat size : width == number of columns, height == number of rows + CV_WRAP Size size() const; + + //! returns true if AclMat data is NULL + CV_WRAP bool empty() const; + + //! internal use method: updates the continuity flag + CV_WRAP void updateContinuityFlag(); + + //! expand one channel mat to multi-channels (Blocking call) + //! @note, source mat must only have one channel, copy value to all channels. + CV_WRAP void expandTo(CV_OUT AclMat& dst, int channels) const; + + //! expand one channel mat to multi-channels (Non-Blocking call) + //! @note, source mat must only have one channel, copy value to all channels. + CV_WRAP void expandTo(CV_OUT AclMat& dst, int channels, AclStream& stream) const; + + /*! 
includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + + //! the number of rows and columns + int rows, cols; + + //! a distance between successive rows in bytes; includes the gap if any + CV_PROP size_t step; + + //! pointer to the data + uchar* data; + + //! pointer to the reference counter; + //! when AclMat points to user-allocated data, the pointer is NULL + int* refcount; + + //! helper fields used in locateROI and adjustROI + uchar* datastart; + const uchar* dataend; + + //! allocator + Allocator* allocator; +}; + +class AclStream; +class AclStreamAccessor; +class AclEvent; +class AclEventAccessor; +class DefaultDeviceInitializer; + +//=================================================================================== +// AclStream +//=================================================================================== + +/** @brief In AscendCL Stream(AclStream) is a task queue. Stream is used to manage the parallelism + * of tasks. The tasks inside a Stream are executed sequentially, that is, the Stream executes + * sequentially according to the sent tasks; the tasks in different Streams are executed in + * parallel. + * + * All Non-blocking functions should be passed a stream parameter. These functions return immediately after + * the task is submitted. Caller should wait on the stream until completion. + * + * Blocking functions implicitly use the default stream, and synchronize the stream before the function + * returns. + * @sa cuda::Stream + */ + +// TODO: Stream is defined in namespace cuda, and pybind code does not use a namespace of stream, +// change stream name to AclStream to avoid conflict. +class CV_EXPORTS_W AclStream +{ +public: + CV_WRAP AclStream(); + + //! blocks the current CPU thread until all operations in the stream are complete. + CV_WRAP void waitForCompletion(); + + //! blocks the current CPU thread until the event triggers. 
+ CV_WRAP void waitAclEvent(const cv::cann::AclEvent& event); + + /** + * @brief return default AclStream object for default Acl stream. + */ + CV_WRAP static AclStream& Null(); + + // acl symbols CANNOT be used in any hpp files. Use an inner class to avoid acl symbols being defined in + // hpp. + class Impl; + + // add temporary mat for async release. + void addToAsyncRelease(const AclMat& mat); + +private: + Ptr impl_; + AclStream(const Ptr& impl); + + friend class AclStreamAccessor; + friend class DefaultDeviceInitializer; +}; + +/** + * @brief AclEvent to synchronize between different streams. + */ +class CV_EXPORTS_W AclEvent +{ +public: + CV_WRAP AclEvent(); + + //! records an event + CV_WRAP void record(AclStream& stream = AclStream::Null()); + + //! waits for an event to complete + CV_WRAP void waitForComplete() const; + + class Impl; + +private: + Ptr impl_; + AclEvent(const Ptr& impl); + + friend class AclEventAccessor; +}; + +/** @brief Bindings overload to create a Stream object from the address stored in an existing CANN + * Runtime API stream pointer (aclrtStream). + * @param aclStreamAddress Memory address stored in a CANN Runtime API stream pointer + * (aclrtStream). The created Stream object does not perform any allocation or deallocation and simply + * wraps existing raw CANN Runtime API stream pointer. + * @note Overload for generation of bindings only, not exported or intended for use internally from C++. + */ +CV_EXPORTS_W AclStream wrapStream(size_t aclStreamAddress); + +//! @} cann_struct + +//=================================================================================== +// Initialization & Info +//=================================================================================== + +//! @addtogroup cann_init +//! @{ + +//! Get Ascend matrix object from Input array, upload matrix memory if needed. (Blocking call) +AclMat getInputMat(InputArray src); +//! Get Ascend matrix object from Input array, upload matrix memory if needed. 
(Non-Blocking call) +AclMat getInputMat(InputArray src, AclStream& stream); + +//! Get Ascend matrix object from Output array, upload matrix memory if needed. +AclMat getOutputMat(OutputArray dst, int rows, int cols, int type); + +//! Sync output matrix to Output array, download matrix memory if needed. +void syncOutput(const AclMat& dst, OutputArray _dst); + +/** + * @brief Choose Ascend npu device. + */ +CV_EXPORTS_W void setDevice(int device); + +/** + * @brief Clear all context created in current Ascend device. + */ +CV_EXPORTS_W void resetDevice(); + +/** + * @brief Get current Ascend device. + */ +CV_EXPORTS_W int32_t getDevice(); + +/** + * @brief init AscendCL. + */ +CV_EXPORTS_W void initAcl(); + +/** + * @brief finalize AscendCL. + * @note finalizeAcl can only be called once per process. Call this function after all AscendCL + * options finished. + */ +CV_EXPORTS_W void finalizeAcl(); + +//! @} cann_init + +} // namespace cann +} // namespace cv + +#include "opencv2/cann.inl.hpp" + +#endif /* OPENCV_CANN_HPP */ diff --git a/modules/cannarithm/include/opencv2/cann.inl.hpp b/modules/cannarithm/include/opencv2/cann.inl.hpp new file mode 100644 index 00000000000..0c85e8dcc7a --- /dev/null +++ b/modules/cannarithm/include/opencv2/cann.inl.hpp @@ -0,0 +1,111 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_CANNINL_HPP +#define OPENCV_CANNINL_HPP + +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ +inline AclMat::AclMat(AclMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), + allocator(allocator_) +{ +} + +inline AclMat::AclMat(int rows_, int cols_, int type_, AclMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), + allocator(allocator_) +{ + if (rows_ > 0 && cols_ > 0) + create(rows_, cols_, type_); +} + +inline AclMat::AclMat(Size size_, int type_, AclMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), + allocator(allocator_) +{ + if (size_.height > 0 && size_.width > 0) + create(size_.height, size_.width, type_); +} + +inline AclMat::AclMat(InputArray arr, AclMat::Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), + allocator(allocator_) +{ + upload(arr); +} + +inline AclMat::AclMat(const AclMat& m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), + datastart(m.datastart), dataend(m.dataend), allocator(m.allocator) +{ + if (refcount) + CV_XADD(refcount, 1); +} + +inline AclMat::~AclMat() { release(); } + +inline AclMat& AclMat::operator=(const AclMat& m) +{ + if (this != &m) + { + AclMat temp(m); + swap(temp); + } + + return *this; +} + +inline void AclMat::swap(AclMat& b) +{ + std::swap(flags, b.flags); + std::swap(rows, b.rows); + std::swap(cols, b.cols); + std::swap(step, b.step); + std::swap(data, b.data); + std::swap(datastart, b.datastart); + std::swap(dataend, b.dataend); + std::swap(refcount, b.refcount); + std::swap(allocator, b.allocator); +} + +inline void AclMat::release() +{ + CV_DbgAssert(allocator != 0); + + if (refcount && CV_XADD(refcount, -1) == 1) + allocator->free(this); + + dataend = data = datastart = 0; + 
step = rows = cols = 0; + refcount = 0; +} + +inline size_t AclMat::elemSize() const { return CV_ELEM_SIZE(flags); } + +inline size_t AclMat::elemSize1() const { return CV_ELEM_SIZE1(flags); } + +inline int AclMat::type() const { return CV_MAT_TYPE(flags); } + +inline int AclMat::depth() const { return CV_MAT_DEPTH(flags); } + +inline int AclMat::channels() const { return CV_MAT_CN(flags); } + +inline size_t AclMat::step1() const { return step / elemSize1(); } + +inline Size AclMat::size() const { return Size(cols, rows); } + +inline bool AclMat::empty() const { return data == 0; } + +inline AclStream::AclStream(const Ptr& impl) : impl_(impl) {} + +inline AclEvent::AclEvent(const Ptr& impl) : impl_(impl) {} +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNINL_HPP diff --git a/modules/cannarithm/include/opencv2/cann_arithm.hpp b/modules/cannarithm/include/opencv2/cann_arithm.hpp new file mode 100644 index 00000000000..9a0f3f1655f --- /dev/null +++ b/modules/cannarithm/include/opencv2/cann_arithm.hpp @@ -0,0 +1,176 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANNARITHM_HPP +#define OPENCV_CANNARITHM_HPP + +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ + +/** + @addtogroup cann + @{ + @defgroup cannarithm Operations on Matrices + @{ + @defgroup cannarithm_elem Per-element Operations + @} + @} + */ + +//! @addtogroup cannarithm_elem +//! @{ + +/** @brief Computes a matrix-matrix or matrix-scalar sum. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. 
+ * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param dtype Optional depth of the output array. + * @param stream AclStream for the asynchronous version. + * @sa cv::add cuda::add + */ +CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AclStream& stream = AclStream::Null()); +// This code should not be compiled nor analyzed by doxygen. This interface exists only for python binding +// code generation. add(InputArray, InputArray ...) can accept Scalar as its parameter. (Scalar -> Mat +// -> InputArray) +#ifdef NEVER_DEFINED +CV_EXPORTS_W void add(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), + int dtype = -1, AclStream& stream = AclStream::Null()); +CV_EXPORTS_W void add(Scalar src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), + int dtype = -1, AclStream& stream = AclStream::Null()); +#endif + +/** @brief Computes a matrix-matrix or matrix-scalar difference. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param dtype Optional depth of the output array. + * @param stream AclStream for the asynchronous version. 
+ * @sa cv::subtract cuda::subtract + */ +CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AclStream& stream = AclStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void subtract(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AclStream& stream = AclStream::Null()); +CV_EXPORTS_W void subtract(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1, + AclStream& stream = AclStream::Null()); +#endif + +/** @brief Computes a matrix-matrix or matrix-scalar per-element product. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param scale Optional scale factor. + * @param dtype Optional depth of the output array. + * @param stream AclStream for the asynchronous version. + * @sa cv::multiply cuda::multiply + */ +CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, OutputArray dst, float scale, + int dtype = -1, AclStream& stream = AclStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void multiply(InputArray src1, Scalar src2, OutputArray dst, float scale, + int dtype = -1, AclStream& stream = AclStream::Null()); +CV_EXPORTS_W void multiply(Scalar src1, InputArray src2, OutputArray dst, float scale, + int dtype = -1, AclStream& stream = AclStream::Null()); +#endif + +/** @brief Computes a matrix-matrix or matrix-scalar division. + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. 
+ * @param scale Optional scale factor. + * @param dtype Optional depth of the output array. + * @param stream AclStream for the asynchronous version. + * @sa cv::divide cuda::divide + */ +CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, float scale, + int dtype = -1, AclStream& stream = AclStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void divide(InputArray src1, Scalar src2, OutputArray dst, float scale, int dtype = -1, + AclStream& stream = AclStream::Null()); +CV_EXPORTS_W void divide(Scalar src1, InputArray src2, OutputArray dst, float scale, int dtype = -1, + AclStream& stream = AclStream::Null()); +#endif + +/** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar). + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param stream AclStream for the asynchronous version. + * @sa cv::bitwise_and cuda::bitwise_and + */ +CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void bitwise_and(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +CV_EXPORTS_W void bitwise_and(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +#endif + +/** @brief Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar). + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. 
+ * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param stream AclStream for the asynchronous version. + * @sa cv::bitwise_or cuda::bitwise_or + */ +CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void bitwise_or(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +CV_EXPORTS_W void bitwise_or(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +#endif + +/** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and + * scalar). + * @param src1 First source matrix or scalar. + * @param src2 Second source matrix or scalar. + * @param dst Destination matrix that has the same size and number of channels as the input + * array(s). The depth is defined by dtype or src1 depth. + * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the + * destination array to be changed. The mask can be used only with single channel images. + * @param stream AclStream for the asynchronous version. 
+ * @sa cv::bitwise_xor cuda::bitwise_xor + */ +CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +#ifdef NEVER_DEFINED +CV_EXPORTS_W void bitwise_xor(InputArray src1, Scalar src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +CV_EXPORTS_W void bitwise_xor(Scalar src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), AclStream& stream = AclStream::Null()); +#endif + +//! @} cannarithm_elem + +} // namespace cann +} // namespace cv + +#endif /* OPENCV_CANNARITHM_HPP */ diff --git a/modules/cannarithm/include/opencv2/cann_call.hpp b/modules/cannarithm/include/opencv2/cann_call.hpp new file mode 100644 index 00000000000..6afdd266a21 --- /dev/null +++ b/modules/cannarithm/include/opencv2/cann_call.hpp @@ -0,0 +1,52 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_CANNCALL_HPP +#define OPENCV_CANNCALL_HPP + +#include +#include +#include "opencv2/cann.hpp" + +namespace cv +{ +namespace cann +{ +struct AclAttribute +{ + virtual ~AclAttribute() = default; + virtual void addAttr(aclopAttr* opAttr) = 0; +}; + +#define DEFINE_ATTR(FUNC, TYPE) \ + class Acl##FUNC##Attribute : public AclAttribute \ + { \ + const char* name; \ + TYPE value; \ + \ + public: \ + Acl##FUNC##Attribute(const char* _name, TYPE _value) : name(_name), value(_value){}; \ + void addAttr(aclopAttr* opAttr) override \ + { \ + CV_ACL_SAFE_CALL(aclopSetAttr##FUNC(opAttr, name, value)); \ + } \ + } + +DEFINE_ATTR(Float, float); +DEFINE_ATTR(String, const char*); + +static std::vector emptyattr; +void aclOneInput(const AclMat& src, AclMat& dst, const char* op, + AclStream& stream = AclStream::Null(), + std::vector& attrs = emptyattr); + +void aclTwoInputs(const AclMat& src1, const AclMat& src2, AclMat& dst, const char* op, + AclStream& stream = AclStream::Null()); + +void transNCHWToNHWC(const AclMat& src, AclMat& dst, AclStream& stream = AclStream::Null()); + +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNCALL_HPP diff --git a/modules/cannarithm/include/opencv2/cann_common.hpp b/modules/cannarithm/include/opencv2/cann_common.hpp new file mode 100644 index 00000000000..ecff9f07589 --- /dev/null +++ b/modules/cannarithm/include/opencv2/cann_common.hpp @@ -0,0 +1,43 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANN_COMMON_HPP +#define OPENCV_CANN_COMMON_HPP + +#include + +namespace cv +{ +namespace cann +{ +static inline void checkAclError(aclError err, const char* file, const int line, const char* func) +{ + if (ACL_SUCCESS != err) + { + const char* errMsg = aclGetRecentErrMsg(); + cv::error(cv::Error::AscendApiCallError, errMsg == nullptr ? 
"" : errMsg, func, file, line); + } +} + +static inline void checkAclPtr(void* ptr, const char* file, const int line, const char* func) +{ + if (nullptr == ptr) + { + const char* errMsg = aclGetRecentErrMsg(); + cv::error(cv::Error::AscendApiCallError, errMsg == nullptr ? "" : errMsg, func, file, line); + } +} + +} // namespace cann +} // namespace cv + +#define CV_ACL_SAFE_CALL(expr) cv::cann::checkAclError((expr), __FILE__, __LINE__, CV_Func) +#define CV_ACL_SAFE_CALL_PTR(expr) \ + ({ \ + auto ptr = (expr); \ + cv::cann::checkAclPtr(ptr, __FILE__, __LINE__, CV_Func); \ + ptr; \ + }) + +#endif // OPENCV_CANN_COMMON_HPP diff --git a/modules/cannarithm/include/opencv2/cann_prepare.hpp b/modules/cannarithm/include/opencv2/cann_prepare.hpp new file mode 100644 index 00000000000..cc1aba25618 --- /dev/null +++ b/modules/cannarithm/include/opencv2/cann_prepare.hpp @@ -0,0 +1,96 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CANNPREPARE_HPP +#define OPENCV_CANNPREPARE_HPP + +#include +#include +#include "opencv2/core.hpp" +#include "opencv2/cann_common.hpp" + +namespace cv +{ +namespace cann +{ +struct CannPreparation +{ + CannPreparation() { opAttr_ = CV_ACL_SAFE_CALL_PTR(aclopCreateAttr()); } + + virtual ~CannPreparation() + { + for (auto desc : inputDesc_) + { + aclDestroyTensorDesc(desc); + } + + for (auto desc : outputDesc_) + { + aclDestroyTensorDesc(desc); + } + + for (auto buf : inputBuffers_) + { + aclDestroyDataBuffer(buf); + } + + for (auto buf : outputBuffers_) + { + aclDestroyDataBuffer(buf); + } + + aclopDestroyAttr(opAttr_); + } + + std::vector inputBuffers_; + std::vector outputBuffers_; + std::vector inputDesc_; + std::vector outputDesc_; + aclopAttr* opAttr_; +}; + +#define CANN_PREPARE_ADD_ATTR(var, type, ...) 
\ + do \ + { \ + CV_ACL_SAFE_CALL(aclopSetAttr##type(var.opAttr_, __VA_ARGS__)); \ + } while (0) + +#define CANN_PREPARE_INPUTDESC(var, ...) \ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateTensorDesc(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + var.inputDesc_.push_back(_rPtr); \ + } while (0) + +#define CANN_PREPARE_OUTPUTDESC(var, ...) \ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateTensorDesc(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + var.outputDesc_.push_back(_rPtr); \ + } while (0) + +#define CANN_PREPARE_INPUTBUFFER(var, ...) \ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateDataBuffer(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + var.inputBuffers_.push_back(_rPtr); \ + } while (0) + +#define CANN_PREPARE_OUTPUTBUFFER(var, ...) \ + do \ + { \ + auto _rPtr = CV_ACL_SAFE_CALL_PTR(aclCreateDataBuffer(__VA_ARGS__)); \ + if (_rPtr != nullptr) \ + var.outputBuffers_.push_back(_rPtr); \ + } while (0) + +aclDataType getACLType(int opencvdepth); + +} // namespace cann +} // namespace cv + +#endif // OPENCV_CANNPREPARE_HPP diff --git a/modules/cannarithm/misc/python/pyopencv_cann.hpp b/modules/cannarithm/misc/python/pyopencv_cann.hpp new file mode 100644 index 00000000000..61dc824c886 --- /dev/null +++ b/modules/cannarithm/misc/python/pyopencv_cann.hpp @@ -0,0 +1,23 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifdef HAVE_OPENCV_CORE + +#include "opencv2/cann.hpp" + +typedef std::vector vector_AclMat; +typedef cann::AclMat::Allocator AclMat_Allocator; + +CV_PY_TO_CLASS(cann::AclMat); +CV_PY_TO_CLASS(cann::AclStream); + +CV_PY_TO_CLASS_PTR(cann::AclMat); +CV_PY_TO_CLASS_PTR(cann::AclMat::Allocator); + +CV_PY_FROM_CLASS(cann::AclMat); +CV_PY_FROM_CLASS(cann::AclStream); + +CV_PY_FROM_CLASS_PTR(cann::AclMat::Allocator); + +#endif diff --git a/modules/cannarithm/perf/perf_element_operations.cpp b/modules/cannarithm/perf/perf_element_operations.cpp new file mode 100644 index 00000000000..5299f4b3c78 --- /dev/null +++ b/modules/cannarithm/perf/perf_element_operations.cpp @@ -0,0 +1,81 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "perf_precomp.hpp" +#include "opencv2/cann_arithm.hpp" + +namespace opencv_test +{ +namespace +{ + +#define ARITHM_MAT_DEPTH Values(CV_32S, CV_32SC3) +#define TYPICAL_ACL_MAT_SIZES ::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz4320p +#define DEVICE_ID 0 +#define DEF_PARAM_TEST(name, ...) \ + typedef ::perf::TestBaseWithParam> name + +// NPU perf test: runs cv::cann::op inside TEST_CYCLE on AclMat inputs that are uploaded before the TEST_CYCLE loop +DEF_PARAM_TEST(NPU, cv::Size, perf::MatDepth); +#define TEST_NPU_OP_MAT(idx, op, ...) 
\ + PERF_TEST_P(NPU, MAT_##op##_MAT_##idx, \ + testing::Combine(testing::Values(TYPICAL_ACL_MAT_SIZES), ARITHM_MAT_DEPTH)) \ + { \ + Size size = GET_PARAM(0); \ + int depth = GET_PARAM(1); \ + \ + Mat src1(size, depth), src2(size, depth); \ + declare.in(src1, WARMUP_RNG); \ + declare.in(src2, WARMUP_RNG); \ + cv::cann::setDevice(DEVICE_ID); \ + \ + AclMat npu_src1, npu_src2, dst; \ + npu_src1.upload(src1); \ + npu_src2.upload(src2); \ + AclStream stream; \ + TEST_CYCLE() { cv::cann::op(npu_src1, npu_src2, dst, __VA_ARGS__); } \ + SANITY_CHECK_NOTHING(); \ + cv::cann::resetDevice(); \ + } + +// CPU perf test: runs the same operation through cv::op on host Mat objects, for comparison with the NPU runs +DEF_PARAM_TEST(CPU, cv::Size, perf::MatDepth); +#define TEST_CPU_OP_MAT(idx, op, ...) \ + PERF_TEST_P(CPU, MAT_##op##_MAT_##idx, \ + testing::Combine(testing::Values(TYPICAL_ACL_MAT_SIZES), ARITHM_MAT_DEPTH)) \ + { \ + Size size = GET_PARAM(0); \ + int depth = GET_PARAM(1); \ + \ + Mat src1(size, depth), src2(size, depth), dst(size, depth); \ + declare.in(src1, WARMUP_RNG); \ + declare.in(src2, WARMUP_RNG); \ + \ + TEST_CYCLE() cv::op(src1, src2, dst, __VA_ARGS__); \ + SANITY_CHECK_NOTHING(); \ + } + +TEST_NPU_OP_MAT(1, add, noArray(), -1); +TEST_CPU_OP_MAT(1, add, noArray(), -1); + +TEST_NPU_OP_MAT(1, subtract, noArray(), -1); +TEST_CPU_OP_MAT(1, subtract, noArray(), -1); + +TEST_NPU_OP_MAT(1, multiply, 1, -1); +TEST_CPU_OP_MAT(1, multiply, 1, -1); + +TEST_NPU_OP_MAT(1, divide, 1, -1); +TEST_CPU_OP_MAT(1, divide, 1, -1); + +TEST_NPU_OP_MAT(1, bitwise_and, noArray()); +TEST_CPU_OP_MAT(1, bitwise_and, noArray()); + +TEST_NPU_OP_MAT(1, bitwise_or, noArray()); +TEST_CPU_OP_MAT(1, bitwise_or, noArray()); + +TEST_NPU_OP_MAT(1, bitwise_xor, noArray()); +TEST_CPU_OP_MAT(1, bitwise_xor, noArray()); + +} // namespace +} // namespace opencv_test diff --git a/modules/cannarithm/perf/perf_main.cpp b/modules/cannarithm/perf/perf_main.cpp new file mode 100644 index 00000000000..13cde8f491e --- /dev/null +++ b/modules/cannarithm/perf/perf_main.cpp @@ -0,0 +1,36 @@ +// This 
file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "perf_precomp.hpp" +#include "opencv2/cann_arithm.hpp" +using namespace perf; + +class CannEnvironment : public ::testing::Environment +{ +public: + virtual ~CannEnvironment() = default; + virtual void SetUp() CV_OVERRIDE { + cv::cann::initAcl(); + + // Device warm-up: upload two small mats and run one cv::cann::add on device 0 before any test runs + Scalar s1(1,2,3), s2(4,5,6); + Mat src1(10, 10, CV_32SC3, s1), src2(10, 10, CV_32SC3, s2); + cv::cann::setDevice(0); + + cv::cann::AclMat npu_src1, npu_src2, dst; + npu_src1.upload(src1); + npu_src2.upload(src2); + cv::cann::add(npu_src1, npu_src2, dst); + cv::cann::resetDevice(); + } + virtual void TearDown() CV_OVERRIDE { cv::cann::finalizeAcl(); } +}; + +static void initTests() +{ + CannEnvironment* cannEnv = new CannEnvironment(); + ::testing::AddGlobalTestEnvironment(cannEnv); +} + +CV_PERF_TEST_MAIN("cannarithm", initTests()) diff --git a/modules/cannarithm/perf/perf_precomp.hpp b/modules/cannarithm/perf/perf_precomp.hpp new file mode 100644 index 00000000000..d0ff9533235 --- /dev/null +++ b/modules/cannarithm/perf/perf_precomp.hpp @@ -0,0 +1,20 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/ts/ts_perf.hpp" +#include "opencv2/cann.hpp" + +namespace opencv_test +{ +using namespace perf; +using namespace testing; +using namespace cv; +using namespace cv::cann; +} // namespace opencv_test + +#endif diff --git a/modules/cannarithm/samples/sample.cpp b/modules/cannarithm/samples/sample.cpp new file mode 100644 index 00000000000..772ca96f54f --- /dev/null +++ b/modules/cannarithm/samples/sample.cpp @@ -0,0 +1,32 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +//g++ -o sample sample.cpp -I opencv/include/opencv4/ -L opencv/build/install/lib/ -l opencv_cannarithm -l opencv_core -l opencv_imgcodecs + +#include +#include +#include +#include + +int main() +{ + cv::Mat img = cv::imread("/path/to/img"); + + cv::cann::initAcl(); + cv::cann::setDevice(0); + + cv::cann::AclMat aclMat = cv::cann::AclMat(); + aclMat.upload(img); + + cv::cann::AclMat aclMatSum; + cv::cann::add(aclMat, aclMat, aclMatSum); + cv::Mat imgResult; + aclMatSum.download(imgResult); + std::cout<data), elemSize * cols * rows, ACL_MEM_MALLOC_HUGE_FIRST)); + + mat->step = cols * elemSize; + mat->refcount = (int*)cv::fastMalloc(sizeof(int)); + + return true; +} + +void DefaultAllocator::free(cv::cann::AclMat* mat) +{ + aclrtFree(mat->datastart); + cv::fastFree(mat->refcount); +} + +DefaultAllocator cannDefaultAllocator; +cv::cann::AclMat::Allocator* g_defaultAllocator = &cannDefaultAllocator; +} // namespace + +namespace cv +{ +namespace cann +{ +AclMat::Allocator* AclMat::defaultAllocator() { return g_defaultAllocator; } + +void AclMat::setDefaultAllocator(AclMat::Allocator* allocator) +{ + CV_Assert(allocator != 0); + g_defaultAllocator = allocator; +} + +// TODO: this function is copied from matrix.cpp, which is a local 
symbol there and can be +// referenced. NOTE(review): presumably "cannot be referenced" was meant, hence the copy. +static int updateContinuityFlag(int flags, int dims, const int* size, const size_t* step) +{ + int i, j; + for (i = 0; i < dims; i++) + { + if (size[i] > 1) + break; + } + + uint64 t = (uint64)size[std::min(i, dims - 1)] * CV_MAT_CN(flags); + for (j = dims - 1; j > i; j--) + { + t *= size[j]; + if (step[j] * size[j] < step[j - 1]) + break; + } + + if (j <= i && t == (uint64)(int)t) + return flags | Mat::CONTINUOUS_FLAG; + return flags & ~Mat::CONTINUOUS_FLAG; +} + +void AclMat::updateContinuityFlag() +{ + int sz[] = {rows, cols}; + size_t steps[] = {step, elemSize()}; + flags = cv::cann::updateContinuityFlag(flags, 2, sz, steps); +} + +AclMat::AclMat(int rows_, int cols_, int type_, void* data_, size_t step_) + : flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_), step(step_), + data((uchar*)data_), refcount(0), datastart((uchar*)data_), dataend((const uchar*)data_), + allocator(defaultAllocator()) +{ + size_t minstep = cols * elemSize(); + + if (step == Mat::AUTO_STEP) + { + step = minstep; + } + else + { + if (rows == 1) + step = minstep; + + CV_DbgAssert(step >= minstep); + } + + dataend += step * (rows - 1) + minstep; + updateContinuityFlag(); +} + +AclMat::AclMat(Size size_, int type_, void* data_, size_t step_) + : flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width), + step(step_), data((uchar*)data_), refcount(0), datastart((uchar*)data_), + dataend((const uchar*)data_), allocator(defaultAllocator()) +{ + size_t minstep = cols * elemSize(); + + if (step == Mat::AUTO_STEP) + { + step = minstep; + } + else + { + if (rows == 1) + step = minstep; + + CV_DbgAssert(step >= minstep); + } + + dataend += step * (rows - 1) + minstep; + updateContinuityFlag(); +} + +void AclMat::create(int _rows, int _cols, int _type) +{ + CV_DbgAssert(_rows >= 0 && _cols >= 0); + + _type &= Mat::TYPE_MASK; + + if (rows == _rows && cols == _cols && type() == _type && data) + return; + + 
if (data) + release(); + + if (_rows > 0 && _cols > 0) + { + flags = Mat::MAGIC_VAL + _type; + rows = _rows; + cols = _cols; + + const size_t esz = elemSize(); + + bool allocSuccess = allocator->allocate(this, rows, cols, esz); + + if (!allocSuccess) + { + // the custom allocator failed: fall back to the default allocator (the assert below fires if that fails too) + allocator = defaultAllocator(); + allocSuccess = allocator->allocate(this, rows, cols, esz); + CV_Assert(allocSuccess); + } + + if (esz * cols == step) + flags |= Mat::CONTINUOUS_FLAG; + + datastart = data; + dataend = data + step * (rows - 1) + cols * esz; + + if (refcount) + *refcount = 1; + } +} + +void AclMat::upload(InputArray arr) +{ + Mat mat = arr.getMat(); + CV_DbgAssert(!mat.empty()); + create(mat.rows, mat.cols, mat.type()); + CV_ACL_SAFE_CALL(aclrtMemcpy2d(data, step, mat.data, mat.step[0], cols * elemSize(), rows, + ACL_MEMCPY_HOST_TO_DEVICE)); +} + +void AclMat::upload(InputArray arr, AclStream& _stream) +{ + Mat mat = arr.getMat(); + CV_DbgAssert(!mat.empty()); + create(mat.rows, mat.cols, mat.type()); + aclrtStream stream = AclStreamAccessor::getStream(_stream); + CV_ACL_SAFE_CALL(aclrtMemcpy2dAsync(data, step, mat.data, mat.step[0], cols * elemSize(), rows, + ACL_MEMCPY_HOST_TO_DEVICE, stream)); +} + +void AclMat::download(OutputArray _dst) const +{ + CV_DbgAssert(!empty()); + + _dst.create(size(), type()); + Mat dst = _dst.getMat(); + CV_ACL_SAFE_CALL(aclrtMemcpy2d(dst.data, dst.step[0], data, step, cols * elemSize(), rows, + ACL_MEMCPY_DEVICE_TO_HOST)); +} + +void AclMat::download(OutputArray _dst, AclStream& _stream) const +{ + CV_DbgAssert(!empty()); + + _dst.create(size(), type()); + Mat dst = _dst.getMat(); + aclrtStream stream = AclStreamAccessor::getStream(_stream); + CV_ACL_SAFE_CALL(aclrtMemcpy2dAsync(dst.data, dst.step[0], data, step, cols * elemSize(), rows, + ACL_MEMCPY_DEVICE_TO_HOST, stream)); +} + +AclMat::AclMat(int rows_, int cols_, int type_, Scalar& s_, AclMat::Allocator* allocator_) + : flags(0), rows(rows_), cols(cols_), 
step(0), data(0), refcount(0), datastart(0), dataend(0), + allocator(allocator_) +{ + create(rows_, cols_, type_); + setTo(s_); +} + +AclMat::AclMat(Size size_, int type_, Scalar& s_, AclMat::Allocator* allocator_) + : flags(0), rows(size_.height), cols(size_.width), step(0), data(0), refcount(0), datastart(0), + dataend(0), allocator(allocator_) +{ + create(size_.height, size_.width, type_); + setTo(s_); +} + +AclMat& AclMat::setTo(Scalar s_) { return setTo(s_, AclStream::Null()); } + +AclMat& AclMat::setTo(Scalar s_, AclStream& stream_) +{ + size_t totalBytes = (size_t)rows * cols * elemSize(); + if (totalBytes == 0) + return *this; + + CV_ACL_SAFE_CALL(aclrtMemset(data, totalBytes, 0, totalBytes)); + + Mat scMat(1, 1, type(), s_); + AclMat scAclMat; + scAclMat.upload(scMat); + + AclMat dst(rows, cols, type()); + // TODO use AssignAdd to avoid memcpy, or use broadcast. + aclTwoInputs(*this, scAclMat, dst, "Add", stream_); + swap(dst); + + return *this; +} + +void AclMat::convertTo(AclMat& dst, int rtype) const { convertTo(dst, rtype, AclStream::Null()); } + +void AclMat::convertTo(AclMat& dst, int _rtype, AclStream& _stream) const +{ + int cn = channels(); + dst.create(rows, cols, CV_MAKE_TYPE(_rtype, cn)); + aclOneInput(*this, dst, "Cast", _stream); +} + +void AclMat::expandTo(CV_OUT AclMat& dst, int chs) const { expandTo(dst, chs, AclStream::Null()); } + +void AclMat::expandTo(CV_OUT AclMat& dst, int chs, AclStream& stream) const +{ + CV_Assert(channels() == 1); + + // TODO use in-place expand. 
+ AclMat NCHW_mat; + NCHW_mat.create(rows, cols, CV_MAKE_TYPE(depth(), chs)); + + aclrtStream rawStream = AclStreamAccessor::getStream(stream); + size_t expandsize = rows * step * chs; + uchar* dataptr = (uchar*)NCHW_mat.data; + for (int ch = 0; ch < chs; ch++) + { + if (rawStream == nullptr) + { + CV_ACL_SAFE_CALL( + aclrtMemcpy(dataptr, expandsize, data, rows * step, ACL_MEMCPY_DEVICE_TO_DEVICE)); + } + else + { + CV_ACL_SAFE_CALL(aclrtMemcpyAsync(dataptr, expandsize, data, rows * step, + ACL_MEMCPY_DEVICE_TO_DEVICE, rawStream)); + } + + dataptr += (step * rows); + } + + dst.create(rows, cols, CV_MAKE_TYPE(depth(), chs)); + + transNCHWToNHWC(NCHW_mat, dst, stream); +} + +AclStream wrapStream(size_t aclStreamAddress) +{ + return AclStreamAccessor::wrapStream(reinterpret_cast(aclStreamAddress)); +} + +static AclMat getAclMat(InputArray arr) +{ + _InputArray::KindFlag k = arr.kind(); + if (k == _InputArray::ACL_MAT) + { + const cann::AclMat* a_mat = (const cann::AclMat*)arr.getObj(); + return *a_mat; + } + + if (k == _InputArray::NONE) + return cann::AclMat(); + + CV_Error(cv::Error::StsNotImplemented, "getAclMat is available only for cann::AclMat"); +} + +AclMat getInputMat(InputArray _src) +{ + AclMat src; + if (_src.kind() == _InputArray::ACL_MAT) + { + src = getAclMat(_src); + } + else if (!_src.empty()) + { + src.upload(_src); + } + return src; +} + +AclMat getInputMat(InputArray _src, AclStream& stream) +{ + AclMat src; + if (_src.kind() == _InputArray::ACL_MAT) + { + src = getAclMat(_src); + } + else if (!_src.empty()) + { + aclrtStream rawStream = AclStreamAccessor::getStream(stream); + if (rawStream == nullptr) + { + src.upload(_src); + } + else + { + src.upload(_src, stream); + } + } + return src; +} + +AclMat getOutputMat(OutputArray _dst, int rows, int cols, int type) +{ + AclMat dst; + if (_dst.kind() == _InputArray::ACL_MAT) + { + ((cann::AclMat*)(_dst.getObj()))->create(rows, cols, type); + dst = getAclMat(_dst); + } + else + { + dst.create(rows, 
cols, type); + } + return dst; +} + +void syncOutput(const AclMat& dst, OutputArray _dst) +{ + if (_dst.kind() != _InputArray::ACL_MAT) + { + dst.download(_dst); + } +} + +/********************************************Device********************************************/ + +void setDevice(int device_id) +{ + aclrtContext context; + CV_ACL_SAFE_CALL(aclrtSetDevice(device_id)); + CV_ACL_SAFE_CALL(aclrtCreateContext(&context, device_id)); +} + +void resetDevice() { CV_ACL_SAFE_CALL(aclrtResetDevice(getDevice())); } + +int32_t getDevice() +{ + int32_t deviceId; + CV_ACL_SAFE_CALL(aclrtGetDevice(&deviceId)); + return deviceId; +} + +void initAcl() { CV_ACL_SAFE_CALL(aclInit(nullptr)); } + +void finalizeAcl() { CV_ACL_SAFE_CALL(aclFinalize()); } + +class DefaultDeviceInitializer +{ +public: + DefaultDeviceInitializer(); + ~DefaultDeviceInitializer(); + + AclStream& getNullAclStream(int deviceId); + +private: + std::vector> streams_; + Mutex streams_mtx_; +}; + +DefaultDeviceInitializer::DefaultDeviceInitializer() {} + +DefaultDeviceInitializer::~DefaultDeviceInitializer() { streams_.clear(); } + +AclStream& DefaultDeviceInitializer::getNullAclStream(int deviceId) +{ + AutoLock lock(streams_mtx_); + + if (streams_.empty()) + { + uint32_t deviceCount; + CV_ACL_SAFE_CALL(aclrtGetDeviceCount(&deviceCount)); + + if (deviceCount > 0) + streams_.resize(deviceCount); + } + + CV_DbgAssert(deviceId >= 0 && deviceId < static_cast(streams_.size())); + + if (streams_[deviceId].empty()) + { + aclrtStream stream = nullptr; + Ptr impl = makePtr(stream); + streams_[deviceId] = Ptr(new AclStream(impl)); + } + + return *streams_[deviceId]; +} + +DefaultDeviceInitializer initializer; + +/********************************************AclEvent********************************************/ +class AclEvent::Impl +{ +public: + aclrtEvent event; + bool ownEvent; + + Impl(); + explicit Impl(aclrtEvent event); + + ~Impl(); +}; + +AclEvent::Impl::Impl() : event(nullptr), ownEvent(true) +{ + 
CV_ACL_SAFE_CALL(aclrtCreateEvent(&event)); +} + +AclEvent::Impl::Impl(aclrtEvent e) : event(e), ownEvent(false) {} + +AclEvent::Impl::~Impl() +{ + if (event && ownEvent) + { + CV_ACL_SAFE_CALL(aclrtDestroyEvent(event)); + } +} + +aclrtEvent AclEventAccessor::getEvent(const AclEvent& event) { return event.impl_->event; } + +AclEvent AclEventAccessor::wrapEvent(aclrtEvent event) +{ + return AclEvent(makePtr(event)); +} + +AclEvent::AclEvent() { impl_ = makePtr(); } + +void AclEvent::record(AclStream& stream) +{ + CV_ACL_SAFE_CALL(aclrtRecordEvent(impl_->event, AclStreamAccessor::getStream(stream))); +} + +void AclEvent::waitForComplete() const { CV_ACL_SAFE_CALL(aclrtSynchronizeEvent(impl_->event)); } + +/******************************************AclStream********************************************/ +struct AsyncThdArgs +{ + bool isExit; + void* context; + pthread_mutex_t mutex; + AsyncThdArgs() : isExit(false), context(nullptr), mutex(PTHREAD_MUTEX_INITIALIZER) {} +}; + +class AclStream::Impl +{ +public: + aclrtStream stream; + bool ownStream; + AsyncThdArgs asyncThdArgs; + pthread_t asyncThdId; + + void bindThread(); + void addToAsyncRelease(const AclMat& mat); + + Impl(); + explicit Impl(aclrtStream stream); + + ~Impl(); +}; + +AclStream::Impl::Impl() : stream(nullptr), ownStream(true), asyncThdId(0) +{ + CV_ACL_SAFE_CALL(aclrtCreateStream(&stream)); +} + +AclStream::Impl::Impl(aclrtStream s) : stream(s), ownStream(false), asyncThdId(0) {} + +AclStream::Impl::~Impl() +{ + if (stream && ownStream) + { + aclrtSynchronizeStream(stream); + if (asyncThdId != 0) + { + asyncThdArgs.isExit = true; + CV_ACL_SAFE_CALL(aclrtUnSubscribeReport(asyncThdId, stream)); + (void)pthread_join(asyncThdId, nullptr); + } + CV_ACL_SAFE_CALL(aclrtDestroyStream(stream)); + } +} + +static void* processReportLoop(void* args_) +{ + AsyncThdArgs* args = (AsyncThdArgs*)args_; + CV_ACL_SAFE_CALL(aclrtSetCurrentContext(args->context)); + + // Wait until bindThread() releases the mutex, i.e. until this thread has been subscribed to the stream. 
+ pthread_mutex_lock(&args->mutex); + pthread_mutex_unlock(&args->mutex); + + while (!args->isExit) + { + aclError ret = aclrtProcessReport(-1); + // Skip the error check when exiting: aclrtProcessReport will report a timeout error when + // unsubscribing. + if (!args->isExit) + CV_ACL_SAFE_CALL(ret); + } + + return (nullptr); +} + +void AclStream::Impl::bindThread() +{ + // Only one report thread is created per Impl; the mutex serializes concurrent callers. + pthread_mutex_lock(&asyncThdArgs.mutex); + if (asyncThdId == 0) + { + CV_ACL_SAFE_CALL(aclrtGetCurrentContext(&asyncThdArgs.context)); + (void)pthread_create(&asyncThdId, nullptr, processReportLoop, &asyncThdArgs); + CV_ACL_SAFE_CALL(aclrtSubscribeReport(asyncThdId, stream)); + } + pthread_mutex_unlock(&asyncThdArgs.mutex); +} + +static void releaseAclMatCB(void* releaseHandle) +{ + if (releaseHandle == nullptr) + return; + AclMat* mat = (AclMat*)releaseHandle; + delete mat; +} + +void AclStream::Impl::addToAsyncRelease(const AclMat& mat) +{ + if (stream != nullptr) + { + if (asyncThdId == 0) + bindThread(); + AclMat* releaseHandle = new AclMat(mat); + CV_ACL_SAFE_CALL( + aclrtLaunchCallback(releaseAclMatCB, releaseHandle, ACL_CALLBACK_BLOCK, stream)); + } +} + +aclrtStream AclStreamAccessor::getStream(const AclStream& stream) { return stream.impl_->stream; } + +AclStream AclStreamAccessor::wrapStream(aclrtStream stream) +{ + return AclStream(makePtr(stream)); +} + +AclStream::AclStream() { impl_ = makePtr(); } + +void AclStream::waitForCompletion() { CV_ACL_SAFE_CALL(aclrtSynchronizeStream(impl_->stream)); } + +void AclStream::waitAclEvent(const AclEvent& event) +{ + CV_ACL_SAFE_CALL(aclrtStreamWaitEvent(impl_->stream, AclEventAccessor::getEvent(event))); +} + +AclStream& AclStream::Null() +{ + const uint32_t deviceId = getDevice(); + return initializer.getNullAclStream(deviceId); +} + +void AclStream::addToAsyncRelease(const AclMat& mat) { impl_->addToAsyncRelease(mat); } + +} // namespace cann +} // namespace cv diff --git 
a/modules/cannarithm/src/cann_call.cpp b/modules/cannarithm/src/cann_call.cpp new file mode 100644 index 00000000000..0e9ad8036bb --- /dev/null +++ b/modules/cannarithm/src/cann_call.cpp @@ -0,0 +1,150 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +namespace cv +{ +namespace cann +{ +// Runs the single-input ACL operator `op` on src and writes the result to dst. Both mats are described to ACL as +// 1 x rows x cols x channels NHWC tensors; every entry of attrs is added to the operator attributes first. +// With a null raw stream the call synchronizes before returning; otherwise src and dst are passed to stream.addToAsyncRelease(). +void aclOneInput(const AclMat& src, AclMat& dst, const char* op, AclStream& stream, + std::vector& attrs) +{ + CannPreparation prepare; + for (auto& attrIterator : attrs) + { + attrIterator->addAttr(prepare.opAttr_); + } + + int64_t dimSrc[] = {1, src.rows, src.cols, src.channels()}; + int64_t dimDst[] = {1, dst.rows, dst.cols, dst.channels()}; + CANN_PREPARE_INPUTDESC(prepare, getACLType(src.depth()), sizeof(dimSrc) / sizeof(dimSrc[0]), + dimSrc, ACL_FORMAT_NHWC); + CANN_PREPARE_OUTPUTDESC(prepare, getACLType(dst.depth()), sizeof(dimDst) / sizeof(dimDst[0]), + dimDst, ACL_FORMAT_NHWC); + + CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src.data), src.rows * src.step); + CANN_PREPARE_OUTPUTBUFFER(prepare, const_cast(dst.data), dst.rows * dst.step); + + aclrtStream rawStream = AclStreamAccessor::getStream(stream); + + CV_ACL_SAFE_CALL(aclopCompileAndExecute( + op, prepare.inputDesc_.size(), prepare.inputDesc_.data(), prepare.inputBuffers_.data(), + prepare.outputDesc_.size(), prepare.outputDesc_.data(), prepare.outputBuffers_.data(), + prepare.opAttr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, rawStream)); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtSynchronizeStream(rawStream)); + else + { + stream.addToAsyncRelease(src); + stream.addToAsyncRelease(dst); + } +} + +// Two-input counterpart of aclOneInput without attributes: runs `op` on src1 and src2 and writes the result to dst. +void aclTwoInputs(const AclMat& src1, const AclMat& src2, AclMat& dst, const char* op, + AclStream& stream) +{ + CannPreparation prepare; + aclrtStream rawStream = AclStreamAccessor::getStream(stream); + + int64_t dimSrc1[] = {1, src1.rows, 
src1.cols, src1.channels()}; + int64_t dimSrc2[] = {1, src2.rows, src2.cols, src2.channels()}; + + int64_t dimDst[] = {1, dst.rows, dst.cols, dst.channels()}; + + CANN_PREPARE_INPUTDESC(prepare, getACLType(src1.depth()), sizeof(dimSrc1) / sizeof(dimSrc1[0]), + dimSrc1, ACL_FORMAT_NHWC); + + CANN_PREPARE_INPUTDESC(prepare, getACLType(src2.depth()), sizeof(dimSrc2) / sizeof(dimSrc2[0]), + dimSrc2, ACL_FORMAT_NHWC); + + CANN_PREPARE_OUTPUTDESC(prepare, getACLType(dst.depth()), sizeof(dimDst) / sizeof(dimDst[0]), + dimDst, ACL_FORMAT_NHWC); + + CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src1.data), src1.rows * src1.step); + CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src2.data), src2.rows * src2.step); + CANN_PREPARE_OUTPUTBUFFER(prepare, const_cast(dst.data), dst.rows * dst.step); + + CV_ACL_SAFE_CALL(aclopCompileAndExecute( + op, prepare.inputDesc_.size(), prepare.inputDesc_.data(), prepare.inputBuffers_.data(), + prepare.outputDesc_.size(), prepare.outputDesc_.data(), prepare.outputBuffers_.data(), + prepare.opAttr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, rawStream)); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtSynchronizeStream(rawStream)); + else + { + stream.addToAsyncRelease(src1); + stream.addToAsyncRelease(src2); + stream.addToAsyncRelease(dst); + } +} + +// Runs the "TransData" operator with src_format=NCHW and dst_format=NHWC: src is described to ACL as +// 1 x channels x rows x cols and dst as 1 x rows x cols x channels. +void transNCHWToNHWC(const AclMat& src, AclMat& dst, AclStream& stream) +{ + CannPreparation prepare; + CANN_PREPARE_ADD_ATTR(prepare, String, "src_format", "NCHW"); + CANN_PREPARE_ADD_ATTR(prepare, String, "dst_format", "NHWC"); + + int64_t dimSrc[] = {1, src.channels(), src.rows, src.cols}; + int64_t dimDst[] = {1, dst.rows, dst.cols, dst.channels()}; + + CANN_PREPARE_INPUTDESC(prepare, getACLType(src.depth()), sizeof(dimSrc) / sizeof(dimSrc[0]), + dimSrc, ACL_FORMAT_NCHW); + CANN_PREPARE_OUTPUTDESC(prepare, getACLType(dst.depth()), sizeof(dimDst) / sizeof(dimDst[0]), + dimDst, ACL_FORMAT_NHWC); + + CANN_PREPARE_INPUTBUFFER(prepare, const_cast(src.data), src.rows * src.step); + 
CANN_PREPARE_OUTPUTBUFFER(prepare, const_cast(dst.data), dst.rows * dst.step); + + aclrtStream rawStream = AclStreamAccessor::getStream(stream); + + CV_ACL_SAFE_CALL(aclopCompileAndExecute("TransData", prepare.inputDesc_.size(), + prepare.inputDesc_.data(), prepare.inputBuffers_.data(), + prepare.outputDesc_.size(), prepare.outputDesc_.data(), + prepare.outputBuffers_.data(), prepare.opAttr_, + ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, rawStream)); + if (rawStream == nullptr) + CV_ACL_SAFE_CALL(aclrtSynchronizeStream(rawStream)); + else + { + stream.addToAsyncRelease(src); + stream.addToAsyncRelease(dst); + } +} + +// Maps an OpenCV depth constant (CV_8U, CV_32F, ...) to the matching aclDataType. +// Depths without a mapping yield ACL_DT_UNDEFINED. +aclDataType getACLType(int opencvdepth) +{ + switch (opencvdepth) + { + case CV_8S: + return ACL_INT8; + case CV_16S: + return ACL_INT16; + case CV_8U: + return ACL_UINT8; + case CV_16U: + return ACL_UINT16; + case CV_32S: + return ACL_INT32; + case CV_32F: + return ACL_FLOAT; + case CV_64F: + return ACL_DOUBLE; + case CV_16F: + return ACL_FLOAT16; + default: + return ACL_DT_UNDEFINED; + } +} + +} // namespace cann +} // namespace cv diff --git a/modules/cannarithm/src/element_operations.cpp b/modules/cannarithm/src/element_operations.cpp new file mode 100644 index 00000000000..f23323bf384 --- /dev/null +++ b/modules/cannarithm/src/element_operations.cpp @@ -0,0 +1,165 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" +#include + +namespace cv +{ +namespace cann +{ +void opMatMat(AclMat&, AclMat&, AclMat&, const char*, AclStream& stream = AclStream::Null()); +void opMatMat(AclMat& src1, AclMat& src2, AclMat& dst, const char* op, AclStream& stream) +{ + aclTwoInputs(src1, src2, dst, op, stream); +} + +void opMatScalar(AclMat&, AclMat&, bool, Scalar, const char*, + AclStream& stream = AclStream::Null()); +void opMatScalar(AclMat& src, AclMat& dst, bool inv, Scalar s, const char* op, AclStream& stream) +{ + Mat scMat(1, 1, src.type(), s); + AclMat scAclMat; + scAclMat.upload(scMat); + if (inv) + aclTwoInputs(scAclMat, src, dst, op, stream); + else + aclTwoInputs(src, scAclMat, dst, op, stream); +} + +void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, float scale, int dtype, + const char* op, AclStream& stream = AclStream::Null()); +void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, float scale, int dtype, + const char* op, AclStream& stream) +{ + const int kind1 = _src1.kind(); + const int kind2 = _src2.kind(); + + const bool isScalar1 = (kind1 == _InputArray::MATX); + const bool isScalar2 = (kind2 == _InputArray::MATX); + + AclMat src1, src2; + + if (!isScalar1) + src1 = getInputMat(_src1, stream); + + if (!isScalar2) + src2 = getInputMat(_src2, stream); + + Mat scalar; + if (isScalar1) + scalar = _src1.getMat(); + else if (isScalar2) + scalar = _src2.getMat(); + + Scalar val; + if (!scalar.empty()) + { + CV_Assert(scalar.total() <= 4); + scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); + } + + const int sdepth = src1.empty() ? src2.depth() : src1.depth(); + const int cn = src1.empty() ? src2.channels() : src1.channels(); + const Size size = src1.empty() ? 
src2.size() : src1.size(); + + if (dtype < 0) + dtype = sdepth; + + const int ddepth = CV_MAT_DEPTH(dtype); + + CV_Assert(sdepth <= CV_64F && ddepth <= CV_64F); + CV_Assert(!scalar.empty() || (src2.depth() == src1.depth() && src2.size() == src1.size())); + + AclMat dst = getOutputMat(_dst, size.height, size.width, CV_MAKE_TYPE(ddepth, cn)); + + if (isScalar1) + opMatScalar(src2, dst, true, val, op, stream); + else if (isScalar2) + opMatScalar(src1, dst, false, val, op, stream); + else + opMatMat(src1, src2, dst, op, stream); + + // TODO implement empty() for AclMat in InputArray + AclMat mask = getInputMat(_mask, stream); + if (!mask.empty()) + { + int mtype = mask.type(); + + CV_Assert((mtype == CV_8UC1 || mtype == CV_8SC1) && mask.size() == size); + // TODO use MaskSelect? Current approach: cast the mask to dst's depth, expand it to dst's channel count, divide it by itself and multiply dst by the result. + AclMat formatedMask; + if (mask.depth() != dst.depth()) + mask.convertTo(formatedMask, dst.depth()); + else + formatedMask = mask; + + AclMat expandedMask; + if (dst.channels() != 1) + formatedMask.expandTo(expandedMask, dst.channels()); + else + expandedMask = formatedMask; + + // TODO call DIV before expand? + AclMat divRet; + arithm_op(expandedMask, expandedMask, divRet, noArray(), 1, -1, "Div", stream); + AclMat dstCopy = dst; + // TODO dst and dstCopy point to the same memory area; seems to do no harm yet. 
+ arithm_op(dstCopy, divRet, dst, noArray(), 1, -1, "Mul", stream); + } + + if(scale != 1) + { + AclMat dstCpy = dst; + AclFloatAttribute scaleOP("value", scale); + std::vector attrs{&scaleOP}; + aclOneInput(dstCpy, dst, "Muls", stream, attrs); + } + + syncOutput(dst, _dst); +} + +void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, + AclStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream); +} + +void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, + AclStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream); +} + +void multiply(InputArray src1, InputArray src2, OutputArray dst, float scale, int dtype, AclStream& stream) +{ + arithm_op(src1, src2, dst, noArray(), scale, dtype, "Mul", stream); +} + +void divide(InputArray src1, InputArray src2, OutputArray dst, float scale, int dtype, AclStream& stream) +{ + arithm_op(src1, src2, dst, noArray(), scale, dtype, "Div", stream); +} + +void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, + AclStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream); +} + +void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, + AclStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseOr", stream); +} + +void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, + AclStream& stream) +{ + arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream); +} + + +} // namespace cann +} // namespace cv diff --git a/modules/cannarithm/src/precomp.hpp b/modules/cannarithm/src/precomp.hpp new file mode 100644 index 00000000000..1541ec80a69 --- /dev/null +++ b/modules/cannarithm/src/precomp.hpp @@ -0,0 +1,16 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __OPENCV_PRECOMP_H__ +#define __OPENCV_PRECOMP_H__ + +#include +#include +#include "opencv2/cann.hpp" +#include "opencv2/cann_prepare.hpp" +#include "opencv2/acl_stream_accessor.hpp" +#include "opencv2/cann_call.hpp" +#include "opencv2/cann_arithm.hpp" + +#endif /* __OPENCV_PRECOMP_H__ */ diff --git a/modules/cannarithm/test/test_cann.cpp b/modules/cannarithm/test/test_cann.cpp new file mode 100644 index 00000000000..6c2e65beefe --- /dev/null +++ b/modules/cannarithm/test/test_cann.cpp @@ -0,0 +1,227 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" +#include + +namespace opencv_test +{ +namespace +{ + +class DummyAllocator : public AclMat::Allocator +{ +public: + bool allocate(cv::cann::AclMat* mat, int rows, int cols, size_t elemSize) CV_OVERRIDE + { + CV_UNUSED(rows); + CV_UNUSED(cols); + CV_UNUSED(elemSize); + mat->data = (uchar*)0x12345; + mat->refcount = (int*)cv::fastMalloc(sizeof(int)); + return true; + } + void free(cv::cann::AclMat* mat) CV_OVERRIDE + { + mat->data = (uchar*)0x54321; + cv::fastFree(mat->refcount); + } +}; + +TEST(AclMat, Construct) +{ + cv::cann::setDevice(0); + // 1 Default constructor. + AclMat defaultAclMat; + AclMat::Allocator* defaultAllocator = AclMat::defaultAllocator(); + ASSERT_EQ(defaultAclMat.allocator, defaultAllocator); + + // 2 get & set allocator. 
+ DummyAllocator dummyAllocator; + AclMat::setDefaultAllocator(&dummyAllocator); + ASSERT_EQ(defaultAclMat.defaultAllocator(), &dummyAllocator); + AclMat::setDefaultAllocator(defaultAllocator); + + // 3 constructs AclMat of the specified size and type + AclMat specifiedSizeAclMat1(5, 6, CV_8UC3); + AclMat specifiedSizeAclMat2(Size(300, 200), CV_64F); + + ASSERT_EQ(specifiedSizeAclMat1.rows, 5); + ASSERT_EQ(specifiedSizeAclMat1.cols, 6); + ASSERT_EQ(specifiedSizeAclMat1.depth(), CV_8U); + ASSERT_EQ(specifiedSizeAclMat1.channels(), 3); + + ASSERT_EQ(specifiedSizeAclMat2.cols, 300); + ASSERT_EQ(specifiedSizeAclMat2.rows, 200); + ASSERT_EQ(specifiedSizeAclMat2.depth(), CV_64F); + ASSERT_EQ(specifiedSizeAclMat2.channels(), 1); + + // 4 constructs AclMat and fills it with the specified value s + srand((unsigned int)(time(NULL))); + Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + + Mat scalarToMat(7, 8, CV_8UC3, sc); + AclMat scalarToAclMat1(7, 8, CV_8UC3, sc); + Mat scalarToMatChecker; + scalarToAclMat1.download(scalarToMatChecker); + + EXPECT_MAT_NEAR(scalarToMat, scalarToMatChecker, 0.0); + + AclMat scalarToAclMat2(Size(123, 345), CV_32S); + + ASSERT_EQ(scalarToAclMat1.rows, 7); + ASSERT_EQ(scalarToAclMat1.cols, 8); + ASSERT_EQ(scalarToAclMat1.depth(), CV_8U); + ASSERT_EQ(scalarToAclMat1.channels(), 3); + + ASSERT_EQ(scalarToAclMat2.cols, 123); + ASSERT_EQ(scalarToAclMat2.rows, 345); + ASSERT_EQ(scalarToAclMat2.depth(), CV_32S); + ASSERT_EQ(scalarToAclMat2.channels(), 1); + + // 5 constructor for AclMat headers pointing to user-allocated data (each header's data must equal userAllocatedData) + void* userAllocatedData = malloc(1); + AclMat userAllocatedAclMat1(9, 10, CV_16SC2, userAllocatedData); + AclMat userAllocatedAclMat2(Size(1024, 2048), CV_16F, userAllocatedData); + + ASSERT_EQ(userAllocatedAclMat1.rows, 9); + ASSERT_EQ(userAllocatedAclMat1.cols, 10); + ASSERT_EQ(userAllocatedAclMat1.depth(), CV_16S); + ASSERT_EQ(userAllocatedAclMat1.channels(), 2); + ASSERT_EQ(userAllocatedAclMat1.data, 
userAllocatedData); + + ASSERT_EQ(userAllocatedAclMat2.cols, 1024); + ASSERT_EQ(userAllocatedAclMat2.rows, 2048); + ASSERT_EQ(userAllocatedAclMat2.depth(), CV_16F); + ASSERT_EQ(userAllocatedAclMat2.channels(), 1); + ASSERT_EQ(userAllocatedAclMat2.data, userAllocatedData); + + // 6 builds AclMat from host memory + Scalar sc2(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + Mat randomMat(7, 8, CV_8UC3, sc2); + InputArray arr = randomMat; + + AclMat fromInputArray(arr); + Mat randomMatChecker; + fromInputArray.download(randomMatChecker); + EXPECT_MAT_NEAR(randomMat, randomMatChecker, 0.0); + + cv::cann::resetDevice(); +} + +TEST(AclMat, RefCount) +{ + DummyAllocator dummyAllocator; + AclMat* mat = new AclMat(1, 1, CV_8U, &dummyAllocator); + ASSERT_EQ(*(mat->refcount), 1); + ASSERT_EQ(mat->data, (uchar*)0x12345); + + AclMat* copy1 = new AclMat(*mat); + ASSERT_EQ(mat->refcount, copy1->refcount); + ASSERT_EQ(*(copy1->refcount), 2); + + AclMat* copy2 = new AclMat(*copy1); + ASSERT_EQ(mat->refcount, copy2->refcount); + ASSERT_EQ(*(copy2->refcount), 3); + + delete copy1; + ASSERT_EQ(mat->data, (uchar*)0x12345); + ASSERT_EQ(*(mat->refcount), 2); + + delete copy2; + ASSERT_EQ(mat->data, (uchar*)0x12345); + ASSERT_EQ(*(mat->refcount), 1); + + delete mat; +} + +TEST(AclMat, Assignment) +{ + DummyAllocator dummyAllocator; + AclMat mat1; + AclMat mat2(3, 4, CV_8SC1, &dummyAllocator); + mat1 = mat2; + + ASSERT_EQ(mat1.rows, 3); + ASSERT_EQ(mat1.cols, 4); + ASSERT_EQ(mat1.depth(), CV_8S); + ASSERT_EQ(mat1.channels(), 1); + ASSERT_EQ(mat1.data, (uchar*)0x12345); +} + +TEST(AclMat, SetTo) +{ + cv::cann::setDevice(0); + + srand((unsigned int)(time(NULL))); + Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + + AclMat aclMat(2, 2, CV_8UC4); + aclMat.setTo(sc); + Mat mat(2, 2, CV_8UC4, sc); + Mat checker; + aclMat.download(checker); + + EXPECT_MAT_NEAR(mat, checker, 0.0); + + cv::cann::resetDevice(); +} + +TEST(AclMat, ConvertTo) +{ + cv::cann::setDevice(0); 
+ + srand((unsigned int)(time(NULL))); + Scalar sc(rand() % 256, rand() % 256, rand() % 256, rand() % 256); + + AclMat aclMat(2, 2, CV_8UC4, sc); + AclMat convertedAclMat; + aclMat.convertTo(convertedAclMat, CV_16S); + Mat mat(2, 2, CV_16SC4, sc); + Mat checker; + convertedAclMat.download(checker); + + EXPECT_MAT_NEAR(mat, checker, 0.0); + + cv::cann::resetDevice(); +} + +TEST(AclMat, ExpandTo) +{ + cv::cann::setDevice(0); + + Scalar sc1(1); + Scalar sc2(1, 1, 1); + AclMat aclMat(10, 10, CV_8UC1, sc1); + Mat mat(10, 10, CV_8UC3, sc2); + AclMat expandedAclMat; + aclMat.expandTo(expandedAclMat, 3); + Mat checker; + expandedAclMat.download(checker); + + EXPECT_MAT_NEAR(mat, checker, 0.0); + + cv::cann::resetDevice(); +} + +TEST(AclStream, AsyncProcess) +{ + cv::cann::setDevice(0); + + DummyAllocator dummyAllocator; + AclMat* mat = new AclMat(&dummyAllocator); + AclStream stream; + + stream.addToAsyncRelease(*mat); + stream.waitForCompletion(); + + // TODO: need sync point to check: + // 1. mat->data is not freed after it add to async release list even mat is deleted. + // 2. mat->data is freed after callback is called. + + cv::cann::resetDevice(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannarithm/test/test_element_operation.cpp b/modules/cannarithm/test/test_element_operation.cpp new file mode 100644 index 00000000000..db20321d43f --- /dev/null +++ b/modules/cannarithm/test/test_element_operation.cpp @@ -0,0 +1,137 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+
+#include "test_precomp.hpp"
+#include "opencv2/ts/cuda_test.hpp"
+#include "opencv2/cann_arithm.hpp"
+
+namespace opencv_test
+{
+namespace
+{
+// Random Generator
+//
+// Every helper below builds its own default-constructed RNG. Such a generator always starts
+// from the same state (see the cv::RNG reference), so repeated calls return the same value.
+// The test macros rely on this: they paste their variadic arguments into several operation
+// calls, and an argument such as randomNum() must evaluate to the same value in each of them.
+
+// Returns a matrix of width w and height h filled by RNG::fill with a uniform [0, 1) range.
+// NOTE(review): for integer element types the half-open range presumably leaves only zeros -
+// confirm whether a wider range was intended.
+Mat randomMat(int w, int h, int dtype)
+{
+    // Mat takes (rows, cols), i.e. (height, width).
+    Mat rnMat(h, w, dtype);
+    RNG rng;
+    rng.fill(rnMat, RNG::UNIFORM, 0.f, 1.f);
+    return rnMat;
+}
+// Returns a scalar whose four components are raw 32-bit draws from the generator.
+cv::Scalar randomScalar()
+{
+    RNG rng;
+    // Every component is drawn explicitly; passing the RNG object itself would silently go
+    // through one of its implicit conversion operators.
+    return Scalar(rng.next(), rng.next(), rng.next(), rng.next());
+}
+// Returns a scale factor drawn uniformly from [0.3, 3.0).
+float randomNum()
+{
+    RNG rng;
+    float rdnNum = float(rng.uniform(0.3, 3.0));
+    return rdnNum;
+}
+// Returns a 10x10 single-channel mask that is zero except for a filled 3x3 square at (5, 5).
+Mat genMask()
+{
+    Mat mask = Mat::zeros(Size(10, 10), CV_8UC1);
+    rectangle(mask, cv::Rect(5, 5, 3, 3), Scalar(255), -1);
+    return mask;
+}
+
+#define DEVICE_ID 0
+
+/****************TEST CASE***************/
+// MAT & Mat
+//
+// Defines TEST(ELEMENTWISE_OP, MAT_<op>_MAT_<idx>): runs cv::<op> on two host matrices, then
+// cv::cann::<op> on their uploaded copies both without and with an explicit AclStream, and
+// expects all three results to match exactly. The variadic arguments are forwarded verbatim
+// to every call. Comments cannot live inside the macro body because of the line splices.
+#define TEST_MAT_OP_MAT(idx, op, ...)                        \
+    TEST(ELEMENTWISE_OP, MAT_##op##_MAT_##idx)               \
+    {                                                        \
+        cv::cann::setDevice(DEVICE_ID);                      \
+                                                             \
+        Mat cpuMat1 = randomMat(10, 10, CV_32SC3);           \
+        Mat cpuMat2 = randomMat(10, 10, CV_32SC3);           \
+        Mat cpuDst;                                          \
+        cv::op(cpuMat1, cpuMat2, cpuDst, __VA_ARGS__);       \
+                                                             \
+        AclMat mat1, mat2;                                   \
+        mat1.upload(cpuMat1);                                \
+        mat2.upload(cpuMat2);                                \
+        AclMat dst, dstS;                                    \
+        cv::cann::op(mat1, mat2, dst, __VA_ARGS__);          \
+        Mat npuDst, npuDstS;                                 \
+        dst.download(npuDst);                                \
+        AclStream stream;                                    \
+        cv::cann::op(mat1, mat2, dstS, __VA_ARGS__, stream); \
+        stream.waitForCompletion();                          \
+        dstS.download(npuDstS);                              \
+                                                             \
+        EXPECT_MAT_NEAR(npuDst, cpuDst, 0.0);                \
+        EXPECT_MAT_NEAR(npuDst, npuDstS, 0.0);               \
+        cv::cann::resetDevice();                             \
+    }
+
+TEST_MAT_OP_MAT(1, add, noArray(), -1);
+TEST_MAT_OP_MAT(1, subtract, noArray(), -1);
+TEST_MAT_OP_MAT(1, multiply, 1, -1);
+TEST_MAT_OP_MAT(1, divide, 1, -1);
+TEST_MAT_OP_MAT(1, bitwise_and, noArray());
+TEST_MAT_OP_MAT(1, bitwise_or, noArray());
+TEST_MAT_OP_MAT(1, bitwise_xor, noArray());
+
+TEST_MAT_OP_MAT(2, add, genMask(), CV_32SC3);
+TEST_MAT_OP_MAT(2, subtract, genMask(), CV_32SC3);
+TEST_MAT_OP_MAT(2, multiply, randomNum(), -1);
+TEST_MAT_OP_MAT(2, divide, randomNum(), -1);
+TEST_MAT_OP_MAT(2, bitwise_and, genMask());
+TEST_MAT_OP_MAT(2, bitwise_or, genMask());
+TEST_MAT_OP_MAT(2, bitwise_xor, genMask());
+
+// SCALAR & MAT
+//
+// Defines TEST(ELEMENTWISE_OP, MAT_<op>_SCALAR_<idx>). The host reference expands both scalars
+// into 10x10 CV_32SC3 matrices and applies cv::<op> in both operand orders. cv::cann::<op> is
+// then called with (scalar, matrix) and (matrix, scalar), each without and with an explicit
+// AclStream, and every result is expected to match its reference exactly. The variadic
+// arguments are forwarded verbatim to every call. Comments cannot live inside the macro body
+// because of the line splices.
+#define TEST_MAT_OP_SCALAR(idx, op, ...)                           \
+    TEST(ELEMENTWISE_OP, MAT_##op##_SCALAR_##idx)                  \
+    {                                                              \
+        Scalar cpuS1 = randomScalar();                             \
+        Scalar cpuS2 = randomScalar();                             \
+        Mat cpuMatS1(10, 10, CV_32SC3, cpuS1);                     \
+        Mat cpuMatS2(10, 10, CV_32SC3, cpuS2);                     \
+        Mat cpuDst, cpuDstC;                                       \
+        cv::op(cpuMatS1, cpuMatS2, cpuDst, __VA_ARGS__);           \
+        cv::op(cpuMatS2, cpuMatS1, cpuDstC, __VA_ARGS__);          \
+        cv::cann::setDevice(DEVICE_ID);                            \
+                                                                   \
+        AclMat dst, dstS, dstC, dstCS;                             \
+        cv::cann::op(cpuS1, cpuMatS2, dst, __VA_ARGS__);           \
+        cv::cann::op(cpuMatS2, cpuS1, dstC, __VA_ARGS__);          \
+        Mat npuDst, npuDstS, npuDstC, npuDstCS;                    \
+        dst.download(npuDst);                                      \
+        dstC.download(npuDstC);                                    \
+        AclStream stream;                                          \
+        cv::cann::op(cpuS1, cpuMatS2, dstS, __VA_ARGS__, stream);  \
+        cv::cann::op(cpuMatS2, cpuS1, dstCS, __VA_ARGS__, stream); \
+        stream.waitForCompletion();                                \
+        dstS.download(npuDstS);                                    \
+        dstCS.download(npuDstCS);                                  \
+                                                                   \
+        EXPECT_MAT_NEAR(npuDst, npuDstS, 0.0);                     \
+        EXPECT_MAT_NEAR(npuDst, cpuDst, 0.0);                      \
+        EXPECT_MAT_NEAR(npuDstC, npuDstCS, 0.0);                   \
+        EXPECT_MAT_NEAR(npuDstC, cpuDstC, 0.0);                    \
+                                                                   \
+        cv::cann::resetDevice();                                   \
+    }
+TEST_MAT_OP_SCALAR(1, add, noArray(), -1);
+TEST_MAT_OP_SCALAR(1, subtract, noArray(), -1);
+TEST_MAT_OP_SCALAR(1, multiply, 1, -1);
+TEST_MAT_OP_SCALAR(1, divide, 1, -1);
+TEST_MAT_OP_SCALAR(1, bitwise_and, noArray());
+TEST_MAT_OP_SCALAR(1, bitwise_or, noArray());
+TEST_MAT_OP_SCALAR(1, bitwise_xor, noArray());
+
+TEST_MAT_OP_SCALAR(2, add, genMask(), CV_32SC3);
+TEST_MAT_OP_SCALAR(2, subtract, genMask(), CV_32SC3);
+TEST_MAT_OP_SCALAR(2, bitwise_and, genMask());
+TEST_MAT_OP_SCALAR(2, bitwise_or, genMask());
+TEST_MAT_OP_SCALAR(2, bitwise_xor, genMask());
+TEST_MAT_OP_SCALAR(2, multiply, randomNum(), -1);
+TEST_MAT_OP_SCALAR(2, divide, randomNum(), -1);
+} // namespace
+} // namespace opencv_test
diff --git a/modules/cannarithm/test/test_main.cpp b/modules/cannarithm/test/test_main.cpp
new file mode 100644
index 00000000000..14bd66005ec
--- /dev/null
+++ b/modules/cannarithm/test/test_main.cpp
@@ -0,0 +1,21 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "test_precomp.hpp"
+
+// Global GoogleTest environment: calls initAcl() in SetUp() and finalizeAcl() in TearDown(),
+// so the individual tests only have to select and reset a device.
+class CannEnvironment : public ::testing::Environment
+{
+public:
+    virtual ~CannEnvironment() = default;
+    virtual void SetUp() CV_OVERRIDE { initAcl(); }
+    virtual void TearDown() CV_OVERRIDE { finalizeAcl(); }
+};
+
+// Registers the CANN environment; passed to CV_TEST_MAIN below.
+static void initTests()
+{
+    // GoogleTest takes ownership of a registered environment, so it is not deleted here.
+    CannEnvironment* cannEnv = new CannEnvironment();
+    ::testing::AddGlobalTestEnvironment(cannEnv);
+}
+
+CV_TEST_MAIN("cannarithm", initTests());
diff --git a/modules/cannarithm/test/test_precomp.hpp b/modules/cannarithm/test/test_precomp.hpp
new file mode 100644
index 00000000000..e95abb86e1c
--- /dev/null
+++ b/modules/cannarithm/test/test_precomp.hpp
@@ -0,0 +1,16 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/cann.hpp"
+
+using namespace cv::cann;
+// Drop any definition made by a previously included header before installing the ones below;
+// both macros are handled the same way so neither can trigger a redefinition diagnostic.
+#undef EXPECT_MAT_NEAR
+#undef ASSERT_MAT_NEAR
+#define EXPECT_MAT_NEAR(m1, m2, eps) EXPECT_PRED_FORMAT3(cvtest::assertMatNear, m1, m2, eps)
+#define ASSERT_MAT_NEAR(m1, m2, eps) ASSERT_PRED_FORMAT3(cvtest::assertMatNear, m1, m2, eps)
+
+#endif