diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..504e12c2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,35 @@
+*~
+*.egg/
+*.pyc
+*.pyo
+*.cpp
+*.so
+cscope.*
+tags
+build
+\#*\#
+.\#*
+.coverage
+.eggs/
+_readthedocs_build
+ideep.egg-info/
+dist/
+htmlcov/
+.idea/
+ideep/python/api/c_api.py
+ideep/python/api/support.py
+ideep/python/api/memory.py
+ideep/python/api/inner_product_*.py
+ideep/python/api/reorder.py
+ideep/python/api/convolution_*.py
+ideep/python/api/eltwise_*.py
+ideep/python/api/concat.py
+ideep/python/api/lrn_*.py
+ideep/python/api/pooling_*.py
+ideep/python/api/bn_*.py
+ideep/python/api/view.py
+ideep/python/api/sum.py
+ideep/python/api/cosim_dump.py
+ideep/python/api/dropout.py
+ideep/python/mdarray.py
+external/mkldnn/source
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..d66e8c35
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "mkl-dnn"]
+	path = mkl-dnn
+	url = https://github.com/01org/mkl-dnn.git
diff --git a/.pep8 b/.pep8
new file mode 100644
index 00000000..3051e81f
--- /dev/null
+++ b/.pep8
@@ -0,0 +1,3 @@
+[pep8]
+exclude=caffe_pb*,.eggs,*.egg,build
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..2956417a
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2018 Intel Corporation.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..4646dae2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+# iDeep: Intel Deep Learning Package
+
+Intel Deep Learning Package (iDeep) is an open source performance library of primitives for accelerating deep learning frameworks on Intel Architecture. iDeep provides a user-friendly API and highly tuned implementations for standard DNN routines.
+
+The package provides C and Python APIs.
+
+## iDeep Python Package (ideep4py) Requirements
+
+We recommend the following Linux distributions:
+- Ubuntu 14.04/16.04 LTS 64bit
+- CentOS 7 64bit
+
+The following versions of Python can be used:
+- 2.7.5+, 3.5.2+, and 3.6.0+
+
+The recommended environments above are tested. We cannot guarantee that ideep4py works on other environments, including Windows and macOS, even if it appears to run correctly.
+
+Minimum requirements:
+- Numpy 1.9+
+- Six 1.9+
+- Swig 3.0.12
+- Glog 0.3.5
+- Cmake 2.8.0
+- Doxygen 1.8.5
+- C++ compiler with C++11 standard support
+
+## Installation of ideep4py
+
+If you use an old ``setuptools``, upgrade it:
+
+```
+pip install -U setuptools
+```
+
+Then install ideep4py from the source code:
+```
+python setup.py install
+```
+
+Use pip to uninstall ideep4py:
+
+```sh
+$ pip uninstall ideep4py
+```
+
+## More information
+- ideep github: https://github.com/intel/ideep.git
+
+## License
+MIT License (see `LICENSE` file).
diff --git a/dlcp/Makefile b/dlcp/Makefile
new file mode 100644
index 00000000..e781bdfd
--- /dev/null
+++ b/dlcp/Makefile
@@ -0,0 +1,88 @@
+##################################################################################
+# Copyright (c) 2018 Intel Corporation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+##################################################################################
+
+
+LOCAL_DIR = $(shell pwd)
+
+DLCP_CXX ?= icpc
+
+FLAG_DEBUG ?= 0
+
+AR = ar
+CXXFLAGS += -fPIC
+
+ifneq (,$(findstring icpc, $(DLCP_CXX)))
+    CXX = $(DLCP_CXX)
+    CXXFLAGS += -std=c++11 -qopenmp
+    LDFLAGS += -static-intel
+else
+    $(error Unsupported compiler $(DLCP_CXX))
+endif
+
+ifeq ($(FLAG_DEBUG), 1)
+    CXXFLAGS += -O0 -g
+else
+    CXXFLAGS += -O2
+endif
+
+COMPRESSION_LIB = lib/libdlcomp.so
+COMPRESSION_LIBNAME = libdlcomp.so
+SRC_DIR = $(LOCAL_DIR)/src
+INCL_DIR = $(LOCAL_DIR)/include $(LOCAL_DIR)/src
+
+TARGET = libdlcomp.so
+INCS = $(addprefix -I,$(INCL_DIR))
+LDFLAGS += -ldl -lrt -lpthread -liomp5
+CXXFLAGS += $(addprefix -I,$(INCL_DIR))
+
+
+SRCS += src/dl_compression_impl.cpp
+SRCS += src/dl_compression_util.cpp
+SRCS += src/dl_compression.cpp
+
+OBJS := $(SRCS:.cpp=.o)
+
+
+all: $(TARGET)
+
+$(TARGET): $(COMPRESSION_LIB)
+
+$(COMPRESSION_LIB): $(OBJS)
+	$(CXX) $(CXXFLAGS) -shared -Wl,-soname,$(COMPRESSION_LIBNAME) -o $(COMPRESSION_LIB) $(OBJS) $(LDFLAGS)
+
+$(SRC_DIR)/%.o: $(SRC_DIR)/%.cpp
+	$(CXX) -c $(CXXFLAGS) $< -o $@
+
+clean:
+	rm -f $(SRC_DIR)/*.o $(COMPRESSION_LIB)
+
+cleanall: clean
+
diff --git a/dlcp/env_setup.sh b/dlcp/env_setup.sh
new file mode 100755
index 00000000..6ecaef56
--- /dev/null
+++ b/dlcp/env_setup.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+source /opt/intel/bin/compilervars.sh intel64
diff --git a/dlcp/include/dl_compression.h b/dlcp/include/dl_compression.h
new file mode 100644
index 00000000..e081461b
--- /dev/null
+++ b/dlcp/include/dl_compression.h
@@ -0,0 +1,193 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef DL_COMPRESSION_H
+#define DL_COMPRESSION_H
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+    DL_COMP_NONE = 0,
+    DL_COMP_DFP  = 1,
+} dl_comp_method_t;
+
+typedef enum {
+    DL_COMP_OK                               = 0,
+    DL_COMP_FAIL                             = 1,
+    DL_COMP_FAIL_SRC_DATA_TYPE_NOT_SUPPORTED = 2,
+    DL_COMP_FAIL_RATIO_NOT_SUPPORTED         = 3,
+    DL_COMP_FAIL_COMP_METHOD_NOT_SUPPORTED   = 4,
+    DL_COMP_FAIL_INVALID_COMPRESSED_FORMAT   = 5,
+    DL_COMP_FAIL_NOT_SUPPORTED               = 6
+} dl_comp_return_t;
+
+typedef enum {
+    DL_COMP_INT8    = 0,
+    DL_COMP_FLOAT16 = 1,
+    DL_COMP_FLOAT32 = 2,
+    DL_COMP_FLOAT64 = 3,
+} dl_comp_data_type_t;
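+
+// Note (editorial): as of this patch, dl_comp_compress_buffer accepts
+// only DL_COMP_FLOAT32 sources with comp_ratio == 4 and DL_COMP_DFP
+// (see dlcp/src/dl_compression.cpp); the other enumerators are
+// placeholders for future methods.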
+
+// Compress src buffer into dst buffer.
+//
+// Parameters:
+//   src           [in]     pointer to the src buffer
+//   dst           [out]    pointer to the dst buffer
+//   dataCount     [in]     number of elements to be compressed
+//   diff          [in/out] carries the precision lost by the previous
+//                          compression in, and returns the precision
+//                          lost by this compression. If you don't care
+//                          about the lost precision, pass a NULL pointer.
+//   src_data_type [in]     data type of the src buffer
+//   comp_ratio    [in]     compression ratio; must be one of 2, 4, 8,
+//                          16, 32. E.g. compressing FLOAT32 to INT8
+//                          gives a comp_ratio of 4.
+//   method        [in]     compression algorithm
+// Returns:
+//   DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_compress_buffer( const void *src,
+                                          void *dst,
+                                          size_t dataCount,
+                                          void *diff,
+                                          dl_comp_data_type_t src_data_type,
+                                          size_t comp_ratio,
+                                          dl_comp_method_t method );
+
+// Decompress src buffer into dst buffer.
+//
+// Parameters:
+//   src       [in]  pointer to the src buffer
+//   dst       [out] pointer to the dst buffer
+//   dataCount [in]  number of elements to be decompressed
+// Returns:
+//   DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_decompress_buffer( const void *src,
+                                            void *dst,
+                                            size_t dataCount );
+
+// Sum up compressed data from two input buffers and put the result
+// in outBuffer.
+//
+// Parameters:
+//   inBuffer1 [in]  pointer to a quantized data vector
+//   inBuffer2 [in]  pointer to a quantized data vector
+//   dataCount [in]  number of elements in inBuffer1 and inBuffer2
+//                   to be summed up
+//   outBuffer [out] pointer to a quantized data vector; the result
+//                   is placed in this buffer.
+// Returns:
+//   DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_compressed_buffer_sum( const void *inBuffer1,
+                                                const void *inBuffer2,
+                                                size_t dataCount,
+                                                void *outBuffer );
+
+// Get the compressed block size: the minimum slicing granularity.
+// Operations such as multi-node all-reduce may divide a payload into
+// parts to improve communication efficiency; this API reports that
+// granularity. Its size depends on the src data type, comp_ratio and
+// the compression algorithm.
+//
+// Parameters:
+//   src_data_type [in] data type of the src data before compression
+//   comp_ratio    [in] compression ratio
+//   method        [in] compression algorithm
+// Returns:
+//   the block size in bytes.
+size_t dl_comp_get_sizeof_block( dl_comp_data_type_t src_data_type,
+                                 size_t comp_ratio,
+                                 dl_comp_method_t method );
+
+// Sum up the compressed data of two buffers and put the result in the
+// second buffer. Note that blockCount is the unit here: one block can
+// contain multiple data elements.
+//
+// Parameters:
+//   inBuffer    [in]     pointer to quantized data
+//   inoutBuffer [in/out] pointer to quantized data; the result is
+//                        placed in this buffer.
+//   blockCount  [in]     number of blocks to be summed up
+// Returns:
+//   DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_compressed_buffer_reduce_sum( const void *inBuffer,
+                                                       void *inoutBuffer,
+                                                       size_t blockCount );
+
+// Util function for converting a data count into a block count.
+//
+// Parameters:
+//   dataCount [in] number of data elements
+// Returns:
+//   the corresponding number of blocks.
+size_t dl_comp_convert_block_count(size_t dataCount);
+
+// Util function to get how many elements fit in one block.
+// Parameters:
+//   N/A
+// Returns:
+//   the number of elements in one block.
+size_t dl_comp_get_elem_num_in_block();
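+
+// Illustrative round trip (editorial sketch, not part of the API; the
+// 1024-element count and malloc'd dst are assumptions of the example):
+//
+//   float src[1024];                     /* filled by the caller */
+//   size_t blocks = dl_comp_convert_block_count(1024);
+//   size_t bytes  = blocks * dl_comp_get_sizeof_block(DL_COMP_FLOAT32,
+//                                                     4, DL_COMP_DFP);
+//   int8_t *dst = (int8_t *)malloc(bytes);
+//   if (dl_comp_compress_buffer(src, dst, 1024, NULL, DL_COMP_FLOAT32,
+//                               4, DL_COMP_DFP) == DL_COMP_OK) {
+//       float out[1024];
+//       dl_comp_decompress_buffer(dst, out, 1024);
+//   }
+//   free(dst);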
+
+// Check the running environment.
+// Parameters:
+//   N/A
+// Returns:
+//   true if the check succeeds, otherwise false.
+//   If false, please disable the quantization functionality.
+//   E.g. we suggest not using quantization on machines that do not
+//   support avx512 instructions, because there is no performance gain.
+bool dl_comp_check_running_environ();
+
+// Util function for compressing float32 data to int8.
+// Parameters:
+//   srcBuffer [in]     src float32 data
+//   dstBuffer [out]    dst int8 data
+//   diff      [in/out] precision lost in compression
+//   dataCount [in]     data count
+// Return:
+//   0 on success, otherwise an error code.
+int dl_comp_compress_buffer_FLOAT32ToINT8( const void *srcBuffer,
+                                           void *dstBuffer,
+                                           void *diff,
+                                           size_t dataCount);
+
+// Util function for decompressing int8 to float32.
+// Parameters:
+//   srcBuffer [in]  contains the int8 compressed data
+//   dstBuffer [out] decompressed float32 data
+//   dataCount [in]  data count
+// Return:
+//   0 on success, otherwise an error code.
+int dl_comp_decompress_buffer_INT8ToFLOAT32(const void *srcBuffer,
+                                            void *dstBuffer,
+                                            size_t dataCount);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/dlcp/lib/.gitignore b/dlcp/lib/.gitignore
new file mode 100644
index 00000000..da883b4b
--- /dev/null
+++ b/dlcp/lib/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
diff --git a/dlcp/src/dl_compression.cpp b/dlcp/src/dl_compression.cpp
new file mode 100644
index 00000000..671a4175
--- /dev/null
+++ b/dlcp/src/dl_compression.cpp
@@ -0,0 +1,168 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#include <stddef.h>
+
+#include "dl_compression.h"
+#include "dl_compression_impl.hpp"
+
+
+dl_comp_return_t dl_comp_compress_buffer( const void *src,
+                                          void *dst,
+                                          size_t dataCount,
+                                          void *diff,
+                                          dl_comp_data_type_t src_data_type,
+                                          size_t comp_ratio,
+                                          dl_comp_method_t method )
+{
+    // Parameter checking
+    if (src_data_type != DL_COMP_FLOAT32) {
+        return DL_COMP_FAIL_SRC_DATA_TYPE_NOT_SUPPORTED;
+    }
+
+    if (comp_ratio != 4) {
+        return DL_COMP_FAIL_RATIO_NOT_SUPPORTED;
+    }
+
+    if (method != DL_COMP_DFP) {
+        return DL_COMP_FAIL_COMP_METHOD_NOT_SUPPORTED;
+    }
+
+    // Do compression
+    DLCompressBase *compInst = DLCompressBase::get_compression_instance(DL_COMP_DFP);
+
+    return compInst->compress_buffer((float *)src,
+                                     (int8_t *)dst,
+                                     (float *)diff,
+                                     dataCount,
+                                     src == dst);
+}
+
+dl_comp_return_t dl_comp_decompress_buffer( const void *src,
+                                            void *dst,
+                                            size_t dataCount )
+{
+    dl_comp_head *compHead = (dl_comp_head *)src;
+
+    if (compHead->magic != DL_COMP_HEAD_MAGIC) {
+        // This is a work-around for MLSL: in MPI_Test an already
+        // decompressed buffer is sometimes handed back to the
+        // compression lib for decompression, so we simply ignore it
+        // in this case.
+        return DL_COMP_OK;
+    }
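+
+    // For example (editorial note), with DL_COMP_BLOCK_NUM = 256
+    // (defined in dl_compression_impl.hpp), dataCount = 1000 rounds up
+    // to 4 blocks below, while dataCount = 1024 gives exactly 4.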
+    size_t blockCount = dataCount % DL_COMP_BLOCK_NUM == 0 ?
+                        (dataCount / DL_COMP_BLOCK_NUM) :
+                        (dataCount / DL_COMP_BLOCK_NUM + 1);
+    // do de-compression
+    DLCompressBase *compInst = DLCompressBase::get_compression_instance(DL_COMP_DFP);
+
+    return compInst->decompress_buffer((const int8_t *)src, (float *)dst, blockCount);
+}
+
+dl_comp_return_t dl_comp_compressed_buffer_sum( const void *inBuffer1,
+                                                const void *inBuffer2,
+                                                size_t dataCount,
+                                                void *outBuffer )
+{
+    return DL_COMP_FAIL_NOT_SUPPORTED;
+}
+
+size_t dl_comp_get_sizeof_block( dl_comp_data_type_t src_data_type,
+                                 size_t comp_ratio,
+                                 dl_comp_method_t method )
+{
+    size_t blockSize = 0;
+    if (src_data_type == DL_COMP_FLOAT32 &&
+        comp_ratio == 4 &&
+        method == DL_COMP_DFP) {
+        blockSize = sizeof(int8_t) * DL_COMP_BLOCK_NUM + sizeof(dl_comp_head);
+    }
+
+    return blockSize;
+}
+
+size_t dl_comp_get_elem_num_in_block()
+{
+    return DL_COMP_BLOCK_NUM;
+}
+
+dl_comp_return_t dl_comp_compressed_buffer_reduce_sum( const void *inBuffer,
+                                                       void *inoutBuffer,
+                                                       size_t blockCount )
+{
+    DLCompressBase *compInst = DLCompressBase::get_compression_instance(DL_COMP_DFP);
+
+    return compInst->compress_sum2((const int8_t *)inBuffer, (int8_t *)inoutBuffer, blockCount);
+}
+
+size_t dl_comp_convert_block_count(size_t dataCount)
+{
+    size_t blockCount = dataCount % DL_COMP_BLOCK_NUM == 0 ?
+                        (dataCount / DL_COMP_BLOCK_NUM) :
+                        (dataCount / DL_COMP_BLOCK_NUM + 1);
+    return blockCount;
+}
+
+bool dl_comp_check_running_environ()
+{
+    // Currently, we only check whether avx512 instructions are supported.
+    return dl_comp_check_avx512_supported();
+}
+
+int dl_comp_compress_buffer_FLOAT32ToINT8( const void *srcBuffer,
+                                           void *dstBuffer,
+                                           void *diff,
+                                           size_t dataCount)
+{
+    DLCompressBase *compInst = DLCompressBase::get_compression_instance(DL_COMP_DFP);
+
+    dl_comp_return_t ret = compInst->compress_buffer((float *)srcBuffer,
+                                                     (int8_t *)dstBuffer,
+                                                     (float *)diff,
+                                                     dataCount,
+                                                     srcBuffer == dstBuffer);
+    return ret;
+}
+
+int dl_comp_decompress_buffer_INT8ToFLOAT32(const void *srcBuffer,
+                                            void *dstBuffer,
+                                            size_t dataCount)
+{
+    dl_comp_head *compHead = (dl_comp_head *)srcBuffer;
+
+    if (compHead->magic != DL_COMP_HEAD_MAGIC) {
+        // Same MLSL work-around as in dl_comp_decompress_buffer above:
+        // an already decompressed buffer may be handed back for
+        // decompression, so we simply ignore it in this case.
+        return DL_COMP_OK;
+    }
+
+    // do de-compression
+    size_t blockCount = dataCount % DL_COMP_BLOCK_NUM == 0 ?
+                        (dataCount / DL_COMP_BLOCK_NUM) :
+                        (dataCount / DL_COMP_BLOCK_NUM + 1);
+    DLCompressBase *compInst = DLCompressBase::get_compression_instance(DL_COMP_DFP);
+    dl_comp_return_t ret = compInst->decompress_buffer((const int8_t *)srcBuffer, (float *)dstBuffer, blockCount);
+
+    return ret;
+}
diff --git a/dlcp/src/dl_compression_impl.cpp b/dlcp/src/dl_compression_impl.cpp
new file mode 100644
index 00000000..c51cb3b9
--- /dev/null
+++ b/dlcp/src/dl_compression_impl.cpp
@@ -0,0 +1,696 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <immintrin.h>
+
+#include "dl_compression.h"
+#include "dl_compression_impl.hpp"
+#include "dl_compression_util.hpp"
+
+bool g_avx512_supported = dl_comp_check_avx512_supported();
+
+bool dl_comp_check_avx512_supported()
+{
+    const unsigned long avx512_features = (_FEATURE_AVX512F | _FEATURE_AVX512CD |
+                                           _FEATURE_AVX512VL | _FEATURE_AVX512BW);
+    return _may_i_use_cpu_feature( avx512_features );
+}
+
+DLCompressBase* DLCompressBase::get_compression_instance(dl_comp_method_t method)
+{
+    DLCompressBase *pInstance = NULL;
+    static DLCompressDFP dfpInstance;
+
+    switch(method) {
+        case DL_COMP_DFP:
+            pInstance = &dfpInstance;
+            break;
+
+        case DL_COMP_NONE:
+
+        default:
+            pInstance = NULL;
+            DLCP_LOG(INFO, "Unsupported Compression Method");
+    }
+
+    return pInstance;
+}
+
+dl_comp_return_t DLCompressDFP::compress_block(float *src, int8_t *dst, float *diff, size_t count, int *scale)
+{
+    // Do quantization
+    // only handles a float buffer as src and int8_t as dst
+    float max_abs = 0.;
+    float max_abs_log2 = 0.;
+    float round_value, d_value;
+    int8_t decomp_value = 0;
+
+    if (NULL != diff) {
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+        for (size_t i = 0; i < count; ++i) {
+            src[i] += diff[i];
+        }
+    }
+
+    for (size_t i = 0; i < count; ++i) {
+        max_abs = std::max(max_abs, std::abs(src[i]));
+    }
+
+    max_abs_log2 = std::log2f(max_abs);
+    // If max_abs_log2 is equal to -inf, max_abs is 0.
+    // In this case, we set the scale to 0.
+    if (max_abs_log2 * (-1.0) == std::numeric_limits<float>::infinity()) {
+        *scale = 0;
+    } else {
+        *scale = 8*sizeof(int8_t) - ((int)std::ceil(max_abs_log2) + 1);
+    }
+
+    float pow2_scale = std::pow(2, *scale);
+
+    for (size_t i = 0; i < count; ++i) {
+        // Corner case: src[i]*pow2_scale can exceed 127.5f, which would
+        // round up to 128, out of the int8_t range (-128..127). In that
+        // case we clamp it to 127.
+        round_value = std::round(src[i]*pow2_scale);
+        if (round_value <= 127.0f) {
+            decomp_value = (int8_t)round_value;
+        } else {
+            decomp_value = 127;
+        }
+        if (NULL != diff) {
+            d_value = ((float)decomp_value) / pow2_scale;
+            diff[i] = src[i] - d_value;
+        }
+        dst[i] = decomp_value;
+    }
+
+    return DL_COMP_OK;
+}
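+
+// Editorial walk-through of the scale selection above: for
+// max_abs = 0.3, log2f(0.3) ~= -1.74 and ceil(-1.74) = -1, so
+// scale = 8 - (-1 + 1) = 8 and pow2_scale = 256; 0.3 is then stored as
+// round(0.3 * 256) = 77, which decompresses to 77 / 256 ~= 0.3008.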
+
+dl_comp_return_t DLCompressDFP::avx512_compress_block(float *src, int8_t *dst, float *diff, size_t count, int *scale)
+{
+    // If count is smaller than a full block we use the non-avx512
+    // implementation, which also handles counts that are not a multiple
+    // of 16, the number of floats one avx512 register can hold.
+    if (count < DL_COMP_BLOCK_NUM) {
+        return compress_block(src, dst, diff, count, scale);
+    }
+
+
+    DLCP_ASSERT(count % 16 == 0, "count can't be divided by 16!");
+
+    // Do quantization
+    // Error FeedBack
+    if (NULL != diff) {
+        dl_comp_avx512_float_vector_add(diff, src, count);
+    }
+
+    float max_abs = 0.;
+    float max_abs_log2 = 0.;
+    size_t group_size = 16;
+    __m512 max_vec = _mm512_set1_ps(0.0f);
+
+    for (size_t idx = 0; idx < count; idx += group_size) {
+        __m512 float_vec = _mm512_loadu_ps(src+idx);
+        __m512 float_abs_vec = _mm512_abs_ps(float_vec);
+        __mmask16 cmp_mask = _mm512_cmp_ps_mask(max_vec, float_abs_vec, _CMP_GE_OS);
+        max_vec = _mm512_mask_mov_ps(float_abs_vec, cmp_mask, max_vec);
+    }
+
+    max_abs = _mm512_reduce_max_ps(max_vec);
+
+    max_abs_log2 = std::log2f(max_abs);
+    // If max_abs_log2 is equal to -inf, max_abs is 0.
+    // In this case, we set the scale to 0.
+    if (max_abs_log2 * (-1.0) == std::numeric_limits<float>::infinity()) {
+        *scale = 0;
+    } else {
+        *scale = 8*sizeof(int8_t) - ((int)std::ceil(max_abs_log2) + 1);
+    }
+
+    float pow2_scale = std::pow(2, *scale);
+
+    float pow2_scale_inv = 1.0f / std::pow(2, *scale);
+    __m512 pow2_scale_v = _mm512_set1_ps(pow2_scale);
+    __m512 pow2_scale_inv_v = _mm512_set1_ps(pow2_scale_inv);
+    __mmask16 mask = _mm512_int2mask(0xFFFF);
+    float *f32_diff;
+    for (size_t idx = 0; idx < count; idx += group_size) {
+        float *f32_src = src + idx;
+        int8_t *i8_dst = dst + idx;
+        __m512 f32_src_v = _mm512_loadu_ps(f32_src);
+        __m512 f32_result_v = _mm512_mul_ps(f32_src_v, pow2_scale_v);
+        __m512i i32_round_v = _mm512_cvt_roundps_epi32(f32_result_v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+        // saturation is already handled by the converting store instruction
+        _mm512_mask_cvtsepi32_storeu_epi8(i8_dst, mask, i32_round_v);
+        if (NULL != diff) {
+            f32_diff = diff + idx;
+            __m512 f32_round_v = _mm512_cvt_roundepi32_ps(i32_round_v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+            __m512 f32_dequant_v = _mm512_mul_ps(f32_round_v, pow2_scale_inv_v);
+            __m512 f32_diff_v = _mm512_sub_ps(f32_src_v, f32_dequant_v);
+            _mm512_storeu_ps(f32_diff, f32_diff_v);
+        }
+    }
+    return DL_COMP_OK;
+}
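+
+// Buffer layout produced by compress_buffer below (editorial sketch,
+// derived from dl_comp_head and DL_COMP_BLOCK_NUM): each block is a
+// 12-byte packed head followed by up to 256 int8 payload bytes,
+//
+//   [head | 256 x int8][head | 256 x int8]...[head | tail x int8]
+//
+// In the inPlace case each block is quantized first and then shifted
+// up by sizeof(dl_comp_head) with memmove to make room for its head.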
+
+dl_comp_return_t DLCompressDFP::compress_buffer(float *src, int8_t *dst, float *diff, size_t count, bool inPlace)
+{
+    dl_comp_return_t ret = DL_COMP_FAIL;
+    dl_comp_head *compHead = NULL;
+    int scale = 0;
+    size_t comp_block = 0;
+    for (size_t i = 0; i < count; i += DL_COMP_BLOCK_NUM) {
+        comp_block = (i + DL_COMP_BLOCK_NUM) < count ? DL_COMP_BLOCK_NUM : (count - i);
+        compHead = (dl_comp_head *)dst;
+        if (!inPlace) {
+            dst += sizeof(dl_comp_head);
+        }
+        if (!avx512_enabled_ || comp_block < DL_COMP_BLOCK_NUM) {
+            ret = compress_block(src, dst, diff, comp_block, &scale);
+        } else {
+            ret = avx512_compress_block(src, dst, diff, comp_block, &scale);
+        }
+        if (ret == DL_COMP_FAIL) {
+            return ret;
+        }
+        if (inPlace) {
+            memmove(dst+sizeof(dl_comp_head), dst, comp_block);
+            dst += sizeof(dl_comp_head);
+        }
+        compHead->magic = DL_COMP_HEAD_MAGIC;
+        compHead->exponent = scale;
+        compHead->payloadLen = comp_block;
+        dst += comp_block;
+        src += comp_block;
+        if (NULL != diff) {
+            diff += comp_block;
+        }
+    }
+
+    return DL_COMP_OK;
+}
+
+dl_comp_return_t DLCompressDFP::compress_buffer(float *src, int8_t *dst, size_t count, bool inPlace)
+{
+    dl_comp_return_t ret = compress_buffer(src, dst, NULL, count, inPlace);
+    return ret;
+}
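+
+// Editorial note: decompress_buffer walks the blocks from last to
+// first so that in-place decompression (src == dst) does not clobber
+// compressed blocks that have not been read yet; a block's float
+// output is larger than its int8 payload. The first block, whose
+// payload overlaps the start of the output, is additionally staged
+// through the local decomp_block copy.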
+
+dl_comp_return_t DLCompressDFP::decompress_buffer(const int8_t *src, float *dst, size_t blockCount)
+{
+    dl_comp_head *compHead = NULL;
+    dl_comp_return_t ret;
+    size_t count;
+    int scale;
+    const int8_t *origSrc = src;
+    float *origDst = dst;
+    int8_t decomp_block[DL_COMP_BLOCK_NUM];
+
+
+    if (blockCount == 0) {
+        return DL_COMP_OK;
+    }
+
+    do {
+        src = origSrc + (blockCount - 1) * (sizeof(dl_comp_head) + DL_COMP_BLOCK_NUM);
+        dst = origDst + (blockCount - 1) * DL_COMP_BLOCK_NUM;
+        compHead = (dl_comp_head *)src;
+        if (compHead->magic != DL_COMP_HEAD_MAGIC) {
+            return DL_COMP_FAIL_INVALID_COMPRESSED_FORMAT;
+        }
+        count = compHead->payloadLen;
+        scale = compHead->exponent;
+        if (blockCount == 1) {
+            memcpy(decomp_block, src + sizeof(dl_comp_head), count);
+        }
+        if (!avx512_enabled_) {
+            if (blockCount != 1) {
+                ret = decompress_block(src + sizeof(dl_comp_head), dst, count, scale);
+            } else {
+                ret = decompress_block(decomp_block, dst, count, scale);
+            }
+        } else {
+            if (blockCount != 1) {
+                ret = avx512_decompress_block(src + sizeof(dl_comp_head), dst, count, scale);
+            } else {
+                ret = avx512_decompress_block(decomp_block, dst, count, scale);
+            }
+        }
+        if (ret != DL_COMP_OK) {
+            return ret;
+        }
+        blockCount--;
+    } while (blockCount > 0);
+
+    return ret;
+}
+
+dl_comp_return_t DLCompressDFP::avx512_decompress_block(const int8_t *src, float *dst, size_t count, int scale)
+{
+    // If count is smaller than a full block we use the non-avx512
+    // implementation, which also handles counts that are not a multiple
+    // of 16, the number of floats one avx512 register can hold.
+    if (count < DL_COMP_BLOCK_NUM) {
+        return decompress_block(src, dst, count, scale);
+    }
+
+    DLCP_ASSERT(count % 16 == 0, "count can't be divided by 16!");
+
+    // Do de-quantization
+    float pow2_scale_inv = 1.0f / std::pow(2, scale);
+    size_t group_size = 16;
+    size_t num_group = count / group_size;
+    __m512 scale_factor = _mm512_set1_ps(pow2_scale_inv);
+
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for (size_t idx = 0; idx < count; idx += group_size) {
+        __m512 float_vec = _mm512_set_ps((float)src[idx + 15], (float)src[idx + 14],
+                                         (float)src[idx + 13], (float)src[idx + 12],
+                                         (float)src[idx + 11], (float)src[idx + 10],
+                                         (float)src[idx + 9],  (float)src[idx + 8],
+                                         (float)src[idx + 7],  (float)src[idx + 6],
+                                         (float)src[idx + 5],  (float)src[idx + 4],
+                                         (float)src[idx + 3],  (float)src[idx + 2],
+                                         (float)src[idx + 1],  (float)src[idx]);
+        __m512 result_vec = _mm512_mul_ps(float_vec, scale_factor);
+        _mm512_storeu_ps(dst+idx, result_vec);
+    }
+    return DL_COMP_OK;
+}
+
+dl_comp_return_t DLCompressDFP::decompress_block(const int8_t *src, float *dst, size_t count, int scale)
+{
+    // Do de-quantization
+    // only handles int8_t as src and float as dst
+    float pow2_scale_inv = 1.0f / std::pow(2, scale);
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for (size_t i = 0; i < count; ++i) {
+        dst[i] = (float)src[i];
+        dst[i] *= pow2_scale_inv;
+    }
+
+    return DL_COMP_OK;
+}
+
+size_t DLCompressDFP::get_dataCount_in_compressed_buffer(const int8_t *src, size_t blockCount) {
+    size_t count = 0;
+    size_t sum = 0;
+    dl_comp_head *compHead = NULL;
+
+    if (blockCount == 0) {
+        return sum;
+    }
+
+    do {
+        compHead = (dl_comp_head *)src;
+        DLCP_ASSERT(compHead->magic == DL_COMP_HEAD_MAGIC, "Invalid compHead!!!\n");
+        count = compHead->payloadLen;
+        src += sizeof(dl_comp_head);
+        src += count;
+        sum += count;
+        blockCount--;
+    } while (blockCount > 0);
+
+    return sum;
+}
+
+dl_comp_return_t DLCompressDFP::compress_sum(const int8_t *invec, int8_t *inoutvec, size_t blockCount)
+{
+    dl_comp_return_t ret = DL_COMP_OK;
+    const size_t blockSize = sizeof(dl_comp_head) + DL_COMP_BLOCK_NUM;
+    size_t inCount = get_dataCount_in_compressed_buffer((const int8_t*)invec, blockCount);
+    size_t outCount = get_dataCount_in_compressed_buffer((const int8_t*)inoutvec, blockCount);
+
+    DLCP_ASSERT(inCount == outCount, "inCount is not equal to outCount");
+
+    float deqBuf1[DL_COMP_BLOCK_NUM];
+    float deqBuf2[DL_COMP_BLOCK_NUM];
+
+    for (size_t i = 0; i < inCount; i += DL_COMP_BLOCK_NUM, invec += blockSize, inoutvec += blockSize) {
+        size_t compBlock = (i + DL_COMP_BLOCK_NUM) < inCount ? DL_COMP_BLOCK_NUM : (inCount - i);
+        decompress_buffer(invec, deqBuf1, 1);
+        decompress_buffer(inoutvec, deqBuf2, 1);
+        if (!avx512_enabled_) {
+            dl_comp_float_vector_add(deqBuf2, deqBuf1, compBlock);
+        } else {
+            dl_comp_avx512_float_vector_add(deqBuf2, deqBuf1, compBlock);
+        }
+        ret = compress_buffer(deqBuf1, inoutvec, compBlock, false);
+        if (ret != DL_COMP_OK) {
+            return ret;
+        }
+    }
+
+    return ret;
+}
+
+dl_comp_return_t DLCompressDFP::compress_sum2(const int8_t *invec, int8_t *inoutvec, size_t blockCount)
+{
+    const size_t blockSize = sizeof(dl_comp_head) + DL_COMP_BLOCK_NUM;
+    dl_comp_return_t ret = DL_COMP_OK;
+    // size_t count = get_dataCount_in_compressed_buffer((const int8_t*)invec, blockCount);
+
+    if (!avx512_enabled_) {
+        for (size_t i = 0; i < blockCount; i++, invec += blockSize, inoutvec += blockSize) {
+            ret = compress_block_sum(invec, inoutvec);
+            if (ret != DL_COMP_OK) {
+                return ret;
+            }
+        }
+    } else {
+        for (size_t i = 0; i < blockCount; i++, invec += blockSize, inoutvec += blockSize) {
+            ret = compress_block_sum2(invec, inoutvec);
+            if (ret != DL_COMP_OK) {
+                return ret;
+            }
+        }
+    }
+
+    return ret;
+}
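+
+// Editorial note on the two block-sum variants below: both align the
+// operands to the smaller of the two exponents by arithmetic right
+// shift, add the aligned int8 payloads, and, if any |sum| reaches 128,
+// shift the result right once more and decrement the shared exponent.
+// compress_block_sum is the scalar path, compress_block_sum2 the
+// AVX-512 path.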
+
+dl_comp_return_t DLCompressDFP::compress_block_sum(const int8_t *invec, int8_t *inoutvec)
+{
+    dl_comp_head *inHead = (dl_comp_head *)invec;
+    dl_comp_head *outHead = (dl_comp_head *)inoutvec;
+
+    size_t count = inHead->payloadLen;
+    int inScale = inHead->exponent;
+    int outScale = outHead->exponent;
+
+    if ((inHead->magic != DL_COMP_HEAD_MAGIC) || (outHead->magic != DL_COMP_HEAD_MAGIC)) {
+        return DL_COMP_FAIL_INVALID_COMPRESSED_FORMAT;
+    }
+
+    if (inScale == 0) {
+        // invec contains all zeros; nothing to add.
+        return DL_COMP_OK;
+    }
+
+    if (outScale == 0) {
+        // inoutvec contains all zeros; just copy invec over.
+        memcpy(inoutvec, invec, sizeof(dl_comp_head) + count);
+        return DL_COMP_OK;
+    }
+
+    // The scales are exponents of 2: if they differ by more than 8, the
+    // smaller-magnitude operand vanishes after alignment, so we keep
+    // the larger-magnitude one and don't need to sum up.
+    if (std::abs(inScale - outScale) > 8) {
+        if (outScale < inScale) {
+            return DL_COMP_OK;
+        } else {
+            memcpy(inoutvec, invec, sizeof(dl_comp_head) + count);
+            return DL_COMP_OK;
+        }
+    }
+
+    int resvec[DL_COMP_BLOCK_NUM] = {0};
+    int minScale = std::min(inScale, outScale);
+    int inScaleGap = inScale - minScale;
+    int outScaleGap = outScale - minScale;
+    int max_abs = 0;
+
+
+    invec += sizeof(dl_comp_head);
+    inoutvec += sizeof(dl_comp_head);
+
+#ifdef _OPENMP
+#pragma omp parallel for reduction(|:max_abs)
+#endif
+    for (size_t i = 0; i < count; i++) {
+        int8_t left = invec[i] >> inScaleGap;
+        int8_t right = inoutvec[i] >> outScaleGap;
+        resvec[i] = left + right;
+        // This is the compensation for the final right shift. To make
+        // it an unbiased estimator, we only compensate when both the
+        // left operand and the sum are odd.
+        resvec[i] += resvec[i] & left & 1;
+        max_abs |= (resvec[i] > 0 ? resvec[i] : (-resvec[i]));
+    }
+
+    if (max_abs >= 128) {
+        minScale -= 1;
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+        for (size_t i = 0; i < count; i++) {
+            inoutvec[i] = resvec[i] >> 1;
+        }
+    } else {
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+        for (size_t i = 0; i < count; i++) {
+            inoutvec[i] = resvec[i];
+        }
+    }
+
+    outHead->exponent = minScale;
+    return DL_COMP_OK;
+}
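+
+// Worked example (editorial): with inScale = 6 and outScale = 5 the
+// gap is 1, so each invec value is shifted right once before the add;
+// 100 at scale 6 (100/64 ~= 1.5625) becomes 50 at scale 5
+// (50/32 ~= 1.5625), i.e. the represented value is unchanged.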
+
+dl_comp_return_t DLCompressDFP::compress_block_sum2(const int8_t *invec, int8_t *inoutvec)
+{
+    dl_comp_head *inHead = (dl_comp_head *)invec;
+    dl_comp_head *outHead = (dl_comp_head *)inoutvec;
+
+    size_t count = inHead->payloadLen;
+    int inScale = inHead->exponent;
+    int outScale = outHead->exponent;
+
+    if ((inHead->magic != DL_COMP_HEAD_MAGIC) || (outHead->magic != DL_COMP_HEAD_MAGIC)) {
+        return DL_COMP_FAIL_INVALID_COMPRESSED_FORMAT;
+    }
+
+    if (count % 16 != 0) {
+        return compress_block_sum(invec, inoutvec);
+    }
+
+    if (inScale == 0) {
+        // invec contains all zeros; nothing to add.
+        return DL_COMP_OK;
+    }
+
+    if (outScale == 0) {
+        // inoutvec contains all zeros; just copy invec over.
+        memcpy(inoutvec, invec, sizeof(dl_comp_head) + count);
+        return DL_COMP_OK;
+    }
+
+    // The scales are exponents of 2: if they differ by more than 7, the
+    // smaller-magnitude operand vanishes after alignment, so we keep
+    // the larger-magnitude one and don't need to sum up.
+    if (std::abs(inScale - outScale) > 7) {
+        if (outScale < inScale) {
+            return DL_COMP_OK;
+        } else {
+            memcpy(inoutvec, invec, sizeof(dl_comp_head) + count);
+            return DL_COMP_OK;
+        }
+    }
+
+    int32_t resvec[DL_COMP_BLOCK_NUM] = {0};
+    int minScale = std::min(inScale, outScale);
+    int inScaleGap = inScale - minScale;
+    int outScaleGap = outScale - minScale;
+    int max_abs = 0;
+    size_t group_size = 16;
+    __mmask16 mask = _mm512_int2mask(0xFFFF);
+    __m512i i32_one_v = _mm512_set1_epi32(1);
+    __m512i i32_or_v = _mm512_set1_epi32(0);
+
+    invec += sizeof(dl_comp_head);
+    inoutvec += sizeof(dl_comp_head);
+
+    for (size_t i = 0; i < count; i += group_size) {
+        const int8_t *i8_left = invec + i;
+        int8_t *i8_right = inoutvec + i;
+        int32_t *i32_result = resvec + i;
+        __m128i i8_left_v = _mm_maskz_loadu_epi8(mask, i8_left);
+        __m128i i8_right_v = _mm_maskz_loadu_epi8(mask, i8_right);
+        __m512i i32_left_v = _mm512_cvtepi8_epi32(i8_left_v);
+        __m512i i32_right_v = _mm512_cvtepi8_epi32(i8_right_v);
+        i32_left_v = _mm512_srai_epi32(i32_left_v, inScaleGap);
+        i32_right_v = _mm512_srai_epi32(i32_right_v, outScaleGap);
+        __m512i i32_result_v = _mm512_add_epi32(i32_left_v, i32_right_v);
+        // compensation for the final right shift, as in compress_block_sum
+        __m512i i32_comp_v = _mm512_and_epi32(i32_result_v, i32_left_v);
+        i32_comp_v = _mm512_and_epi32(i32_comp_v, i32_one_v);
+        i32_result_v = _mm512_add_epi32(i32_result_v, i32_comp_v);
+        _mm512_mask_storeu_epi32(i32_result, mask, i32_result_v);
+        // OR the absolute values together so overflow of the whole
+        // result can be detected below.
+        i32_result_v = _mm512_abs_epi32(i32_result_v);
+        i32_or_v = _mm512_or_epi32(i32_result_v, i32_or_v);
+    }
+
+    max_abs = _mm512_reduce_or_epi32(i32_or_v);
+
+    if (max_abs >= 128) {
+        minScale -= 1;
+        for (size_t i = 0; i < count; i += group_size) {
+            int32_t *i32_res = resvec + i;
+            int8_t *i8_inout = inoutvec + i;
+            __m512i i32resvec_v = _mm512_loadu_si512(i32_res);
+            i32resvec_v = _mm512_srai_epi32(i32resvec_v, 1);
+            _mm512_mask_cvtsepi32_storeu_epi8(i8_inout, mask, i32resvec_v);
+        }
+    } else {
+        for (size_t i = 0; i < count; i += group_size) {
+            int32_t *i32_res = resvec + i;
+            int8_t *i8_inout = inoutvec + i;
+            __m512i i32resvec_v = _mm512_loadu_si512(i32_res);
+            _mm512_mask_cvtsepi32_storeu_epi8(i8_inout, mask, i32resvec_v);
+        }
+    }
+
+    outHead->exponent = minScale;
+    return DL_COMP_OK;
+}
+
+void DLCompressDFP::dump_compressed_buffer(const int8_t *src, size_t blockCount)
+{
+    size_t count = 0;
+    dl_comp_head *compHead = NULL;
+    int scale = 0;
+    float pow2_scale = .0;
+
+    if (blockCount == 0) return;
+
+    DLCP_LOG(INFO, "Enter function dump_compressed_buffer...\n");
+    do {
+        compHead = (dl_comp_head *)src;
+        if (compHead->magic != DL_COMP_HEAD_MAGIC) {
+            DLCP_LOG(INFO, "Invalid compHead!!!\n");
+            return;
+        }
+        count = compHead->payloadLen;
+        scale = compHead->exponent;
+        DLCP_LOG(INFO, "count = %lu Scale = %d\n", count, scale);
+        pow2_scale = std::pow(2, scale);
+        src += sizeof(dl_comp_head);
+        for (size_t i = 0; i < count; i++) {
+            float d_value = ((float)src[i])/pow2_scale;
+            DLCP_LOG(INFO, "compressed value %d decompressed value %f\n", src[i], d_value);
+        }
+        src += count;
+        blockCount--;
+    } while (blockCount > 0);
+    DLCP_LOG(INFO, "End of function dump_compressed_buffer...\n");
+}
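+
+// Editorial note: check_compressed_buffer is a debugging aid; it
+// dequantizes each block and flags any element whose sign differs from
+// the original float, which would indicate a corrupted scale or
+// payload.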
+
+bool DLCompressDFP::check_compressed_buffer(const float *comp1, const int8_t *comp2, const float *diff, size_t blockCount)
+{
+    float epsilon = 1e-9;
+    dl_comp_head *compHead = NULL;
+    int scale = 0;
+    float pow2_scale = .0;
+    size_t count = 0;
+
+    do {
+        compHead = (dl_comp_head *)comp2;
+        if (compHead->magic != DL_COMP_HEAD_MAGIC) {
+            DLCP_LOG(ERROR, "Invalid compHead!!!\n");
+            return false;
+        }
+        count = compHead->payloadLen;
+        scale = compHead->exponent;
+        comp2 += sizeof(dl_comp_head);
+        pow2_scale = std::pow(2, scale);
+        for (size_t i = 0; i < count; i++) {
+            float d_value = ((float)comp2[i])/pow2_scale;
+            if (d_value * comp1[i] < 0.0f) {
+                DLCP_LOG(ERROR, "detected big gap src = %f d_value = %f diff = %f\n", comp1[i], d_value, diff[i]);
+                DLCP_LOG(ERROR, "scale = %d, pow2_scale = %f, compressed_value = %d\n", scale, std::pow(2, scale), comp2[i]);
+                return false;
+            }
+        }
+        comp1 += count;
+        comp2 += count;
+        diff += count;
+        blockCount--;
+    } while (blockCount > 0);
+
+    return true;
+}
+
+dl_comp_return_t compress_helper(float *src, int8_t *dst, float *diff, dl_comp_method_t method, size_t count)
+{
+    DLCompressBase *compInst = DLCompressBase::get_compression_instance(method);
+    dl_comp_return_t ret = compInst->compress_buffer(src, dst, diff, count);
+    return ret;
+}
+
+dl_comp_return_t decompress_helper(const int8_t *src, float *dst, dl_comp_method_t method)
+{
+    DLCompressBase *compInst = DLCompressBase::get_compression_instance(method);
+    return compInst->decompress_buffer(src, dst, 0);
+}
+
+void dl_comp_float_vector_add(const float* invec, float *inoutvec, size_t count)
+{
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for (size_t i = 0; i < count; ++i) {
+        inoutvec[i] += invec[i];
+    }
+}
+
+void dl_comp_avx512_float_vector_add(const float* invec, float *inoutvec, size_t count)
+{
+    // If count is smaller than 16 we use the non-avx512 implementation;
+    // 16 is the number of elements one avx512 register can hold.
+    if (count < 16) {
+        return dl_comp_float_vector_add(invec, inoutvec, count);
+    }
+
+    // If count can't be divided by 16, we handle the trailing remainder
+    // with the non-avx512 implementation.
+    if (count % 16 != 0) {
+        size_t remainder = count % 16;
+        count -= remainder;
+        dl_comp_float_vector_add(invec+count, inoutvec+count, remainder);
+    }
+
+    size_t group_size = 16;
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for (size_t idx = 0; idx < count; idx += group_size) {
+        const float *fvec1 = invec + idx;
+        float *fvec2 = inoutvec + idx;
+        __m512 operand1 = _mm512_loadu_ps(fvec1);
+        __m512 operand2 = _mm512_loadu_ps(fvec2);
+        __m512 result = _mm512_add_ps(operand1, operand2);
+        _mm512_storeu_ps(fvec2, result);
+    }
+}
+
diff --git a/dlcp/src/dl_compression_impl.hpp b/dlcp/src/dl_compression_impl.hpp
new file mode 100644
index 00000000..9442f72b
--- /dev/null
+++ b/dlcp/src/dl_compression_impl.hpp
@@ -0,0 +1,122 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef DL_COMPRESSION_IMPL_HPP
+#define DL_COMPRESSION_IMPL_HPP
+
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "dl_compression.h"
+
+// Disable the copy and assignment operator for a class
+
+#define DISABLE_COPY_AND_ASSIGN(classname) \
+private:\
+    classname(const classname&);\
+    classname& operator=(const classname&)
+
+#define DL_COMP_BLOCK_NUM 256
+
+#define DL_COMP_HEAD_MAGIC 0xdeadbeef
+
+typedef struct __attribute__((__packed__))
+{
+    int magic;
+    int payloadLen;
+    int exponent;
+} dl_comp_head;
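+
+// Editorial sanity check: the packed head is three ints (12 bytes on
+// the ABIs targeted here), which the block-size math in
+// dl_comp_get_sizeof_block relies on.
+static_assert(sizeof(dl_comp_head) == 3 * sizeof(int),
+              "dl_comp_head must remain packed");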
+
+bool dl_comp_check_avx512_supported(void);
+
+void dl_comp_float_vector_add(const float *invec, float *inoutvec, size_t count);
+
+void dl_comp_avx512_float_vector_add(const float *invec, float *inoutvec, size_t count);
+
+void dl_comp_int8_vector_add(const int8_t *invec, int8_t *inoutvec, size_t count);
+
+void dl_comp_avx512_int8_vector_add(const int8_t *invec, int8_t *inoutvec, size_t count);
+
+dl_comp_return_t compress_helper(float *src, int8_t *dst, float *diff, dl_comp_method_t method, size_t count);
+
+dl_comp_return_t decompress_helper(const int8_t *src, float *dst, dl_comp_method_t method);
+
+/*
+ * Abstract base class for quantization
+ */
+class DLCompressBase {
+
+public:
+    DLCompressBase() = default;
+    // Compress with error feedback
+    virtual dl_comp_return_t compress_buffer(float *src, int8_t *dst, float *diff, size_t count, bool inPlace = false) = 0;
+    // Compress without error feedback
+    virtual dl_comp_return_t compress_buffer(float *src, int8_t *dst, size_t count, bool inPlace = false) = 0;
+    virtual dl_comp_return_t decompress_buffer(const int8_t *src, float *dst, size_t blockCount) = 0;
+    virtual size_t get_dataCount_in_compressed_buffer(const int8_t *src, size_t blockCount) = 0;
+    virtual dl_comp_return_t compress_sum(const int8_t *invec, int8_t *inoutvec, size_t blockCount) = 0;
+    virtual dl_comp_return_t compress_sum2(const int8_t *invec, int8_t *inoutvec, size_t blockCount) = 0;
+    virtual void dump_compressed_buffer(const int8_t *src, size_t blockCount) = 0;
+    virtual bool check_compressed_buffer(const float *comp1, const int8_t *comp2, const float *diff, size_t blockCount) = 0;
+    virtual ~DLCompressBase(void) {};
+
+public:
+    static DLCompressBase* get_compression_instance(dl_comp_method_t method);
+
+    DISABLE_COPY_AND_ASSIGN(DLCompressBase);
+};
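+
+// Illustrative use of the factory above (editorial comment):
+//
+//   DLCompressBase *inst =
+//       DLCompressBase::get_compression_instance(DL_COMP_DFP);
+//   inst->compress_buffer(src, dst, diff, count);
+//
+// The returned instance is a function-local static, so the pointer is
+// never owned by the caller and must not be freed.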
+
+
+class DLCompressDFP : public DLCompressBase {
+
+    friend class DLCompressBase;
+public:
+    virtual ~DLCompressDFP(void) {};
+    virtual dl_comp_return_t compress_buffer(float *src, int8_t *dst, float *diff, size_t count, bool inPlace = false);
+    virtual dl_comp_return_t compress_buffer(float *src, int8_t *dst, size_t count, bool inPlace = false);
+    virtual dl_comp_return_t decompress_buffer(const int8_t *src, float *dst, size_t blockCount);
+    virtual size_t get_dataCount_in_compressed_buffer(const int8_t *src, size_t blockCount);
+    virtual dl_comp_return_t compress_sum(const int8_t *invec, int8_t *inoutvec, size_t blockCount);
+    virtual dl_comp_return_t compress_sum2(const int8_t *invec, int8_t *inoutvec, size_t blockCount);
+    virtual void dump_compressed_buffer(const int8_t *src, size_t blockCount);
+    virtual bool check_compressed_buffer(const float *comp1, const int8_t *comp2, const float *diff, size_t blockCount);
+
+private:
+    DLCompressDFP(): avx512_enabled_(dl_comp_check_avx512_supported()) {};
+
+private:
+    dl_comp_return_t compress_block(float *src, int8_t *dst, float *diff, size_t count, int *scale);
+    dl_comp_return_t decompress_block(const int8_t *src, float *dst, size_t count, int scale);
+    dl_comp_return_t avx512_decompress_block(const int8_t *src, float *dst, size_t count, int scale);
+    dl_comp_return_t avx512_compress_block(float *src, int8_t *dst, float *diff, size_t count, int *scale);
+    dl_comp_return_t compress_block_sum(const int8_t *invec, int8_t *inoutvec);
+    dl_comp_return_t compress_block_sum2(const int8_t *invec, int8_t *inoutvec);
+
+private:
+    bool avx512_enabled_;
+
+DISABLE_COPY_AND_ASSIGN(DLCompressDFP);
+};
+
+
+#endif /* DL_COMPRESSION_IMPL_HPP */
diff --git a/dlcp/src/dl_compression_util.cpp b/dlcp/src/dl_compression_util.cpp
new file mode 100644
index 00000000..ad05acb8
--- /dev/null
+++ b/dlcp/src/dl_compression_util.cpp
@@ -0,0 +1,36 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#include "dl_compression_util.hpp"
+
+int g_log_th = 0;
+
+void dl_comp_get_time(char* buf, size_t bufSize)
+{
+    time_t timer;
+    struct tm* timeInfo = 0;
+    time(&timer);
+    timeInfo = localtime(&timer);
+    strftime(buf, bufSize, "%Y:%m:%d %H:%M:%S", timeInfo);
+}
diff --git a/dlcp/src/dl_compression_util.hpp b/dlcp/src/dl_compression_util.hpp
new file mode 100644
index 00000000..5b33de25
--- /dev/null
+++ b/dlcp/src/dl_compression_util.hpp
@@ -0,0 +1,100 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef DL_COMPRESSION_UTIL_HPP
+#define DL_COMPRESSION_UTIL_HPP
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+
+extern int g_log_th; // log threshold
+
+#define GET_TID() syscall(SYS_gettid)
+#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+
+#define DLCP_LOG(log_level, fmt, ...) \
+do { \
+    if (log_level <= g_log_th) \
+    { \
+        char time_buf[20]; \
+        dl_comp_get_time(time_buf, 20); \
+        switch (log_level) \
+        { \
+            case ERROR: \
+            { \
+                printf("%s: ERROR: (%ld): %s:%u " fmt "\n", time_buf, GET_TID(), \
+                       __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+                break; \
+            } \
+            case INFO: \
+            { \
+                printf("(%ld):" fmt "\n", GET_TID(), ##__VA_ARGS__); \
+                break; \
+            } \
+            case DEBUG: \
+            case TRACE: \
+            { \
+                printf("%s: (%ld): %s:%u " fmt "\n", time_buf, GET_TID(), \
+                       __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+                break; \
+            } \
+            default: \
+            { \
+                printf("(%ld):" fmt "\n", GET_TID(), ##__VA_ARGS__); \
+            } \
+        } \
+        fflush(stdout); \
+    } \
+} while (0)
+
+#define DLCP_ASSERT(cond, fmt, ...) \
+do \
+{ \
+    if (!(cond)) \
+    { \
+        fprintf(stderr, "(%ld): %s:%s:%d: ASSERT '%s' FAILED: " fmt "\n", \
+                GET_TID(), __FILENAME__, __FUNCTION__, __LINE__, #cond, ##__VA_ARGS__); \
+        fflush(stderr); \
+        _exit(1); \
+    } \
+} while(0)
+
+enum LogLevel
+{
+    ERROR = 0,
+    INFO,
+    DEBUG,
+    TRACE
+};
+
+void dl_comp_get_time(char *buf, size_t buf_size);
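+
+// Example use of the logging macro above (editorial comment):
+//
+//   DLCP_LOG(ERROR, "ratio %zu not supported", ratio);
+//
+// Messages print only when the level is <= g_log_th; since g_log_th
+// defaults to 0 (ERROR), INFO/DEBUG/TRACE output is off by default.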
+
+#endif
diff --git a/dlcp/test/Makefile b/dlcp/test/Makefile
new file mode 100644
index 00000000..c3524f9a
--- /dev/null
+++ b/dlcp/test/Makefile
@@ -0,0 +1,68 @@
+LOCAL_DIR = $(shell pwd)
+
+#COMPILER ?= gnu
+COMPILER ?= intel
+
+FLAG_DEBUG ?= 0
+
+AR = ar
+CXXFLAGS += -fPIC
+
+ifeq ($(COMPILER), intel)
+    CC = icc
+    CXX = icpc
+    CXXFLAGS += -std=c++11
+    LDFLAGS += -static-intel
+else ifeq ($(COMPILER), gnu)
+    CC = gcc
+    CXX = g++
+    CXXFLAGS += -std=c++0x
+else
+    $(error Unsupported compiler $(COMPILER))
+endif
+
+ifeq ($(FLAG_DEBUG), 1)
+    CXXFLAGS += -O0 -g
+else
+    CXXFLAGS += -O2
+endif
+
+ifeq ($(CXX), icpc)
+    LDFLAGS += -static-intel -qopenmp
+endif
+
+COMPRESSION_LIB = libdlcomp.so
+SRC_DIR = $(LOCAL_DIR)
+INCL_DIR = $(LOCAL_DIR)/../include
+
+TARGET = test_compress_lib
+EXECUTE = test
+INCS = -I$(INCL_DIR) -I$(SRC_DIR)
+LDFLAGS += -L$(LOCAL_DIR)/../lib -ldlcomp
+CXXFLAGS += $(addprefix -I,$(INCL_DIR))
+
+
+SRCS += main.cpp
+
+OBJS := $(SRCS:.cpp=.o)
+
+
+all: $(TARGET)
+
+$(TARGET): $(EXECUTE)
+
+$(EXECUTE): $(OBJS)
+	$(CXX) $(CXXFLAGS) -o $(EXECUTE) $(OBJS) $(LDFLAGS)
+
+$(SRC_DIR)/%.o: $(SRC_DIR)/%.cpp
+	$(CXX) -c $(CXXFLAGS) $< -o $@
+
+clean:
+	rm -f $(SRC_DIR)/*.o $(EXECUTE)
+
+cleanall: clean
+
diff --git a/dlcp/test/main.cpp b/dlcp/test/main.cpp
new file mode 100644
index 00000000..16962a67
--- /dev/null
+++ b/dlcp/test/main.cpp
@@ -0,0 +1,268 @@
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+#include <ctime>
+
+#include "dl_compression.h"
+
+#define DATA_LEN 100000000
+
+float data1[DATA_LEN];
+
+float
data2[DATA_LEN]; + +void dataSetUp(void); + +bool test_compress_buffer(); + +bool test_decompress_buffer(); + +bool test_compressed_buffer_reduce_sum(); + +void addVec(const float *vec1, const float *vec2, float *vec3, int count) { + for (int i = 0; i < count; i++) { + vec3[i] = vec1[i] + vec2[i]; + } +} + +void cmpVec(const float *vec1, const float *vec2, int count) { + for (int i = 0; i < count; i++) { + if (std::abs(vec1[i] - vec2[i]) > 1e-3) { + printf("Detect big gap index: %d\n", i); + } + } +} + +float getSum(const float *src, int count) { + float sum = 0.0f; + for (int i = 0; i < count; i++) { + sum += src[i]; + } + return sum; +} + +void dumpVec(const float *vec, int count) { + for (int i = 0; i < count; i++) { + printf("vec[%d] = %lf\n", i, vec[i]); + } +} + +float sumVec(const float *vec1, const float *vec2, int count) { + float sum = 0.0f; + for (int i = 0; i < count; i++) { + sum = sum + vec1[i] + vec2[i]; + // printf("data1[%d] = %lf data2[%d] = %lf sum = %lf\n", i, vec1[i], i, vec2[i], vec1[i] + vec2[i]); + } + return sum; +} + +float sumVec2(const float *vec1, const float *vec2, int count) { + float sum = 0.0f; + for (int i = 0; i < count; i++) { + sum = sum + vec1[i] + vec2[i]; + // printf("tempData1[%d] = %lf tempData2[%d] = %lf sum = %lf\n", i, vec1[i], i, vec2[i], vec1[i] + vec2[i]); + } + return sum; +} + +int main(int argc, char *argv[]) +{ + dataSetUp(); + + if (!test_compress_buffer()) { + printf("test_compress_buffer failure!\n"); + } else { + printf("test_compress_buffer successful!\n"); + } + + if (!test_decompress_buffer()) { + printf("test_decompress_buffer failure!\n"); + } else { + printf("test_decompress_buffer successful!\n"); + } + + if (!test_compressed_buffer_reduce_sum()) { + printf("test_compressed_buffer_reduce_sum failure!\n"); + } else { + printf("test_compressed_buffer_reduce_sum successful!\n"); + } + + return 0; +} + +void dataSetUp() +{ + srand((int)time(0)); + + for (int i = 0; i < DATA_LEN; i++) { + data1[i] = (rand() % 10000) / (-100000.f) ; + } + + for (int i = 0; i < DATA_LEN; i++) { + data2[i] = (rand() % 10000) / (-100000.f); + } +} + +bool test_compress_buffer() +{ + float *tempData = (float *)malloc(sizeof(float) * DATA_LEN); + memcpy(tempData, data1, sizeof(float) * DATA_LEN); + + dl_comp_return_t ret = dl_comp_compress_buffer((const void *)tempData, + tempData, + DATA_LEN, + NULL, + DL_COMP_FLOAT32, + 4, + DL_COMP_DFP); + free(tempData); + if (ret != DL_COMP_OK) { + printf("compress failed error = %d!\n", ret); + return false; + } + + return true; +} + +bool test_decompress_buffer() +{ + float *tempData = (float *)malloc(sizeof(float) * DATA_LEN); + float *diff = (float *)malloc(sizeof(float) * DATA_LEN); + memcpy(tempData, data1, sizeof(float) * DATA_LEN); + memset(diff, 0, sizeof(float) * DATA_LEN); + + printf("before compress Total Sum: %f\n", getSum(data1, DATA_LEN)); + dl_comp_return_t ret = dl_comp_compress_buffer((const void *)tempData, + tempData, + DATA_LEN, + diff, + DL_COMP_FLOAT32, + 4, + DL_COMP_DFP); + if (ret != DL_COMP_OK) { + printf("compress failed error = %d!\n", ret); + free(tempData); + free(diff); + return false; + } + + ret = dl_comp_decompress_buffer((const void *)tempData, + tempData, + DATA_LEN); + if (ret != DL_COMP_OK) { + printf("de-compress failed error = %d!\n", ret); + free(tempData); + free(diff); + return false; + } + + printf("after compress Total Sum: %f diff: %f\n", getSum(tempData, DATA_LEN), getSum(diff, DATA_LEN)); + printf("after diff compensation Total Sum: %f\n", sumVec(tempData, diff, 
DATA_LEN)); + free(tempData); + free(diff); + return true; +} + +bool test_compressed_buffer_reduce_sum() +{ + float *tempData1 = (float *)malloc(sizeof(float) * DATA_LEN); + float *tempData2 = (float *)malloc(sizeof(float) * DATA_LEN); + float *tempData3 = (float *)malloc(sizeof(float) * DATA_LEN); + float *tempData4 = (float *)malloc(sizeof(float) * DATA_LEN); + float *sum1 = (float *)malloc(sizeof(float) * DATA_LEN); + float *sum2 = (float *)malloc(sizeof(float) * DATA_LEN); + float *sum3 = (float *)malloc(sizeof(float) * DATA_LEN); + memcpy(tempData1, data1, sizeof(float) * DATA_LEN); + memcpy(tempData2, data2, sizeof(float) * DATA_LEN); + + dl_comp_return_t ret = dl_comp_compress_buffer((const void *)tempData1, + tempData1, + DATA_LEN, + NULL, + DL_COMP_FLOAT32, + 4, + DL_COMP_DFP); + + if (ret != DL_COMP_OK) { + printf("compress failed error = %d!\n", ret); + free(tempData1); + free(tempData2); + return false; + } + + ret = dl_comp_compress_buffer((const void *)tempData2, + tempData2, + DATA_LEN, + NULL, + DL_COMP_FLOAT32, + 4, + DL_COMP_DFP); + + if (ret != DL_COMP_OK) { + printf("compress failed error = %d!\n", ret); + free(tempData1); + free(tempData2); + return false; + } + +#if 0 + ret = dl_comp_decompress_buffer((const void *)tempData1, + (void *)tempData3, + DATA_LEN); + ret = dl_comp_decompress_buffer((const void *)tempData2, + (void *)tempData4, + DATA_LEN); + + printf("orig data sum = %lf\n", sumVec(data1, data2, DATA_LEN)); + printf("new data sum = %lf\n", sumVec2(tempData3, tempData4, DATA_LEN)); +#endif + +#if 1 + size_t blockCount = dl_comp_convert_block_count(DATA_LEN); + + ret = dl_comp_compressed_buffer_reduce_sum((const void *)tempData1, + (void *)tempData2, + blockCount); + + if (ret != DL_COMP_OK) { + printf("reduce sum failed error = %d!\n", ret); + free(tempData1); + free(tempData2); + return false; + } + + ret = dl_comp_decompress_buffer((const void *)tempData2, + (void *)tempData2, + DATA_LEN); + + if (ret != DL_COMP_OK) { + printf("de compress failed error = %d!\n", ret); + free(tempData1); + free(tempData2); + return false; + } + + printf("orig data sum = %lf\n", sumVec(data1, data2, DATA_LEN)); + printf("new reduce sum = %lf\n", getSum(tempData2, DATA_LEN)); +#endif + +// addVec(data1, data2, sum1, DATA_LEN); +// addVec(tempData3, tempData4, sum2, DATA_LEN); + +// printf("start to cmp sum1 and tempData2!\n"); +// cmpVec(sum1, tempData2, DATA_LEN); + +// printf("start to cmp sum2 and sum1!\n"); +// cmpVec(sum2, sum1, DATA_LEN); + + + free(tempData1); + free(tempData2); + return true; +} diff --git a/dlcp/test/run.sh b/dlcp/test/run.sh new file mode 100755 index 00000000..dcf5f18f --- /dev/null +++ b/dlcp/test/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib/ +export OMP_NUM_THREADS=1 +./test diff --git a/python/external/__init__.py b/python/external/__init__.py new file mode 100644 index 00000000..791c3041 --- /dev/null +++ b/python/external/__init__.py @@ -0,0 +1 @@ +import external.mkldnn # NOQA diff --git a/python/external/dlcp/include/dl_compression.h b/python/external/dlcp/include/dl_compression.h new file mode 100644 index 00000000..e081461b --- /dev/null +++ b/python/external/dlcp/include/dl_compression.h @@ -0,0 +1,193 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef DL_COMPRESSION_H
+#define DL_COMPRESSION_H
+
+#include <stddef.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+    DL_COMP_NONE = 0,
+    DL_COMP_DFP = 1,
+} dl_comp_method_t;
+
+typedef enum {
+    DL_COMP_OK = 0,
+    DL_COMP_FAIL = 1,
+    DL_COMP_FAIL_SRC_DATA_TYPE_NOT_SUPPORTED = 2,
+    DL_COMP_FAIL_RATIO_NOT_SUPPORTED = 3,
+    DL_COMP_FAIL_COMP_METHOD_NOT_SUPPORTED = 4,
+    DL_COMP_FAIL_INVALID_COMPRESSED_FORMAT = 5,
+    DL_COMP_FAIL_NOT_SUPPORTED = 6
+} dl_comp_return_t;
+
+typedef enum {
+    DL_COMP_INT8 = 0,
+    DL_COMP_FLOAT16 = 1,
+    DL_COMP_FLOAT32 = 2,
+    DL_COMP_FLOAT64 = 3,
+} dl_comp_data_type_t;
+
+// Compress the src buffer into the dst buffer.
+//
+// Parameters:
+// src           [in]     pointer to the src buffer
+// dst           [out]    pointer to the dst buffer
+// dataCount     [in]     number of elements to be compressed
+// diff          [in/out] takes the precision lost by the previous compress
+//                        call and returns the precision lost by this one.
+//                        Pass a NULL pointer if the lost precision is not
+//                        of interest.
+// src_data_type [in]     data type of the elements in the src buffer
+// comp_ratio    [in]     compression ratio; only 2, 4, 8, 16 and 32 are
+//                        supported. E.g. compressing FLOAT32 to INT8 is a
+//                        comp_ratio of 4.
+// method        [in]     compression algorithm
+// Returns:
+//     DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_compress_buffer( const void *src,
+                                          void *dst,
+                                          size_t dataCount,
+                                          void *diff,
+                                          dl_comp_data_type_t src_data_type,
+                                          size_t comp_ratio,
+                                          dl_comp_method_t method );
+
+// Decompress the src buffer into the dst buffer.
+//
+// Parameters:
+// src       [in]  pointer to the src buffer
+// dst       [out] pointer to the dst buffer
+// dataCount [in]  number of elements to be decompressed
+// Returns:
+//     DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_decompress_buffer( const void *src,
+                                            void *dst,
+                                            size_t dataCount );
+
+// Sum up the compressed data of two input buffers and place the result
+// in outBuffer.
+//
+// Parameters:
+// inBuffer1 [in]  pointer to a quantized data vector
+// inBuffer2 [in]  pointer to a quantized data vector
+// dataCount [in]  number of elements of inBuffer1 and inBuffer2 to be
+//                 summed up.
+// outBuffer [out] pointer to a quantized data vector that receives the
+//                 result.
+// Returns:
+//     DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_compressed_buffer_sum( const void *inBuffer1,
+                                                const void *inBuffer2,
+                                                size_t dataCount,
+                                                void *outBuffer );
+
+// Get the compressed meta data (block) info. Operations such as multi-node
+// all-reduce divide the payload into parts to improve communication
+// efficiency. This API reports the minimum slicing granularity of the
+// compressed data; its size depends on the src data type, comp_ratio and
+// compression algorithm.
+//
+// Parameters:
+// src_data_type [in] data type of the src data before compression.
+// comp_ratio    [in] compression ratio
+// method        [in] compression algorithm
+// Returns:
+//     size of one block in bytes.
+size_t dl_comp_get_sizeof_block( dl_comp_data_type_t src_data_type,
+                                 size_t comp_ratio,
+                                 dl_comp_method_t method );
+
+// Sum up the compressed data of two buffers and place the result in the
+// second buffer. Note that this API takes a blockCount, not a dataCount:
+// one block can contain multiple data elements.
+//
+// Parameters:
+// inBuffer    [in]     pointer to quantized data
+// inoutBuffer [in/out] pointer to quantized data; the result is placed
+//                      in this buffer.
+// blockCount  [in]     number of blocks to be summed up.
+// Returns:
+//     DL_COMP_OK on success, an error code otherwise.
+dl_comp_return_t dl_comp_compressed_buffer_reduce_sum( const void *inBuffer,
+                                                       void *inoutBuffer,
+                                                       size_t blockCount );
+
+// Util function for converting a data count into a block count.
+//
+// Parameters:
+// dataCount [in] number of data elements
+// Returns:
+//     the corresponding number of blocks.
+size_t dl_comp_convert_block_count(size_t dataCount);
+
+// Util function to get how many elements fit in one block.
+// Parameters:
+//     N/A
+// Returns:
+//     the number of elements in one block.
+size_t dl_comp_get_elem_num_in_block();
+
+// Check the running environment.
+// Parameters:
+//     N/A
+// Returns:
+//     true if the check succeeds, otherwise false.
+//     If false, please disable the quantization functionality. E.g. we
+//     suggest not using quantization on machines without AVX512 support,
+//     because there is no performance gain.
+bool dl_comp_check_running_environ();
+
+// Util function for compressing float32 data to int8.
+// Parameters:
+// srcBuffer [in]     src float32 data
+// dstBuffer [out]    dst int8 data
+// diff      [in/out] precision lost in compression
+// dataCount [in]     data count
+// Return:
+//     0 on success, otherwise an error code.
+int dl_comp_compress_buffer_FLOAT32ToINT8( const void *srcBuffer,
+                                           void *dstBuffer,
+                                           void *diff,
+                                           size_t dataCount);
+
+// Util function for de-compressing int8 data to float32.
+// Parameters:
+// srcBuffer [in]  compressed int8 data
+// dstBuffer [out] de-compressed float32 data
+// dataCount [in]  data count
+// Return:
+//     0 on success, otherwise an error code.
+int dl_comp_decompress_buffer_INT8ToFLOAT32(const void *srcBuffer,
+                                            void *dstBuffer,
+                                            size_t dataCount);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
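For a quick smoke test of the API above, the prebuilt `libdlcomp.so` shipped under `python/external/dlcp/lib/` can be driven directly from Python. The sketch below is illustrative only: the enum values and signatures are taken from the header above, while the use of `ctypes` (rather than the SWIG binding) and the relative library path are assumptions.

```python
import ctypes
import numpy as np

# Assumed location of the prebuilt library; adjust for your install.
lib = ctypes.CDLL('python/external/dlcp/lib/libdlcomp.so')

DL_COMP_FLOAT32 = 2   # dl_comp_data_type_t
DL_COMP_DFP = 1       # dl_comp_method_t
DL_COMP_OK = 0        # dl_comp_return_t

data = np.random.rand(1024).astype(np.float32)
buf = data.copy()     # compressed in place, as the dlcp tests do
ptr = buf.ctypes.data_as(ctypes.c_void_p)

# Compress FLOAT32 -> INT8 (comp_ratio 4), discarding the precision diff.
ret = lib.dl_comp_compress_buffer(ptr, ptr, ctypes.c_size_t(buf.size),
                                  None, DL_COMP_FLOAT32,
                                  ctypes.c_size_t(4), DL_COMP_DFP)
assert ret == DL_COMP_OK

# Decompress back in place and compare against the original.
ret = lib.dl_comp_decompress_buffer(ptr, ptr, ctypes.c_size_t(buf.size))
assert ret == DL_COMP_OK
print('max quantization error:', np.abs(buf - data).max())
```

diff --git a/python/external/dlcp/lib/libdlcomp.so b/python/external/dlcp/lib/libdlcomp.so
new file mode 100755
index 00000000..5e0d245a
Binary files /dev/null and b/python/external/dlcp/lib/libdlcomp.so differ
diff --git a/python/external/mkldnn/__init__.py b/python/external/mkldnn/__init__.py
new file mode 100644
index 00000000..e2d012a6
--- /dev/null
+++ b/python/external/mkldnn/__init__.py
@@ -0,0 +1,6 @@
+from external.mkldnn import prepare_mkldnn  # NOQA
+from external.mkldnn.prepare_mkldnn import prepare  # NOQA
+from external.mkldnn.prepare_mkldnn import root  # NOQA
+from external.mkldnn.prepare_mkldnn import lib_path  # NOQA
+from external.mkldnn.prepare_mkldnn import include_path  # NOQA
+from external.mkldnn.prepare_mkldnn import source_path  # NOQA
diff --git a/python/external/mkldnn/prepare_mkldnn.py b/python/external/mkldnn/prepare_mkldnn.py
new file mode 100644
index 00000000..199d9327
--- /dev/null
+++ b/python/external/mkldnn/prepare_mkldnn.py
@@ -0,0 +1,127 @@
+import os
+import sys
+
+MKLDNN_ROOT = os.environ['HOME'] + '/.chainer'
+MKLDNN_WORK_PATH = os.path.split(os.path.realpath(__file__))[0]
+MKLDNN_LIB_PATH = MKLDNN_ROOT + '/lib'
+MKLDNN_INCLUDE_PATH = MKLDNN_ROOT + '/include'
+MKLDNN_SOURCE_PATH = MKLDNN_WORK_PATH + '/source'
+MKLDNN_BUILD_PATH = MKLDNN_WORK_PATH + '/source/build'
+MKLML_PKG_PATH = MKLDNN_SOURCE_PATH + '/external'
+
+
+def download(mkldnn_version):
+    print('Downloading ...')
+
+    os.chdir(MKLDNN_WORK_PATH)
+    os.system(
+        'git clone -b master \
+        --single-branch https://github.com/01org/mkl-dnn.git source')
+
+    os.chdir(MKLDNN_SOURCE_PATH)
+    os.system('git reset --hard %s' % mkldnn_version)
+
+    if not os.path.exists(MKLML_PKG_PATH):
+        os.system('cd scripts && ./prepare_mkl.sh && cd ..')
+
+
+def build():
+    print('Building ...')
+
+    os.system(
+        'mkdir -p build && cd build \
+        && cmake -DCMAKE_INSTALL_PREFIX=%s .. \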
+        && make -j' % MKLDNN_ROOT)
+
+
+def install(refresh_build):
+    print('Installing ...')
+
+    os.chdir(MKLDNN_SOURCE_PATH)
+
+    # install mkldnn
+    if refresh_build:
+        os.system('cd build && make -j && make install')
+    else:
+        os.system('cd build && make install')
+
+    # install mklml
+    mklml_pkg_path_leafs = os.listdir(MKLML_PKG_PATH)
+    mklml_origin_path = None
+    for leaf in mklml_pkg_path_leafs:
+        if os.path.isdir('%s/%s' % (MKLML_PKG_PATH, leaf)) and \
+                'mklml' in leaf:
+            mklml_origin_path = '%s/%s' % (MKLML_PKG_PATH, leaf)
+            break
+
+    if mklml_origin_path:
+        os.system('cp %s/lib/* %s' % (mklml_origin_path, MKLDNN_LIB_PATH))
+        os.system('cp %s/include/* %s' %
+                  (mklml_origin_path, MKLDNN_INCLUDE_PATH))
+
+
+def build_install():
+    build()
+    install(False)
+
+
+def download_build_install(mkldnn_version):
+    download(mkldnn_version)
+    build_install()
+
+
+def prepare(mkldnn_version):
+    print('Intel mkl-dnn preparing ...')
+    mkldnn_prepared = True
+    mkldnn_built = True
+    mkldnn_installed = True
+
+    if os.path.exists(MKLDNN_SOURCE_PATH):
+        os.chdir(MKLDNN_SOURCE_PATH)
+        res = os.popen('git log | sed -n \'1p\'', 'r')
+        commit_head = res.read()
+        if mkldnn_version not in commit_head:
+            # wrong commit checked out: rebuild everything from scratch
+            os.chdir(MKLDNN_WORK_PATH)
+            os.system('rm -rf %s' % MKLDNN_SOURCE_PATH)
+            os.system('rm -rf %s' % MKLDNN_LIB_PATH)
+            os.system('rm -rf %s' % MKLDNN_INCLUDE_PATH)
+            mkldnn_prepared = False
+        else:
+            if not os.path.exists(MKLDNN_BUILD_PATH):
+                # sources present but never built
+                os.system('rm -rf %s' % MKLDNN_LIB_PATH)
+                os.system('rm -rf %s' % MKLDNN_INCLUDE_PATH)
+                mkldnn_built = False
+            elif (not os.path.exists(MKLDNN_LIB_PATH)) or \
+                    (not os.path.exists(MKLDNN_INCLUDE_PATH)):
+                # built, but the install tree is incomplete
+                os.system('rm -rf %s' % MKLDNN_LIB_PATH)
+                os.system('rm -rf %s' % MKLDNN_INCLUDE_PATH)
+                mkldnn_installed = False
+    else:
+        os.system('rm -rf %s' % MKLDNN_LIB_PATH)
+        os.system('rm -rf %s' % MKLDNN_INCLUDE_PATH)
+        mkldnn_prepared = False
+
+    if not mkldnn_prepared:
+        download_build_install(mkldnn_version)
+    elif not mkldnn_built:
+        build_install()
+    elif not mkldnn_installed:
+        install(True)
+
+    os.chdir(sys.path[0])
+    print('Intel mkl-dnn prepared!')
+
+
+def root():
+    return MKLDNN_ROOT
+
+
+def lib_path():
+    return MKLDNN_LIB_PATH
+
+
+def include_path():
+    return MKLDNN_INCLUDE_PATH
+
+
+def source_path():
+    return MKLDNN_SOURCE_PATH
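The `prepare()` flow above decides between a full download-build-install, a rebuild, or a plain install by probing which of the source, build, lib and include trees already exist. A minimal sketch of how setup code might drive it; the pinned commit hash here is a placeholder, not the version the project actually uses:

```python
from external.mkldnn import prepare, lib_path, include_path

# Placeholder commit id; the real pin lives in the package's setup code.
MKLDNN_VERSION = '0123456789abcdef0123456789abcdef01234567'

# Clones, builds and installs mkl-dnn under ~/.chainer as needed;
# later calls are incremental thanks to the existence checks above.
prepare(MKLDNN_VERSION)

print('link against:', lib_path())
print('compile against:', include_path())
```

diff --git a/python/ideep4py/__init__.py b/python/ideep4py/__init__.py
new file mode 100644
index 00000000..8002fb32
--- /dev/null
+++ b/python/ideep4py/__init__.py
@@ -0,0 +1,171 @@
+import numpy
+import sys
+
+from ideep4py._ideep4py import intVector  # NOQA
+
+from ideep4py._ideep4py import mdarray  # NOQA
+from ideep4py._ideep4py import mdarrayVector  # NOQA
+
+from ideep4py._ideep4py import batchNormalization  # NOQA
+from ideep4py._ideep4py import concat  # NOQA
+from ideep4py._ideep4py import convolution2D  # NOQA
+from ideep4py._ideep4py import convolution2DParam as conv2DParam  # NOQA
+from ideep4py._ideep4py import dropout  # NOQA
+from ideep4py._ideep4py import linear  # NOQA
+from ideep4py._ideep4py import localResponseNormalization  # NOQA
+from ideep4py._ideep4py import localResponseNormalizationParam as lrnParam  # NOQA
+from ideep4py._ideep4py import pooling2D  # NOQA
+from ideep4py._ideep4py import pooling2DParam as pol2DParam  # NOQA
+from ideep4py._ideep4py import relu  # NOQA
+
+from ideep4py._ideep4py import basic_acc_sum  # NOQA
+from ideep4py._ideep4py import basic_copyto  # NOQA
+
+from ideep4py._ideep4py import dlCompression  # NOQA
+
+from ideep4py import cosim  # NOQA
+
+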
+# -----------------------------------------------------------------------------
+# ideep4py.mdarray allocation
+# -----------------------------------------------------------------------------
+dat_array = 'd'  # data array
+wgt_array = 'w'  # weight array
+
+
+def array(x, itype=dat_array):
+    """Create an :class:`ideep4py.mdarray` object according to ``x``.
+
+    Args:
+        x (numpy.ndarray or ideep4py.mdarray):
+            if ``x`` is a numpy.ndarray that is not C-contiguous, it is
+            converted to a C-contiguous array before the ideep4py.mdarray
+            is created.
+        itype: the created ideep4py.mdarray is optimized according to the
+            ``itype`` flag (``dat_array`` or ``wgt_array``).
+
+    Returns:
+        Instance of :class:`ideep4py.mdarray`.
+
+    """
+    if isinstance(x, numpy.ndarray) and \
+            x.dtype == numpy.dtype('float32'):
+        if x.flags.contiguous is False:
+            x = numpy.ascontiguousarray(x)
+        return mdarray(x, itype)
+    else:
+        return x
+
+
+_ideep4py_ = sys.modules[__name__]
+
+
+def get_array_module(array):
+    return _ideep4py_
+
+
+def check_ndim(inputs, supported_ndim=(2, 4)):
+    # Check the inputs against the dimensions ideep4py supports
+    valid_ndim = False
+    for ndim in supported_ndim:
+        valid_ndim = valid_ndim or inputs[0].ndim == ndim
+
+    if supported_ndim and not valid_ndim:
+        return False
+    else:
+        return True
+
+
+def check_type(inputs):
+    if isinstance(inputs[0], numpy.ndarray):
+        _should_use_ideep = True
+
+        for x in inputs:
+            _should_use_ideep = _should_use_ideep and \
+                x.dtype == numpy.dtype('float32') and \
+                x.size != 0
+        return _should_use_ideep
+    else:
+        return False
+
+
+def all_ready(inputs, supported_ndim=(2, 4)):
+    """Check input dimensions and types.
+
+    The function checks ``inputs`` against ``supported_ndim``.
+
+    Args:
+        inputs (numpy.ndarray, ideep4py.mdarray):
+            ``inputs`` to be checked, including array type, dimension
+            and data type.
+        supported_ndim: A tuple of ndims. iDeep supports arrays of
+            dimension 2 or 4 only.
+
+    Returns:
+        bool: ``True`` if all conditions are met.
+ + """ + + if check_ndim(inputs, supported_ndim) is False: + return False + elif isinstance(inputs[0], mdarray): + return True + else: + return check_type(inputs) + + +def tanh(x): + if all_ready((x,)): + y = _ideep4py.tanh.Forward(array(x)) # NOQA + else: + y = numpy.tanh(x) + + return y + + +def convolution2DParam(out_dims, dy, dx, sy, sx, ph, pw, pd, pr): + cp = conv2DParam() + cp.out_dims = intVector() + for d in out_dims: + cp.out_dims.push_back(d) + cp.dilate_y, cp.dilate_x = (dy - 1), (dx - 1) + cp.sy, cp.sx = sy, sx + cp.pad_lh, cp.pad_lw = ph, pw + cp.pad_rh, cp.pad_rw = pd, pr + return cp + + +def pooling2DParam(out_dims, kh, kw, sy, sx, ph, pw, pd, pr, algo): + pp = pol2DParam() + pp.out_dims = intVector() + for d in out_dims: + pp.out_dims.push_back(d) + pp.kh, pp.kw = kh, kw + pp.sy, pp.sx = sy, sx + pp.pad_lh, pp.pad_lw = ph, pw + pp.pad_rh, pp.pad_rw = pd, pr + pp.algo_kind = algo + return pp + + +pooling2DParam.pooling_max = pol2DParam.pooling_max +pooling2DParam.pooling_avg = pol2DParam.pooling_avg +pooling2DParam.pooling_avg_include_padding = \ + pol2DParam.pooling_avg_include_padding +pooling2DParam.pooling_avg_exclude_padding = \ + pol2DParam.pooling_avg_exclude_padding + + +def localResponseNormalizationParam(n, k, alpha, beta, algo): + lp = lrnParam() + lp.n = n + lp.k = k + lp.alpha = alpha + lp.beta = beta + lp.algo_kind = algo + return lp + + +localResponseNormalizationParam.lrn_across_channels = \ + lrnParam.lrn_across_channels +localResponseNormalizationParam.lrn_within_channel = \ + lrnParam.lrn_within_channel diff --git a/python/ideep4py/blas/sum.cc b/python/ideep4py/blas/sum.cc new file mode 100644 index 00000000..a3bcfd4b --- /dev/null +++ b/python/ideep4py/blas/sum.cc @@ -0,0 +1,498 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include +#include +#include +#include +#include +#include "tensor.h" +#include "sum.h" + +using namespace mkldnn; + +static inline bool optimized_format(Tensor *t) { + switch(t->format()) { + case mkldnn_nChw16c: + case mkldnn_nChw8c: + case mkldnn_OIhw8i8o: + case mkldnn_OIhw16i16o: + case mkldnn_OIhw8i16o2i: + case mkldnn_OIhw8o16i2o: + case mkldnn_OIhw8o8i: + case mkldnn_OIhw16o16i: + case mkldnn_Oihw8o: + case mkldnn_Oihw16o: + return true; + default: + return false; + } +} + +template +static T * sum_nChwXC_along_channel(T *src, mkldnn_memory_format_t format, + mkldnn_dims_t dims, vector axis, T *dst) { + int mb = dims[0], + ic = dims[1], + ih = dims[2], + iw = dims[3]; + const int cg = format == mkldnn_nChw16c ? 16 : 8; + int cn = ic / cg; + + int blk_nthr = omp_get_max_threads(), + blk_num = blk_nthr, + blk_len = mb / blk_num, + blk_len_ex = mb % blk_num; + + if (!blk_len) + blk_nthr = mb; + + T *buf = reinterpret_cast(new avx::byte[ic * blk_nthr * sizeof(T)]); + + # pragma omp parallel num_threads(blk_nthr) + { + int ithr = omp_get_thread_num(); + int blen = ithr < blk_len_ex ? blk_len + 1 : blk_len; + int bstart = ithr <= blk_len_ex ? (blk_len + 1) * ithr : + blk_len_ex * (blk_len + 1) + (ithr - blk_len_ex) * blk_len; + int bend = bstart + blen; + + T *loc_src = src + bstart * ic * ih * iw; + if ((cg == 16) && (((unsigned long)buf & 0xf) == 0) && (((unsigned long)loc_src & 0xf) == 0)) { + for (int b = bstart; b < bend; b++) { + T *loc_buf = buf + ithr * ic; + for (int c = 0; c < cn; c++) { + if (b == bstart) + for (int o = 0; o < cg; o++) + loc_buf[o] = 0; + for (int hw = 0; hw < ih * iw; hw++) { + __asm__( + "mov %0, %%rax\n" + "mov %1, %%rbx\n" + ".byte 0x62, 0xf1, 0x7c, 0x48, 0x10, 0x00\n" //vmovups (%%rax), %%zmm0 + ".byte 0x62, 0xf1, 0x7c, 0x48, 0x58, 0x03\n" //vaddps (%%rbx), %%zmm0, %%zmm0 + ".byte 0x62, 0xf1, 0x7c, 0x48, 0x11, 0x00\n" //vmovups %%zmm0, (%%rax) + :"+r"(loc_buf) + :"r"(loc_src) + :"rax", "rbx" + ); + loc_src += cg; + } + + loc_buf += cg; + } + } + } else if ((cg == 8) && (((unsigned long)buf & 0x7) == 0) && (((unsigned long)loc_src & 0x7) == 0)) { + for (int b = bstart; b < bend; b++) { + T *loc_buf = buf + ithr * ic; + for (int c = 0; c < cn; c++) { + if (b == bstart) + for (int o = 0; o < cg; o++) + loc_buf[o] = 0; + for (int hw = 0; hw < ih * iw; hw++) { + __asm__( + "mov %0, %%rax\n" + "mov %1, %%rbx\n" + ".byte 0xc5, 0xfc, 0x10, 0x00\n" //vmovups (%%rax), %%ymm0 + ".byte 0xc5, 0xfc, 0x58, 0x03\n" //vaddps (%%rbx), %%ymm0, %%ymm0 + ".byte 0xc5, 0xfc, 0x11, 0x00\n" //vmovups %%ymm0, (%rax) + :"+r"(loc_buf) + :"r"(loc_src) + :"rax", "rbx" + ); + loc_src += cg; + } + + loc_buf += cg; + } + } + } else { + for (int b = bstart; b < bend; b++) { + T *loc_buf = buf + ithr * ic; + for (int c = 0; c < cn; c++) { + if (b == bstart) + for (int o = 0; o < cg; o++) + loc_buf[o] = 0; + + for (int hw = 0; hw < ih * iw; hw++) { + for (int o = 0; o < cg; o++) + loc_buf[o] += loc_src[o]; + loc_src += cg; + } + + loc_buf += cg; + } + } + } + + } + + // Allreduce + int c_nthr = omp_get_max_threads(), + c_num = c_nthr, + c_len = ic / c_num, + c_len_ex = ic % c_num; + + if (!c_len) + c_nthr = ic; + + # pragma omp parallel num_threads(c_nthr) + { + int ithr = omp_get_thread_num(); + int clen = ithr < c_len_ex ? c_len + 1 : c_len; + int cstart = ithr <= c_len_ex ? 
(c_len + 1) * ithr : + c_len_ex * (c_len + 1) + (ithr - c_len_ex) * c_len; + int cend = cstart + clen; + + for (int c = cstart; c < cend; c++) + dst[c] = 0; + + for (int i = 0; i < blk_nthr; i++) { + T *loc_buf = buf + i * ic; + for (int c = cstart; c < cend; c++) + dst[c] += loc_buf[c]; + } + } + + delete(reinterpret_cast(buf)); + + return dst; +} + +// 4 dimensions(NCHW/OIHW) opitimzation for mkldnn backend only. +Tensor * sum_opt_along_axis(Tensor *src, vector axis) { + int axises = axis.size(); + vector valid_axis_4dim = {0, 2, 3}; + + if (src->ndims() != 4 || axises != 3) { + return nullptr; + } + + auto valid_axis = [](int axises, + vector axis, + vector valid_axis) -> bool { + for (int i = 0; i < axises; i++) { + if (valid_axis[i] != axis[i]) + return false; + } + return true; + }; + + try { + switch (src->format()) { + case mkldnn_nChw8c: + if (!valid_axis(axises, axis, valid_axis_4dim)) + throw std::runtime_error( + "Invalid axis in tensor sum along axis "); + break; + case mkldnn_nChw16c: + if (!valid_axis(axises, axis, valid_axis_4dim)) + throw std::runtime_error( + "Invalid axis in tensor sum along axis "); + break; + default: + throw std::runtime_error( + "Invalid format in tensor sum along axis"); + break; + } + } catch (std::runtime_error &e) { + (void)e; + return nullptr; + } + + Tensor *dst = nullptr; + try { + switch (src->type()) { + case FLOAT32: + dst = new Tensor(1, {src->desc().data.dims[1]}, src->type()); + sum_nChwXC_along_channel(static_cast(src->data()), src->format(), + src->desc().data.dims, axis, static_cast(dst->data())); + break; + case SINT32: + dst = new Tensor(1, {src->desc().data.dims[1]}, src->type()); + sum_nChwXC_along_channel(static_cast(src->data()), src->format(), + src->desc().data.dims, axis, static_cast(dst->data())); + break; + case SINT16: + dst = new Tensor(1, {src->desc().data.dims[1]}, src->type()); + sum_nChwXC_along_channel(static_cast(src->data()), src->format(), + src->desc().data.dims, axis, static_cast(dst->data())); + break; + case SINT8: + dst = new Tensor(1, {src->desc().data.dims[1]}, src->type()); + sum_nChwXC_along_channel(static_cast(src->data()), src->format(), + src->desc().data.dims, axis, static_cast(dst->data())); + break; + case UINT8: + dst = new Tensor(1, {src->desc().data.dims[1]}, src->type()); + sum_nChwXC_along_channel(static_cast(src->data()), src->format(), + src->desc().data.dims, axis, static_cast(dst->data())); + break; + default: + throw std::runtime_error( + "Invalid dtype in tensor opt sum along axis"); + break; + } + } catch (std::runtime_error &e) { + (void)e; + return nullptr; + } + + return dst; +} + +// Less optimization gained in case of first dimension in small size +template +static T * sum_along_axis(T *src, int src_ndims, mkldnn_dims_t src_dims, + vector axis, vector dst_dims, T *dst) { + int tail = 1; + for (int d = 1; d < src_ndims; d++) + tail *= src_dims[d]; + + bool along_mb = false; + for (int a = 0; a < axis.size(); a++) { + if (axis[a] == 0) { + along_mb = true; + break; + } + } + + int gbl_ws_size = 1; + for (int d = 1; d < src_ndims; d++) { + int a = 0; + for (; a < axis.size(); a++) + if (d == axis[a]) + break; + + if (a >= axis.size()) + gbl_ws_size *= src_dims[d]; + } + + int mb = src_dims[0]; + int blk_nthr = omp_get_max_threads(), + blk_num = blk_nthr, + blk_len = mb / blk_num, + blk_len_ex = mb % blk_num; + + if (!blk_len) + blk_nthr = mb; + + T *gbl_ws[blk_nthr]; + # pragma omp parallel num_threads(blk_nthr) + { + int ithr = omp_get_thread_num(); + int blen = ithr < blk_len_ex ? 
blk_len + 1 : blk_len; + int bstart = ithr <= blk_len_ex ? (blk_len + 1) * ithr : + blk_len_ex * (blk_len + 1) + (ithr - blk_len_ex) * blk_len; + int bend = bstart + blen; + + T *loc_ws[blen]; + for (int b = bstart; b < bend; b++) { + T *loc_src = src + b * tail; + T *cur_src = loc_src; + + // Intialize for new blk + vector cur_dims; + for (int d = 0; d < src_ndims; d++) + cur_dims.push_back(src_dims[d]); + + vector cur_axis; + for (int a = 0; a < axis.size(); a++) + if (axis[a] != 0) + cur_axis.insert(cur_axis.begin(), axis[a]); + + // Sum along axis[a] + for (int a = 0; a < cur_axis.size(); a++) { + + int cur_fore = 1; + for (int d = 1; d < cur_axis[a]; d++) + cur_fore *= cur_dims[d]; + + int cur_tail = 1; + for (int d = cur_axis[a] + 1; d < cur_dims.size(); d++) + cur_tail *= cur_dims[d]; + + int cur_ws_size = cur_fore * cur_tail; + T *ws = reinterpret_cast(new avx::byte[cur_ws_size * sizeof(T)]); + for (int o = 0; o < cur_ws_size; o++) ws[o] = 0; + + // kernel + for (int w = 0; w < cur_ws_size; w++) { + for (int t = 0; t < cur_dims[cur_axis[a]]; t++) { + ws[w] += cur_src[w + t * cur_tail]; + } + } + + // adjust dims and cur_axis for sum in next axis + cur_dims.erase(cur_dims.begin() + cur_axis[a]); + for (int _a = a + 1; _a < cur_axis.size(); _a++) { + if (cur_axis[_a] > cur_axis[a]) + cur_axis[_a] -= 1; + } + + // refresh buffer + if (cur_src != loc_src) delete(reinterpret_cast(cur_src)); + if (a == cur_axis.size() - 1) loc_ws[b - bstart] = ws; + + cur_src = ws; + } + } + + if (along_mb) { + // local allreduce + if (src_ndims == 2 && axis.size() == 1 && axis[0] == 0) { + loc_ws[0] = reinterpret_cast(new avx::byte[tail * sizeof(T)]); + for (int o = 0; o < tail; o++) + loc_ws[0][o] = 0; + for (int b = bstart; b < bend; b++) { + T *loc_src = src + b * tail; + for (int o = 0; o < tail; o++) + loc_ws[0][o] += loc_src[o]; + } + } else { + for (int b = 1; b < blen; b++) { + for (int o = 0; o < gbl_ws_size; o++) + loc_ws[0][o] += loc_ws[b][o]; + delete(reinterpret_cast(loc_ws[b])); + } + } + + gbl_ws[ithr] = loc_ws[0]; + } else { + // cpy to dst + for (int b = bstart; b < bend; b++) { + for (int o = 0; o < gbl_ws_size; o++) + dst[b * gbl_ws_size + o] = loc_ws[b - bstart][o]; + delete(reinterpret_cast(loc_ws[b - bstart])); + } + } + } + + if (along_mb) { + // global allreduce + int c_nthr = omp_get_max_threads(), + c_num = c_nthr, + c_len = gbl_ws_size / c_num, + c_len_ex = gbl_ws_size % c_num; + + if (!c_len) + c_nthr = gbl_ws_size; + + # pragma omp parallel num_threads(c_nthr) + { + int ithr = omp_get_thread_num(); + int clen = ithr < c_len_ex ? c_len + 1 : c_len; + int cstart = ithr <= c_len_ex ? 
(c_len + 1) * ithr :
+                c_len_ex * (c_len + 1) + (ithr - c_len_ex) * c_len;
+            int cend = cstart + clen;
+
+            for (int c = cstart; c < cend; c++)
+                dst[c] = 0;
+
+            for (int i = 0; i < blk_nthr; i++) {
+                T *loc_buf = gbl_ws[i];
+                for (int c = cstart; c < cend; c++)
+                    dst[c] += loc_buf[c];
+            }
+        }
+
+        for (int i = 0; i < blk_nthr; i++)
+            delete [] (reinterpret_cast<avx::byte *>(gbl_ws[i]));
+    }
+
+    return dst;
+}
+
+Tensor * sum_common_along_axis(Tensor *src, vector<int> axis) {
+    auto dims = src->desc().data.dims;
+    vector<int> o_dims;
+    int o_ndims = src->ndims() - axis.size();
+
+    // TODO: Support sum all
+    if ((o_ndims != 1 && o_ndims != 2 && o_ndims != 4) ||
+        axis.size() == 0)
+        return nullptr;
+
+    for (int d = 0; d < src->ndims(); d++) {
+        unsigned a = 0;
+        for (; a < axis.size(); a++) {
+            if (d == axis[a])
+                break;
+        }
+
+        if (a >= axis.size())
+            o_dims.push_back(dims[d]);
+    }
+
+    Tensor *dst = nullptr;
+    try {
+        switch (src->type()) {
+        case FLOAT32:
+            dst = new Tensor(o_ndims, o_dims, src->type());
+            sum_along_axis(static_cast<float *>(src->data()),
+                src->ndims(), src->desc().data.dims, axis,
+                o_dims, static_cast<float *>(dst->data()));
+            break;
+        case SINT32:
+            dst = new Tensor(o_ndims, o_dims, src->type());
+            sum_along_axis(static_cast<int *>(src->data()),
+                src->ndims(), src->desc().data.dims, axis,
+                o_dims, static_cast<int *>(dst->data()));
+            break;
+        case SINT16:
+            dst = new Tensor(o_ndims, o_dims, src->type());
+            sum_along_axis(static_cast<int16_t *>(src->data()),
+                src->ndims(), src->desc().data.dims, axis,
+                o_dims, static_cast<int16_t *>(dst->data()));
+            break;
+        case SINT8:
+            dst = new Tensor(o_ndims, o_dims, src->type());
+            sum_along_axis(static_cast<int8_t *>(src->data()),
+                src->ndims(), src->desc().data.dims, axis,
+                o_dims, static_cast<int8_t *>(dst->data()));
+            break;
+        case UINT8:
+            dst = new Tensor(o_ndims, o_dims, src->type());
+            sum_along_axis(static_cast<uint8_t *>(src->data()),
+                src->ndims(), src->desc().data.dims, axis,
+                o_dims, static_cast<uint8_t *>(dst->data()));
+            break;
+        default:
+            throw std::runtime_error(
+                "Invalid dtype in tensor sum common along axis");
+            break;
+        }
+    } catch (std::runtime_error &e) {
+        (void)e;
+        return nullptr;
+    }
+
+    return dst;
+}
+
+Tensor * blas_sum(Tensor *src, vector<int> axis) {
+    if (optimized_format(src))
+        return sum_opt_along_axis(src, axis);
+    else
+        return sum_common_along_axis(src, axis);
+}
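As a cross-check on the two code paths above, the semantics of `blas_sum` can be mirrored in a few lines of numpy. This is only a reference model for testing, not part of the library; blocked formats such as nChw16c would first have to be reordered back to plain NCHW for the comparison to make sense:

```python
import numpy as np

def blas_sum_ref(src, axis):
    """Reference for blas_sum: sum `src` over the given axes.

    For the optimized path (nChw16c/nChw8c input, axis == (0, 2, 3))
    this reduces an NCHW tensor to a length-C vector of channel sums.
    """
    return np.asarray(src, dtype=np.float32).sum(axis=tuple(axis))

x = np.random.rand(2, 16, 4, 4).astype(np.float32)
per_channel = blas_sum_ref(x, (0, 2, 3))  # shape (16,): one sum per channel
assert per_channel.shape == (16,)
```

diff --git a/python/ideep4py/common/common.cc b/python/ideep4py/common/common.cc
new file mode 100644
index 00000000..9c795d0d
--- /dev/null
+++ b/python/ideep4py/common/common.cc
@@ -0,0 +1,42 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.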
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include "mkldnn.hpp" +#include "common.h" +#include "cpu_info.h" + +using namespace mkldnn; + +engine cpu_engine(engine::cpu, 0); +unsigned char dummy[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); +#define DUMMY_VAL 0xcc + +bool enable_prim_reuse = true; // whether reuse primitive +bool enable_mem_reuse = true; // whether reuse output memory to next layer + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/common/common.h b/python/ideep4py/common/common.h new file mode 100644 index 00000000..e190e40f --- /dev/null +++ b/python/ideep4py/common/common.h @@ -0,0 +1,36 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#include + +const mkldnn::memory::dims NONE_DIMS = {}; +#define PAGE_SIZE 4096 +extern unsigned char dummy[PAGE_SIZE]; +#endif // _COMMON_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/common/config.h b/python/ideep4py/common/config.h new file mode 100644 index 00000000..40989fa1 --- /dev/null +++ b/python/ideep4py/common/config.h @@ -0,0 +1,28 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _CONFIG_H_ +#define _CONFIG_H_ + +#endif // _CONFIG_H_ diff --git a/python/ideep4py/common/cpu_info.cc b/python/ideep4py/common/cpu_info.cc new file mode 100644 index 00000000..7a6c3854 --- /dev/null +++ b/python/ideep4py/common/cpu_info.cc @@ -0,0 +1,465 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include + +#include +#include +#include +#include + +#include "cpu_info.h" + +Processor::Processor() { + processor = 0; + physicalId = 0; + siblings = 0; + coreId = 0; + cpuCores = 0; + speedMHz = 0; +} + +CpuInfo::CpuInfo() { + loadContentFromFile("/proc/cpuinfo"); +} + +CpuInfo::CpuInfo(const char *content) { + loadContent(content); +} + +void CpuInfo::loadContentFromFile(const char *fileName) { + std::ifstream file(fileName); + std::string content( + (std::istreambuf_iterator(file)), + (std::istreambuf_iterator())); + + loadContent(content.c_str()); +} + +void CpuInfo::loadContent(const char *content) { + size_t contentLength = strlen(content); + char *contentCopy = new char[contentLength + 1]; + snprintf(contentCopy, contentLength + 1, "%s", content); + + parseLines(contentCopy); + + fileContentBegin = contentCopy; + fileContentEnd = &contentCopy[contentLength]; + currentLine = NULL; +} + +CpuInfo::~CpuInfo() { + delete [] fileContentBegin; +} + +void CpuInfo::parseLines(char *content) { + for (; *content; content++) { + if (*content == '\n') { + *content = '\0'; + } + } +} + +const char *CpuInfo::getFirstLine() { + currentLine = fileContentBegin < fileContentEnd ? fileContentBegin : NULL; + return getNextLine(); +} + +const char *CpuInfo::getNextLine() { + if (!currentLine) { + return NULL; + } + + const char *savedCurrentLine = currentLine; + while (*(currentLine++)) { + } + + if (currentLine >= fileContentEnd) { + currentLine = NULL; + } + + return savedCurrentLine; +} + +Collection::Collection(CpuInfoInterface *cpuInfo) : cpuInfo(*cpuInfo) { + totalNumberOfSockets = 0; + totalNumberOfCpuCores = 0; + currentProcessor = NULL; + + processors.reserve(96); + + parseCpuInfo(); + collectBasicCpuInformation(); +} + +unsigned Collection::getProcessorSpeedMHz() { + return processors.size() ? 
processors[0].speedMHz : 0; +} + +unsigned Collection::getTotalNumberOfSockets() { + return totalNumberOfSockets; +} + +unsigned Collection::getTotalNumberOfCpuCores() { + return totalNumberOfCpuCores; +} + +unsigned Collection::getNumberOfProcessors() { + return processors.size(); +} + +const Processor &Collection::getProcessor(unsigned processorId) { + return processors[processorId]; +} + +void Collection::parseCpuInfo() { + const char *cpuInfoLine = cpuInfo.getFirstLine(); + for (; cpuInfoLine; cpuInfoLine = cpuInfo.getNextLine()) { + parseCpuInfoLine(cpuInfoLine); + } +} + +void Collection::parseCpuInfoLine(const char *cpuInfoLine) { + int delimiterPosition = strcspn(cpuInfoLine, ":"); + + if (cpuInfoLine[delimiterPosition] == '\0') { + currentProcessor = NULL; + } else { + parseValue(cpuInfoLine, &cpuInfoLine[delimiterPosition + 2]); + } +} + +void Collection::parseValue(const char *fieldName, const char *valueString) { + if (!currentProcessor) { + appendNewProcessor(); + } + + if (beginsWith(fieldName, "processor")) { + currentProcessor->processor = parseInteger(valueString); + } + + if (beginsWith(fieldName, "physical id")) { + currentProcessor->physicalId = parseInteger(valueString); + } + + if (beginsWith(fieldName, "siblings")) { + currentProcessor->siblings = parseInteger(valueString); + } + + if (beginsWith(fieldName, "core id")) { + currentProcessor->coreId = parseInteger(valueString); + } + + if (beginsWith(fieldName, "cpu cores")) { + currentProcessor->cpuCores = parseInteger(valueString); + } + + if (beginsWith(fieldName, "model name")) { + currentProcessor->speedMHz = extractSpeedFromModelName(valueString); + } +} + +void Collection::appendNewProcessor() { + processors.push_back(Processor()); + currentProcessor = &processors.back(); +} + +bool Collection::beginsWith(const char *lineBuffer, const char *text) const { + while (*text) { + if (*(lineBuffer++) != *(text++)) { + return false; + } + } + + return true; +} + +unsigned Collection::parseInteger(const char *text) const { + return atol(text); +} + +/* Function extracts CPU speed from model name. If unit is not set it is + assumed that values below 100 are specified in GHz, otherwise MHz */ +unsigned Collection::extractSpeedFromModelName(const char *text) const { + text = strstr(text, "@"); + if (!text) { + return 0; + } + + char *unit; + double speed = strtod(&text[1], &unit); + + while (isspace(*unit)) { + unit++; + } + + bool isMHz = !strncmp(unit, "MHz", 3); + bool isGHz = !strncmp(unit, "GHz", 3); + bool isGHzPossible = (speed < 100); + + if (isGHz || (isGHzPossible && !isMHz)) { + return 1000 * speed + 0.5; + } else { + return speed + 0.5; + } +} + +void Collection::collectBasicCpuInformation() { + std::set uniquePhysicalId; + std::vector::iterator processor = processors.begin(); + for (; processor != processors.end(); processor++) { + uniquePhysicalId.insert(processor->physicalId); + updateCpuInformation(*processor, uniquePhysicalId.size()); + } +} + +void Collection::updateCpuInformation(const Processor &processor, + unsigned numberOfUniquePhysicalId) { + if (totalNumberOfSockets == numberOfUniquePhysicalId) { + return; + } + + totalNumberOfSockets = numberOfUniquePhysicalId; + totalNumberOfCpuCores += processor.cpuCores; +} + +/* The OpenMpManager class is responsible for determining a set of all of + available CPU cores and delegating each core to perform other tasks. The + first of available cores is delegated for background threads, while other + remaining cores are dedicated for OpenMP threads. 
Each OpenMP thread owns + one core for exclusive use. The number of OpenMP threads is then limited + to the number of available cores minus one. The amount of CPU cores may + be limited by system eg. when numactl was used. */ + +#include +#include + +static const char *openMpEnvVars[] = { + "OMP_CANCELLATION", "OMP_DISPLAY_ENV", "OMP_DEFAULT_DEVICE", "OMP_DYNAMIC", + "OMP_MAX_ACTIVE_LEVELS", "OMP_MAX_TASK_PRIORITY", "OMP_NESTED", + "OMP_NUM_THREADS", "OMP_PROC_BIND", "OMP_PLACES", "OMP_STACKSIZE", + "OMP_SCHEDULE", "OMP_THREAD_LIMIT", "OMP_WAIT_POLICY", "GOMP_CPU_AFFINITY", + "GOMP_DEBUG", "GOMP_STACKSIZE", "GOMP_SPINCOUNT", "GOMP_RTEMS_THREAD_POOLS", + "KMP_AFFINITY", "KMP_NUM_THREADS", "MIC_KMP_AFFINITY", + "MIC_OMP_NUM_THREADS", "MIC_OMP_PROC_BIND", "PHI_KMP_AFFINITY", + "PHI_OMP_NUM_THREADS", "PHI_KMP_PLACE_THREADS", "MKL_NUM_THREADS", + "MKL_DYNAMIC", "MKL_DOMAIN_NUM_THREADS" +}; + +static const unsigned numberOfOpenMpEnvVars = + sizeof(openMpEnvVars) / sizeof(openMpEnvVars[0]); + +OpenMpManager::OpenMpManager(Collection *collection) : + mainThreadId(boost::this_thread::get_id()), + collection(*collection) { + getOpenMpEnvVars(); + getCurrentCpuSet(); + getCurrentCoreSet(); +} + +OpenMpManager &OpenMpManager::get_instance() { + static CpuInfo cpuInfo; + static Collection collection(&cpuInfo); + static OpenMpManager openMpManager(&collection); + return openMpManager; +} + +void OpenMpManager::setGpuEnabled() { + OpenMpManager &openMpManager = get_instance(); + openMpManager.isGpuEnabled = true; +} + +void OpenMpManager::setGpuDisabled() { + OpenMpManager &openMpManager = get_instance(); + openMpManager.isGpuEnabled = false; +} + +bool OpenMpManager::isMajorThread(boost::thread::id currentThread) { + OpenMpManager &openMpManager = get_instance(); + return (boost::this_thread::get_id() == openMpManager.mainThreadId); +} + +// Ideally bind given thread to secondary logical core, if +// only one thread exists then bind to primary one +void OpenMpManager::bindCurrentThreadToNonPrimaryCoreIfPossible() { + OpenMpManager &openMpManager = get_instance(); + if (openMpManager.isThreadsBindAllowed()) { + int totalNumberOfAvailableCores = CPU_COUNT(&openMpManager.currentCoreSet); + int logicalCoreToBindTo = totalNumberOfAvailableCores > 1 ? 1 : 0; + openMpManager.bindCurrentThreadToLogicalCoreCpus(logicalCoreToBindTo); + } +} + +void OpenMpManager::bindOpenMpThreads() { + OpenMpManager &openMpManager = get_instance(); + + if (!openMpManager.isThreadsBindAllowed()) + return; + + openMpManager.setOpenMpThreadNumberLimit(); + #pragma omp parallel + { + unsigned logicalCoreId = omp_get_thread_num(); + openMpManager.bindCurrentThreadToLogicalCoreCpu(logicalCoreId); + } +} + +void OpenMpManager::getOpenMpEnvVars() { + isAnyOpenMpEnvVarSpecified = false; + for (unsigned i = 0; i < numberOfOpenMpEnvVars; i++) { + if (getenv(openMpEnvVars[i])) { + isAnyOpenMpEnvVarSpecified = true; + } + } +} + +void OpenMpManager::getCurrentCpuSet() { + if (sched_getaffinity(0, sizeof(currentCpuSet), ¤tCpuSet)) { + getDefaultCpuSet(¤tCpuSet); + } +} + +void OpenMpManager::getDefaultCpuSet(cpu_set_t *defaultCpuSet) { + CPU_ZERO(defaultCpuSet); + unsigned numberOfProcessors = collection.getNumberOfProcessors(); + for (unsigned processorId = 0; processorId < numberOfProcessors; processorId++) { + CPU_SET(processorId, defaultCpuSet); + } +} + +/* Function getCurrentCoreSet() fills currentCoreSet variable with a set of + available CPUs, where only one CPU per core is chosen. 
When multiple CPUs + of single core are used, function is selecting only first one of all + available. */ + +void OpenMpManager::getCurrentCoreSet() { + unsigned numberOfProcessors = collection.getNumberOfProcessors(); + unsigned totalNumberOfCpuCores = collection.getTotalNumberOfCpuCores(); + + cpu_set_t usedCoreSet; + CPU_ZERO(&usedCoreSet); + CPU_ZERO(¤tCoreSet); + + for (unsigned processorId = 0; processorId < numberOfProcessors; processorId++) { + if (CPU_ISSET(processorId, ¤tCpuSet)) { + unsigned coreId = processorId % totalNumberOfCpuCores; + if (!CPU_ISSET(coreId, &usedCoreSet)) { + CPU_SET(coreId, &usedCoreSet); + CPU_SET(processorId, ¤tCoreSet); + } + } + } +} + +void OpenMpManager::selectAllCoreCpus(cpu_set_t *set, unsigned physicalCoreId) { + unsigned numberOfProcessors = collection.getNumberOfProcessors(); + unsigned totalNumberOfCpuCores = collection.getTotalNumberOfCpuCores(); + + unsigned processorId = physicalCoreId % totalNumberOfCpuCores; + while (processorId < numberOfProcessors) { + if (CPU_ISSET(processorId, ¤tCpuSet)) { + CPU_SET(processorId, set); + } + + processorId += totalNumberOfCpuCores; + } +} + +unsigned OpenMpManager::getPhysicalCoreId(unsigned logicalCoreId) { + unsigned numberOfProcessors = collection.getNumberOfProcessors(); + + for (unsigned processorId = 0; processorId < numberOfProcessors; processorId++) { + if (CPU_ISSET(processorId, ¤tCoreSet)) { + if (!logicalCoreId--) { + return processorId; + } + } + } + + LOG(FATAL) << "This should never happen!"; + return 0; +} + +bool OpenMpManager::isThreadsBindAllowed() { + return !isAnyOpenMpEnvVarSpecified && !isGpuEnabled; +} + +// Limit of threads to number of logical cores available +void OpenMpManager::setOpenMpThreadNumberLimit() { + omp_set_num_threads(CPU_COUNT(¤tCoreSet)); +} + +void OpenMpManager::bindCurrentThreadToLogicalCoreCpu(unsigned logicalCoreId) { + unsigned physicalCoreId = getPhysicalCoreId(logicalCoreId); + + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(physicalCoreId, &set); + sched_setaffinity(0, sizeof(set), &set); +} + +void OpenMpManager::bindCurrentThreadToLogicalCoreCpus(unsigned logicalCoreId) { + unsigned physicalCoreId = getPhysicalCoreId(logicalCoreId); + + cpu_set_t set; + CPU_ZERO(&set); + selectAllCoreCpus(&set, physicalCoreId); + sched_setaffinity(0, sizeof(set), &set); +} + +void OpenMpManager::printVerboseInformation() { + OpenMpManager &openMpManager = get_instance(); + + LOG(INFO) << "Processor speed [MHz]: " + << openMpManager.collection.getProcessorSpeedMHz(); + + LOG(INFO) << "Total number of sockets: " + << openMpManager.collection.getTotalNumberOfSockets(); + + LOG(INFO) << "Total number of CPU cores: " + << openMpManager.collection.getTotalNumberOfCpuCores(); + + LOG(INFO) << "Total number of processors: " + << openMpManager.collection.getNumberOfProcessors(); + + LOG(INFO) << "GPU is used: " + << (openMpManager.isGpuEnabled ? "yes" : "no"); + + LOG(INFO) << "OpenMP environmental variables are specified: " + << (openMpManager.isAnyOpenMpEnvVarSpecified ? "yes" : "no"); + + LOG(INFO) << "OpenMP thread bind allowed: " + << (openMpManager.isThreadsBindAllowed() ? 
"yes" : "no"); + + LOG(INFO) << "Number of OpenMP threads: " + << omp_get_max_threads(); +} + +unsigned OpenMpManager::getProcessorSpeedMHz() { + OpenMpManager &openMpManager = get_instance(); + return openMpManager.collection.getProcessorSpeedMHz(); +} + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/common/cpu_info.h b/python/ideep4py/common/cpu_info.h new file mode 100644 index 00000000..c8cd722a --- /dev/null +++ b/python/ideep4py/common/cpu_info.h @@ -0,0 +1,166 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _CPU_INFO_H +#define _CPU_INFO_H + +#include +#include +#include +#include +#include +#include +#include +//#include "utils.h" + +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + void operator=(const TypeName&) = delete + +struct Processor { + unsigned processor; + unsigned physicalId; + unsigned siblings; + unsigned coreId; + unsigned cpuCores; + unsigned speedMHz; + + Processor(); +}; + +class CpuInfoInterface { + public: + virtual ~CpuInfoInterface() {} + virtual const char *getFirstLine() = 0; + virtual const char *getNextLine() = 0; +}; + +class CpuInfo : public CpuInfoInterface { + public: + CpuInfo(); + explicit CpuInfo(const char *content); + virtual ~CpuInfo(); + + virtual const char *getFirstLine(); + virtual const char *getNextLine(); + + private: + const char *fileContentBegin; + const char *fileContentEnd; + const char *currentLine; + + void loadContentFromFile(const char *fileName); + void loadContent(const char *content); + void parseLines(char *content); + DISALLOW_COPY_AND_ASSIGN(CpuInfo); +}; + +class CollectionInterface { + public: + virtual ~CollectionInterface() {} + virtual unsigned getProcessorSpeedMHz() = 0; + virtual unsigned getTotalNumberOfSockets() = 0; + virtual unsigned getTotalNumberOfCpuCores() = 0; + virtual unsigned getNumberOfProcessors() = 0; + virtual const Processor &getProcessor(unsigned processorId) = 0; +}; + +class Collection : public CollectionInterface { + public: + explicit Collection(CpuInfoInterface *cpuInfo); + + virtual unsigned getProcessorSpeedMHz(); + virtual unsigned getTotalNumberOfSockets(); + virtual unsigned getTotalNumberOfCpuCores(); + virtual unsigned getNumberOfProcessors(); + virtual const Processor &getProcessor(unsigned processorId); + + private: + CpuInfoInterface &cpuInfo; + unsigned totalNumberOfSockets; + unsigned totalNumberOfCpuCores; + std::vector processors; + 
Processor *currentProcessor; + + Collection(const Collection &collection); + Collection &operator =(const Collection &collection); + + void parseCpuInfo(); + void parseCpuInfoLine(const char *cpuInfoLine); + void parseValue(const char *fieldName, const char *valueString); + void appendNewProcessor(); + bool beginsWith(const char *lineBuffer, const char *text) const; + unsigned parseInteger(const char *text) const; + unsigned extractSpeedFromModelName(const char *text) const; + + void collectBasicCpuInformation(); + void updateCpuInformation(const Processor &processor, + unsigned numberOfUniquePhysicalId); +}; + +class OpenMpManager { + public: + static void setGpuEnabled(); + static void setGpuDisabled(); + + static void bindCurrentThreadToNonPrimaryCoreIfPossible(); + + static void bindOpenMpThreads(); + static void printVerboseInformation(); + + static bool isMajorThread(boost::thread::id currentThread); + static unsigned getProcessorSpeedMHz(); + + private: + boost::thread::id mainThreadId; + Collection &collection; + + bool isGpuEnabled; + bool isAnyOpenMpEnvVarSpecified; + cpu_set_t currentCpuSet; + cpu_set_t currentCoreSet; + + explicit OpenMpManager(Collection *collection); + OpenMpManager(const OpenMpManager &openMpManager); + OpenMpManager &operator =(const OpenMpManager &openMpManager); + static OpenMpManager &get_instance(); + + void getOpenMpEnvVars(); + void getCurrentCpuSet(); + void getDefaultCpuSet(cpu_set_t *defaultCpuSet); + void getCurrentCoreSet(); + + void selectAllCoreCpus(cpu_set_t *set, unsigned physicalCoreId); + unsigned getPhysicalCoreId(unsigned logicalCoreId); + + bool isThreadsBindAllowed(); + void setOpenMpThreadNumberLimit(); + void bindCurrentThreadToLogicalCoreCpu(unsigned logicalCoreId); + void bindCurrentThreadToLogicalCoreCpus(unsigned logicalCoreId); +}; + +#endif // _CPU_INFO_H + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/common/utils.cc b/python/ideep4py/common/utils.cc new file mode 100644 index 00000000..4e497fad --- /dev/null +++ b/python/ideep4py/common/utils.cc @@ -0,0 +1,359 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include "utils.h" +#include + +#if defined(_MSC_VER) +static inline uint64_t __cpuidXfeature() +{ +#if (_MSC_VER > 1600) + return _xgetbv(0); +#else + uint32_t a, d; + __asm { + push edx + push ecx + push eax + xor ecx, ecx + _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 + mov a, eax + mov d, edx + pop eax + pop ecx + pop edx + } + return (((uint64_t)d << 32) | a); +#endif +} + +#if (_MSC_VER < 1400) +static inline __declspec(naked) void __cpuid(int[4] result, int level) +{ + __asm { + push ebx + push edi + mov eax, dword ptr [esp + 4 * 4] // level + cpuid + mov edi, dword ptr [esp + 4 * 3] // result + mov dword ptr [edi + 4 * 0], eax // result[0] + mov dword ptr [edi + 4 * 1], ebx // result[1] + mov dword ptr [edi + 4 * 2], ecx // result[2] + mov dword ptr [edi + 4 * 3], edx // result[3] + pop edi + pop ebx + ret + } +} + +static inline __declspec(naked) void __cpuidex(int[4] result, int level, int count) +{ + __asm { + push ebx + push ecx + push edi + mov ecx, dword ptr [esp + 4 * 6] // count + mov eax, dword ptr [esp + 4 * 5] // level + cpuid + mov edi, dword ptr [esp + 4 * 4] // result + mov dword ptr [edi + 4 * 0], eax // result[0] + mov dword ptr [edi + 4 * 1], ebx // result[1] + mov dword ptr [edi + 4 * 2], ecx // result[2] + mov dword ptr [edi + 4 * 3], edx // result[3] + pop edi + pop ecx + pop ebx + ret + } +} + +#else +#include + +#endif + +#else // Non-MSC +static inline uint64_t __cpuidXfeature() +{ + uint32_t eax, edx; +#if (((__GNUC__) > 4) || (((__GNUC__) == 4) && ((__GNUC_MINOR_) > 2))) + __asm__ volatile("xgetbv" + : "=a"(eax), "=d"(edx) + : "c"(0)); +#else + __asm__ volatile(".byte 0x0f, 0x01, 0xd0" + : "=a"(eax), "=d"(edx) + : "c"(0)); +#endif + return (((uint64_t)edx << 32) | eax); +} + +#if defined(__APPLE__) +#define __cpuid(a, b, c, d, level) \ + __asm__ __volatile__( \ + "pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" \ + : "=a"(a), "=S"(b), "=c"(c), "=d"(d) \ + : "0"(level)) + +#define __cpuid_count(a, b, c, d, level, count) \ + __asm__ __volatile__( \ + "pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" \ + : "=a"(a), "=S"(b), "=c"(c), "=d"(d) \ + : "0"(level), "2"(count)) + +#else // Non-APPLE +#define __cpuid(a, b, c, d, level) \ + __asm__ __volatile__( \ + "cpuid\n" \ + : "=a"(a), "=b"(b), "=c"(c), "=d"(d) \ + : "0"(level)) + +#define __cpuid_count(a, b, c, d, level, count) \ + __asm__ __volatile__( \ + "cpuid\n" \ + : "=a"(a), "=b"(b), "=c"(c), "=d"(d) \ + : "0"(level), "2"(count)) +#endif +#endif + +static inline void get_cpu_feature(uint32_t level, uint32_t result[4]) +{ +#ifdef _MSC_VER + __cpuid(reinterpret_cast(result), level); +#else + __cpuid(result[0], result[1], result[2], result[3], level); +#endif +} + +static inline void get_cpu_feature_ext(uint32_t level, uint32_t count, uint32_t result[4]) +{ +#ifdef _MSC_VER + __cpuidex(reinterpret_cast(result), level, count); +#else + __cpuid_count(result[0], result[1], result[2], result[3], level, count); +#endif +} + +class CpuFeatures +{ +public: + static const uint64_t f_NONE = uint64_t(0); + static const uint64_t f_MMX = uint64_t(1) << 0; + static const uint64_t f_MMX2 = uint64_t(1) << 1; + static const uint64_t f_CMOV = uint64_t(1) << 2; + static const uint64_t f_SSE = uint64_t(1) << 3; + static const uint64_t f_SSE2 = uint64_t(1) << 4; + static const uint64_t f_SSE3 = uint64_t(1) << 5; + static const uint64_t f_SSSE3 = uint64_t(1) << 6; + static const uint64_t f_SSE41 = uint64_t(1) << 7; + static const uint64_t f_SSE42 = uint64_t(1) << 8; + static const uint64_t f_POPCNT = 
uint64_t(1) << 9; + static const uint64_t f_AESNI = uint64_t(1) << 10; + static const uint64_t f_SSE5 = uint64_t(1) << 11; + static const uint64_t f_OSXSAVE = uint64_t(1) << 12; + static const uint64_t f_PCLMULQDQ = uint64_t(1) << 13; + static const uint64_t f_AVX = uint64_t(1) << 14; + static const uint64_t f_FMA = uint64_t(1) << 15; + static const uint64_t f_SSE4a = uint64_t(1) << 16; + static const uint64_t f_RDTSCP = uint64_t(1) << 17; + static const uint64_t f_AVX2 = uint64_t(1) << 18; + static const uint64_t f_BMI1 = uint64_t(1) << 19; + static const uint64_t f_BMI2 = uint64_t(1) << 20; + static const uint64_t f_LZCNT = uint64_t(1) << 21; + static const uint64_t f_ENHANCED_REP = uint64_t(1) << 22; + static const uint64_t f_RDRAND = uint64_t(1) << 23; + static const uint64_t f_ADX = uint64_t(1) << 24; + static const uint64_t f_RDSEED = uint64_t(1) << 25; + static const uint64_t f_SMAP = uint64_t(1) << 26; + static const uint64_t f_HLE = uint64_t(1) << 27; + static const uint64_t f_RTM = uint64_t(1) << 28; + static const uint64_t f_F16C = uint64_t(1) << 29; + static const uint64_t f_MOVBE = uint64_t(1) << 30; + static const uint64_t f_AVX512F = uint64_t(1) << 31; + static const uint64_t f_AVX512DQ = uint64_t(1) << 32; + static const uint64_t f_AVX512IFMA = uint64_t(1) << 33; + static const uint64_t f_AVX512PF = uint64_t(1) << 34; + static const uint64_t f_AVX512ER = uint64_t(1) << 35; + static const uint64_t f_AVX512CD = uint64_t(1) << 36; + static const uint64_t f_AVX512BW = uint64_t(1) << 37; + static const uint64_t f_AVX512VL = uint64_t(1) << 38; + static const uint64_t f_AVX512VBMI = uint64_t(1) << 39; + static const uint64_t f_AVX512_4VNNIW = uint64_t(1) << 40; + static const uint64_t f_AVX512_4FMAPS = uint64_t(1) << 41; + static const uint64_t f_PREFETCHWT1 = uint64_t(1) << 42; + + static const uint32_t any = 0; + static const uint32_t sse42 = 1; + static const uint32_t avx = 2; + static const uint32_t avx2 = 3; + static const uint32_t avx512_comm = 4; + static const uint32_t avx512_core = 5; + static const uint32_t avx512_mic = 6; + static const uint32_t avx512_mic_4ops = 7; + + CpuFeatures() + { + features = f_NONE; + uint32_t result[4] = {0}; + + get_cpu_feature(0x80000001, result); + if (result[2] & (1U << 5)) features |= f_LZCNT; + if (result[3] & (1U << 27)) features |= f_RDTSCP; + + get_cpu_feature(1, result); + if (result[2] & (1U << 0)) features |= f_SSE3; + if (result[2] & (1U << 1)) features |= f_PCLMULQDQ; + if (result[2] & (1U << 9)) features |= f_SSSE3; + if (result[2] & (1U << 19)) features |= f_SSE41; + if (result[2] & (1U << 20)) features |= f_SSE42; + if (result[2] & (1U << 22)) features |= f_MOVBE; + if (result[2] & (1U << 23)) features |= f_POPCNT; + if (result[2] & (1U << 25)) features |= f_AESNI; + if (result[2] & (1U << 27)) features |= f_OSXSAVE; + if (result[2] & (1U << 30)) features |= f_RDRAND; + if (result[2] & (1U << 29)) features |= f_F16C; + if (result[3] & (1U << 15)) features |= f_CMOV; + if (result[3] & (1U << 23)) features |= f_MMX; + if (result[3] & (1U << 25)) features |= f_MMX2 | f_SSE; + if (result[3] & (1U << 26)) features |= f_SSE2; + + if (features & f_OSXSAVE) { + uint64_t x_enabled = __cpuidXfeature(); + if ((x_enabled & 0x6) == 0x6) { + if (result[2] & (1U << 28)) features |= f_AVX; + if (result[2] & (1U << 12)) features |= f_FMA; + if (((x_enabled >> 5) & 0x7) == 0x7) { + get_cpu_feature_ext(0x7, 0x0, result); + if (result[1] & (1U << 16)) { + features |= f_AVX512F; + if (result[1] & (1U << 17)) features |= f_AVX512DQ; + if (result[1] 
& (1U << 21)) features |= f_AVX512IFMA;
+                        if (result[1] & (1U << 26)) features |= f_AVX512PF;
+                        if (result[1] & (1U << 27)) features |= f_AVX512ER;
+                        if (result[1] & (1U << 28)) features |= f_AVX512CD;
+                        if (result[1] & (1U << 30)) features |= f_AVX512BW;
+                        if (result[1] & (1U << 31)) features |= f_AVX512VL;
+                        if (result[2] & (1U << 1)) features |= f_AVX512VBMI;
+                        if (result[3] & (1U << 2)) features |= f_AVX512_4VNNIW;
+                        if (result[3] & (1U << 3)) features |= f_AVX512_4FMAPS;
+                    }
+                }
+            }
+        }
+
+        get_cpu_feature(0x0, result);
+        if (result[0] >= 7) {
+            get_cpu_feature_ext(0x7, 0x0, result);
+            if ((features & f_AVX) && (result[1] & 0x20)) features |= f_AVX2;
+            if (result[1] & (1U << 3)) features |= f_BMI1;
+            if (result[1] & (1U << 8)) features |= f_BMI2;
+            if (result[1] & (1U << 9)) features |= f_ENHANCED_REP;
+            if (result[1] & (1U << 18)) features |= f_RDSEED;
+            if (result[1] & (1U << 19)) features |= f_ADX;
+            if (result[1] & (1U << 20)) features |= f_SMAP;
+            if (result[1] & (1U << 4)) features |= f_HLE;
+            if (result[1] & (1U << 11)) features |= f_RTM;
+            if (result[2] & (1U << 0)) features |= f_PREFETCHWT1;
+        }
+    }
+
+    bool has_feature(uint64_t f)
+    {
+        return (features & f) ? true : false;
+    }
+
+    bool is_supported(const uint32_t cpu_isa)
+    {
+        switch (cpu_isa) {
+        case sse42:
+            return has_feature(f_SSE42);
+        case avx:
+            return has_feature(f_AVX);
+        case avx2:
+            return has_feature(f_AVX2);
+        case avx512_comm:
+            return has_feature(f_AVX512F);
+        case avx512_core:
+            return has_feature(f_AVX512F)
+                && has_feature(f_AVX512BW)
+                && has_feature(f_AVX512VL)
+                && has_feature(f_AVX512DQ);
+        case avx512_mic:
+            return has_feature(f_AVX512F)
+                && has_feature(f_AVX512CD)
+                && has_feature(f_AVX512ER)
+                && has_feature(f_AVX512PF);
+        case avx512_mic_4ops:
+            return is_supported(avx512_mic)
+                && has_feature(f_AVX512_4FMAPS)
+                && has_feature(f_AVX512_4VNNIW);
+        case any:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+private:
+    uint64_t features;
+};
+
+memory::format get_desired_format(int channel)
+{
+    CpuFeatures cpu_f;
+    memory::format fmt_desired = memory::format::any;
+
+    if (cpu_f.is_supported(CpuFeatures::avx512_comm) && (channel % 16) == 0) {
+        fmt_desired = memory::format::nChw16c;
+    } else if (cpu_f.is_supported(CpuFeatures::avx2) && (channel % 8) == 0) {
+        fmt_desired = memory::format::nChw8c;
+    } else {
+        fmt_desired = memory::format::nchw;
+    }
+    return fmt_desired;
+}
+
+memory::format get_desired_format_weight(int channel0, int channel1)
+{
+    CpuFeatures cpu_f;
+    memory::format fmt_desired = memory::format::any;
+
+    if (cpu_f.is_supported(CpuFeatures::avx512_comm) && (channel0 % 16) == 0) {
+        if (channel1 % 16 == 0)
+            fmt_desired = memory::format::OIhw16i16o;
+        else
+            fmt_desired = memory::format::Oihw16o;
+    } else if (cpu_f.is_supported(CpuFeatures::avx2) && (channel0 % 8) == 0) {
+        if (channel1 % 8 == 0)
+            fmt_desired = memory::format::OIhw8i8o;
+        else
+            fmt_desired = memory::format::Ohwi8o;
+    } else {
+        fmt_desired = memory::format::nchw;
+    }
+    return fmt_desired;
+}
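+
+// Illustrative sketch (not part of the original sources): the helpers above
+// pick an mkldnn blocked layout from the ISA detected via CPUID. For a
+// 64-channel activation tensor one would expect nChw16c on AVX-512 machines,
+// nChw8c on AVX2 machines, and plain nchw otherwise:
+//
+//   mkldnn::memory::format fmt  = get_desired_format(64);
+//   mkldnn::memory::format wfmt = get_desired_format_weight(64, 32);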
diff --git a/python/ideep4py/common/utils.h b/python/ideep4py/common/utils.h
new file mode 100644
index 00000000..4856520c
--- /dev/null
+++ b/python/ideep4py/common/utils.h
@@ -0,0 +1,202 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _UTILS_H_
+#define _UTILS_H_
+
+#include <mkldnn.hpp>
+#include <sstream>
+#include <glog/logging.h>
+#include "op_param.h"
+#include "omp.h"
+using namespace mkldnn;
+
+#define GET_PTR(t, p, offset) reinterpret_cast<t *>(reinterpret_cast<char *>(p) + static_cast<size_t>(offset))
+
+memory::format get_desired_format(int channel);
+memory::format get_desired_format_weight(int channel0, int channel1);
+
+template <typename T>
+void eltwise_multiply(T* x1, T* x2, T* y, size_t n) {
+#pragma omp parallel for schedule(static)
+    for (size_t i = 0; i < n; ++i) {
+        y[i] = x1[i] * x2[i];
+    }
+}
+
+//
+//// map C type with mkldnn's
+//// float   -> memory::data_type::f32
+//// int     -> memory::data_type::s32
+//// int16_t -> memory::data_type::s16
+//// int8_t  -> memory::data_type::s8
+//// uint8_t -> memory::data_type::u8
+//
+template <typename T>
+static inline mkldnn::memory::data_type memory_data_type() {
+    if (typeid(T) == typeid(float))
+        return mkldnn::memory::data_type::f32;
+    else if (typeid(T) == typeid(int))
+        return mkldnn::memory::data_type::s32;
+    else if (typeid(T) == typeid(int16_t))
+        return mkldnn::memory::data_type::s16;
+    else if (typeid(T) == typeid(int8_t))
+        return mkldnn::memory::data_type::s8;
+    else if (typeid(T) == typeid(uint8_t))
+        return mkldnn::memory::data_type::u8;
+
+    LOG(ERROR) << "Unsupported type";
+    return mkldnn::memory::data_type::data_undef;
+}
+
+// utility functions converting int/double/float/bool/dims values to strings
+static inline std::string int_to_string(int value) {
+    std::ostringstream os;
+    os << std::hex << "I" << value << "_";
+    return os.str();
+}
+
+static inline std::string double_to_string(double value) {
+    std::ostringstream os;
+    os << "D" << value << "_";
+    return os.str();
+}
+
+static inline std::string float_to_string(float value) {
+    std::ostringstream os;
+    os << "F" << value << "_";
+    return os.str();
+}
+
+static inline std::string bool_to_string(bool value) {
+    std::ostringstream os;
+    os << "B" << value << "_";
+    return os.str();
+}
+
+static inline std::string dims_to_string(mkldnn::memory::dims dims) {
+    std::ostringstream os;
+    os << "DIMS:";
+    for (unsigned int i = 0; i < dims.size(); i++)
+        os << dims[i] << ",";
+    os << ";";
+    return os.str();
+}
+
+static inline std::string long_to_string(size_t value) {
+    std::ostringstream os;
+    os << std::hex << "L" << value << "_";
+    return os.str();
+}
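+
+// Illustrative sketch (not part of the original header): memory_data_type()
+// maps a C++ element type to the matching mkldnn enum, and GET_PTR computes a
+// typed pointer at a byte offset into a raw buffer, e.g.
+//
+//   auto dt = memory_data_type<float>();         // mkldnn f32
+//   float *chunk = GET_PTR(float, buffer, 64);   // buffer + 64 bytes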
+static inline mkldnn::algorithm pooling_algo_convert(pooling_param_t::algorithm input) {
+    switch(input) {
+    case pooling_param_t::algorithm::pooling_max:
+        return mkldnn::pooling_max;
+    case pooling_param_t::algorithm::pooling_avg:
+        return mkldnn::pooling_avg;
+    case pooling_param_t::algorithm::pooling_avg_include_padding:
+        return mkldnn::pooling_avg_include_padding;
+    case pooling_param_t::algorithm::pooling_avg_exclude_padding:
+        return mkldnn::pooling_avg_exclude_padding;
+    default:
+        LOG(ERROR) << "Not a valid pooling algo";
+        return mkldnn::pooling_max;
+    }
+}
+
+static inline mkldnn::algorithm lrn_algo_convert(lrn_param_t::algorithm input) {
+    switch(input) {
+    case lrn_param_t::algorithm::lrn_across_channels:
+        return mkldnn::lrn_across_channels;
+    case lrn_param_t::algorithm::lrn_within_channel:
+        return mkldnn::lrn_within_channel;
+    default:
+        LOG(ERROR) << "Not a valid lrn algo";
+        return mkldnn::lrn_across_channels;
+    }
+}
+
+template <typename T, typename U>
+inline T div_up(const T a, const U b) {
+    assert(b);
+    return (a + b - 1) / b;
+}
+
+// Split n work items across a team of threads as evenly as possible.
+template <typename T, typename U>
+inline void balance211(T n, U team, U tid, T &n_start, T &n_end) {
+    T n_min = 1;
+    T &n_my = n_end;
+    if (team <= 1 || n == 0) {
+        n_start = 0;
+        n_my = n;
+    } else if (n_min == 1) {
+        // team = T1 + T2
+        // n = T1*n1 + T2*n2  (n1 - n2 = 1)
+        T n1 = div_up(n, (T)team);
+        T n2 = n1 - 1;
+        T T1 = n - n2 * (T)team;
+        n_my = (T)tid < T1 ? n1 : n2;
+        n_start = (T)tid <= T1 ? tid * n1 : T1 * n1 + ((T)tid - T1) * n2;
+    }
+
+    n_end += n_start;
+}
+
+// Copy the bulk of the buffer as floats in parallel; the last thread picks up
+// the float remainder and any trailing bytes.
+inline void fast_memcpy(char* data_o, char *data_i, size_t len)
+{
+    size_t nelems_float = len / 4;
+    size_t nelems_char = len % 4;
+    const int block_size = 16;
+    const auto num_blocks_float = nelems_float / block_size;
+    const auto rem_elems_float = nelems_float % block_size;
+    float* output_f = (float*)data_o;
+    float* input_f = (float*) data_i;
+    char* output_c = (char*) data_o;
+    char* input_c = (char*) data_i;
+#   pragma omp parallel
+    {
+        const int ithr = omp_get_thread_num();
+        const int nthr = omp_get_num_threads();
+        size_t start{0}, end{0};
+        balance211(num_blocks_float, nthr, ithr, start, end);
+        start = start * block_size;
+        end = end * block_size;
+#       pragma omp simd
+        for (size_t e = start; e < end; ++e) {
+            output_f[e] = input_f[e];
+        }
+        if (rem_elems_float != 0 && ithr == nthr - 1) {
+            for (auto e = nelems_float - rem_elems_float; e < nelems_float; ++e) {
+                output_f[e] = input_f[e];
+            }
+        }
+        if (nelems_char != 0 && ithr == nthr - 1) {
+            for (auto e = nelems_float * 4; e < len; ++e) {
+                output_c[e] = input_c[e];
+            }
+        }
+    }
+    return;
+}
+
+#endif // _UTILS_H_
diff --git a/python/ideep4py/cosim/__init__.py b/python/ideep4py/cosim/__init__.py
new file mode 100644
index 00000000..0e3ed7c2
--- /dev/null
+++ b/python/ideep4py/cosim/__init__.py
@@ -0,0 +1 @@
+from ideep4py.cosim.cosim import cosim_verify, is_cosim  # NOQA
diff --git a/python/ideep4py/cosim/cosim.py b/python/ideep4py/cosim/cosim.py
new file mode 100644
index 00000000..681e2c2b
--- /dev/null
+++ b/python/ideep4py/cosim/cosim.py
@@ -0,0 +1,156 @@
+import logging
+import numpy as np
+import os
+
+from ideep4py import mdarray
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s [%(levelname)s]: %(message)s')
+global_config_cosim = bool(int(os.environ.get('CHAINER_ENABLE_COSIM', '0')))
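+
+# Illustrative note (not part of the original module): cosim checking is
+# opt-in via the environment variable read above, so a typical run would be
+# launched as, e.g.
+#
+#   CHAINER_ENABLE_COSIM=1 python train_mnist.py
+#
+# Leaving it unset (or setting it to 0) keeps cosim checking disabled.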
+ """ + return global_config_cosim + + +# Convert chainer.variable to array out of plain_array +def plain_array(params): + assert isinstance(params, tuple) \ + or isinstance(params, list) \ + or isinstance(params, mdarray) \ + or isinstance(params, np.ndarray) + # plain_array does not support chainer.variable + # or isinstance(params, chainer.variable.Variable) + + _params = () + + # plain_array does not support chainer.variable + # if isinstance(params, variable.Variable): + # return np.array(params.data), + if isinstance(params, np.ndarray): + return params, + elif isinstance(params, mdarray): + return np.array(params), + + for p in params: + # plain_array does not support chainer.variable + # if isinstance(p, variable.Variable): + # p = np.array(p.data) + if isinstance(p, mdarray): + _params += (np.array(p),) + else: + _params += (p,) + + return _params + + +def force_array(x, dtype=None): + # numpy returns a float value (scalar) when a return value of an operator + # is a 0-dimension array. + # We need to convert such a value to a 0-dimension array because `Function` + # object needs to return an `numpy.ndarray`. + if np.isscalar(x): + if dtype is None: + return np.array(x) + else: + return np.array(x, dtype) + else: + if dtype is None: + return x + else: + return x.astype(dtype, copy=False) + + +def expect_allclose(act, ref, atol=1e-4, rtol=1e-4, verbose=True): + """Failed if some corresponding element of act and ref differs too much. + + Args: + act: Left-hand-side array. + ref: Right-hand-side array. + atol (float): Absolute tolerance. + rtol (float): Relative tolerance. + verbose (bool): If ``True``, it outputs verbose messages on error. + """ + if not isinstance(act, np.ndarray) or not isinstance(ref, np.ndarray): + logging.warning('wrong array types') + return False + + act = force_array(act) + ref = force_array(ref) + + if (act.size != ref.size or act.itemsize != ref.itemsize + or act.shape != ref.shape): + logging.warning('size is not matched!\n \ + size: act={0} ref={1} \ + itemsize: act={2} ref={3}\n' + 'shape: act={4}, ref={5} dtype: act={6} ref={7}' + .format(act.size, ref.size, act.itemsize, ref.itemsize, + act.shape, ref.shape, act.dtype, ref.dtype)) + return False + + act = np.ascontiguousarray(act) + ref = np.ascontiguousarray(ref) + + try: + np.testing.assert_allclose(act, ref, rtol, atol, verbose=verbose) + except Exception: + return False + + return True + + +def verify_results(func, acts, refs, inputs): + if acts is None and refs is None: + logging.warning('input results are None!') + return True + elif acts is None or refs is None: + logging.error('cosim: input results are None!') + return False + + if len(acts) != len(refs): + logging.error('cosim: lengths of results \ + are different !' 
+
+    check_options = {'atol': 1e-3, 'rtol': 1e-2, 'verbose': True}
+
+    for (i, (act, ref)) in enumerate(zip(acts, refs)):
+        if ref is None and act is None:
+            continue
+        elif ref is None or act is None:
+            logging.error('cosim: one input result is None!')
+            return False
+
+        if not expect_allclose(*plain_array((act, ref)), **check_options):
+            logging.error('cosim: mismatch in {0} #{1} result!\n'
+                          'size: {2}, itemsize: {3}\n'
+                          'shape: {4}, dtype: {5}'
+                          .format(func.__class__.__name__, i, act.size,
                                   act.itemsize, act.shape, act.dtype))
+            return False
+
+    return True
+
+
+def cosim_verify(func, acts, inputs):
+    if not is_cosim():
+        return
+
+    logging.info('cosim test for function {0} ...'.format(
+        func.__class__.__name__))
+
+    refs = plain_array(func.forward_cpu(plain_array(inputs)))
+
+    if not verify_results(func, acts, refs, inputs):
+        logging.error('cosim test for function {0} ...FAILED'.format(
+            func.__class__.__name__))
+        raise RuntimeError
+
+    logging.info('cosim test for function {0} ...PASS'.format(
+        func.__class__.__name__))
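+
+# Illustrative usage sketch (hypothetical `func` object, not part of this
+# module): a function that provides both an accelerated forward() and a NumPy
+# reference forward_cpu() can be cross-checked like this:
+#
+#   y = func.forward(inputs)        # accelerated (iDeep) results
+#   cosim_verify(func, y, inputs)   # re-runs forward_cpu and compares;
+#                                   # raises RuntimeError on mismatch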
diff --git a/python/ideep4py/include/blas/blas.h b/python/ideep4py/include/blas/blas.h
new file mode 100644
index 00000000..3b0e334d
--- /dev/null
+++ b/python/ideep4py/include/blas/blas.h
@@ -0,0 +1,30 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _BLAS_H_
+#define _BLAS_H_
+
+#include "sum.h"
+
+#endif
diff --git a/python/ideep4py/include/blas/sum.h b/python/ideep4py/include/blas/sum.h
new file mode 100644
index 00000000..57ed0a19
--- /dev/null
+++ b/python/ideep4py/include/blas/sum.h
@@ -0,0 +1,33 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _SUM_ALONG_AXIS_H_
+#define _SUM_ALONG_AXIS_H_
+
+#include <vector>
+#include "tensor.h"
+
+Tensor * blas_sum(Tensor *src, std::vector<int> axis);
+
+#endif
diff --git a/python/ideep4py/include/mkl/mkl_types.h b/python/ideep4py/include/mkl/mkl_types.h
new file mode 100644
index 00000000..bfa38e24
--- /dev/null
+++ b/python/ideep4py/include/mkl/mkl_types.h
@@ -0,0 +1,149 @@
+/*******************************************************************************
+* Copyright (c) 1999-2017, Intel Corporation
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*     * Redistributions of source code must retain the above copyright notice,
+*       this list of conditions and the following disclaimer.
+*     * Redistributions in binary form must reproduce the above copyright
+*       notice, this list of conditions and the following disclaimer in the
+*       documentation and/or other materials provided with the distribution.
+*     * Neither the name of Intel Corporation nor the names of its contributors
+*       may be used to endorse or promote products derived from this software
+*       without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+/*
+!  Content:
+!
Intel(R) Math Kernel Library (Intel(R) MKL) types definition +!****************************************************************************/ + +#ifndef _MKL_TYPES_H_ +#define _MKL_TYPES_H_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* MKL Complex type for single precision */ +#ifndef MKL_Complex8 +typedef +struct _MKL_Complex8 { + float real; + float imag; +} MKL_Complex8; +#endif + +/* MKL Complex type for double precision */ +#ifndef MKL_Complex16 +typedef +struct _MKL_Complex16 { + double real; + double imag; +} MKL_Complex16; +#endif + +/* MKL Version type */ +typedef +struct { + int MajorVersion; + int MinorVersion; + int UpdateVersion; + char * ProductStatus; + char * Build; + char * Processor; + char * Platform; +} MKLVersion; + +/* MKL integer types for LP64 and ILP64 */ +#if (!defined(__INTEL_COMPILER)) & defined(_MSC_VER) + #define MKL_INT64 __int64 + #define MKL_UINT64 unsigned __int64 +#else + #define MKL_INT64 long long int + #define MKL_UINT64 unsigned long long int +#endif + +#ifdef MKL_ILP64 + +/* MKL ILP64 integer types */ +#ifndef MKL_INT + #define MKL_INT MKL_INT64 +#endif +#ifndef MKL_UINT + #define MKL_UINT MKL_UINT64 +#endif +#define MKL_LONG MKL_INT64 + +#else + +/* MKL LP64 integer types */ +#ifndef MKL_INT + #define MKL_INT int +#endif +#ifndef MKL_UINT + #define MKL_UINT unsigned int +#endif +#define MKL_LONG long int + +#endif + +/* MKL threading stuff. MKL Domain names */ +#define MKL_DOMAIN_ALL 0 +#define MKL_DOMAIN_BLAS 1 +#define MKL_DOMAIN_FFT 2 +#define MKL_DOMAIN_VML 3 +#define MKL_DOMAIN_PARDISO 4 + +/* MKL CBWR stuff */ + +/* options */ +#define MKL_CBWR_BRANCH 1 +#define MKL_CBWR_ALL ~0 + +/* common settings */ +#define MKL_CBWR_UNSET_ALL 0 +#define MKL_CBWR_OFF 0 + +/* branch specific values */ +#define MKL_CBWR_BRANCH_OFF 1 +#define MKL_CBWR_AUTO 2 +#define MKL_CBWR_COMPATIBLE 3 +#define MKL_CBWR_SSE2 4 +#define MKL_CBWR_SSSE3 6 +#define MKL_CBWR_SSE4_1 7 +#define MKL_CBWR_SSE4_2 8 +#define MKL_CBWR_AVX 9 +#define MKL_CBWR_AVX2 10 +#define MKL_CBWR_AVX512_MIC 11 +#define MKL_CBWR_AVX512 12 + +/* error codes */ +#define MKL_CBWR_SUCCESS 0 +#define MKL_CBWR_ERR_INVALID_SETTINGS -1 +#define MKL_CBWR_ERR_INVALID_INPUT -2 +#define MKL_CBWR_ERR_UNSUPPORTED_BRANCH -3 +#define MKL_CBWR_ERR_UNKNOWN_BRANCH -4 +#define MKL_CBWR_ERR_MODE_CHANGE_FAILURE -8 + +/* Obsolete */ +#define MKL_CBWR_SSE3 5 + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _MKL_TYPES_H_ */ diff --git a/python/ideep4py/include/mkl/mkl_vsl.h b/python/ideep4py/include/mkl/mkl_vsl.h new file mode 100644 index 00000000..a6ad20d3 --- /dev/null +++ b/python/ideep4py/include/mkl/mkl_vsl.h @@ -0,0 +1,51 @@ +/* file: mkl_vsl.h */ +/******************************************************************************* +* Copyright (c) 2006-2017, Intel Corporation +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of Intel Corporation nor the names of its contributors +* may be used to endorse or promote products derived from this software +* without specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + +/* +//++ +// The main VSL header file. +//-- +*/ + +#ifndef __MKL_VSL_H__ +#define __MKL_VSL_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include "mkl_vsl_defines.h" +#include "mkl_vsl_functions.h" +#include "mkl_vsl_types.h" + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MKL_VSL_H__ */ diff --git a/python/ideep4py/include/mkl/mkl_vsl_defines.h b/python/ideep4py/include/mkl/mkl_vsl_defines.h new file mode 100644 index 00000000..849044ac --- /dev/null +++ b/python/ideep4py/include/mkl/mkl_vsl_defines.h @@ -0,0 +1,1094 @@ +/* file: mkl_vsl_defines.h */ +/******************************************************************************* +* Copyright (c) 2006-2017, Intel Corporation +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of Intel Corporation nor the names of its contributors +* may be used to endorse or promote products derived from this software +* without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/
+
+/*
+//++
+// User-level macro definitions
+//--
+*/
+
+#ifndef __MKL_VSL_DEFINES_H__
+#define __MKL_VSL_DEFINES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+
+/*
+// "No error" status
+*/
+#define VSL_STATUS_OK 0
+#define VSL_ERROR_OK 0
+
+/*
+// Common errors (-1..-999)
+*/
+#define VSL_ERROR_FEATURE_NOT_IMPLEMENTED -1
+#define VSL_ERROR_UNKNOWN -2
+#define VSL_ERROR_BADARGS -3
+#define VSL_ERROR_MEM_FAILURE -4
+#define VSL_ERROR_NULL_PTR -5
+#define VSL_ERROR_CPU_NOT_SUPPORTED -6
+
+
+/*
+// RNG errors (-1000..-1999)
+*/
+/* brng errors */
+#define VSL_RNG_ERROR_INVALID_BRNG_INDEX -1000
+#define VSL_RNG_ERROR_LEAPFROG_UNSUPPORTED -1002
+#define VSL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED -1003
+#define VSL_RNG_ERROR_BRNGS_INCOMPATIBLE -1005
+#define VSL_RNG_ERROR_BAD_STREAM -1006
+#define VSL_RNG_ERROR_BRNG_TABLE_FULL -1007
+#define VSL_RNG_ERROR_BAD_STREAM_STATE_SIZE -1008
+#define VSL_RNG_ERROR_BAD_WORD_SIZE -1009
+#define VSL_RNG_ERROR_BAD_NSEEDS -1010
+#define VSL_RNG_ERROR_BAD_NBITS -1011
+#define VSL_RNG_ERROR_QRNG_PERIOD_ELAPSED -1012
+#define VSL_RNG_ERROR_LEAPFROG_NSTREAMS_TOO_BIG -1013
+#define VSL_RNG_ERROR_BRNG_NOT_SUPPORTED -1014
+
+/* abstract stream related errors */
+#define VSL_RNG_ERROR_BAD_UPDATE -1120
+#define VSL_RNG_ERROR_NO_NUMBERS -1121
+#define VSL_RNG_ERROR_INVALID_ABSTRACT_STREAM -1122
+
+/* non-deterministic stream related errors */
+#define VSL_RNG_ERROR_NONDETERM_NOT_SUPPORTED -1130
+#define VSL_RNG_ERROR_NONDETERM_NRETRIES_EXCEEDED -1131
+
+/* ARS5 stream related errors */
+#define VSL_RNG_ERROR_ARS5_NOT_SUPPORTED -1140
+
+/* read/write stream to file errors */
+#define VSL_RNG_ERROR_FILE_CLOSE -1100
+#define VSL_RNG_ERROR_FILE_OPEN -1101
+#define VSL_RNG_ERROR_FILE_WRITE -1102
+#define VSL_RNG_ERROR_FILE_READ -1103
+
+#define VSL_RNG_ERROR_BAD_FILE_FORMAT -1110
+#define VSL_RNG_ERROR_UNSUPPORTED_FILE_VER -1111
+
+#define VSL_RNG_ERROR_BAD_MEM_FORMAT -1200
+
+/* Convolution/correlation errors */
+#define VSL_CC_ERROR_NOT_IMPLEMENTED (-2000)
+#define VSL_CC_ERROR_ALLOCATION_FAILURE (-2001)
+#define VSL_CC_ERROR_BAD_DESCRIPTOR (-2200)
+#define VSL_CC_ERROR_SERVICE_FAILURE (-2210)
+#define VSL_CC_ERROR_EDIT_FAILURE (-2211)
+#define VSL_CC_ERROR_EDIT_PROHIBITED (-2212)
+#define VSL_CC_ERROR_COMMIT_FAILURE (-2220)
+#define VSL_CC_ERROR_COPY_FAILURE (-2230)
+#define VSL_CC_ERROR_DELETE_FAILURE (-2240)
+#define VSL_CC_ERROR_BAD_ARGUMENT (-2300)
+#define VSL_CC_ERROR_DIMS (-2301)
+#define VSL_CC_ERROR_START (-2302)
+#define VSL_CC_ERROR_DECIMATION (-2303)
+#define VSL_CC_ERROR_XSHAPE (-2311)
+#define VSL_CC_ERROR_YSHAPE (-2312)
+#define VSL_CC_ERROR_ZSHAPE (-2313)
+#define VSL_CC_ERROR_XSTRIDE (-2321)
+#define VSL_CC_ERROR_YSTRIDE (-2322)
+#define VSL_CC_ERROR_ZSTRIDE (-2323)
+#define VSL_CC_ERROR_X (-2331)
+#define VSL_CC_ERROR_Y (-2332)
+#define VSL_CC_ERROR_Z (-2333)
+#define VSL_CC_ERROR_JOB (-2100)
+#define VSL_CC_ERROR_KIND (-2110)
+#define VSL_CC_ERROR_MODE (-2120)
+#define VSL_CC_ERROR_TYPE (-2130)
+#define VSL_CC_ERROR_PRECISION (-2400)
+#define VSL_CC_ERROR_EXTERNAL_PRECISION (-2141)
+#define VSL_CC_ERROR_INTERNAL_PRECISION (-2142)
+#define VSL_CC_ERROR_METHOD (-2400)
+#define VSL_CC_ERROR_OTHER (-2800)
+
+/*
+//++
+// SUMMARY STATISTICS ERROR/WARNING CODES
+//--
+*/
+
+/*
+// Warnings
+*/
+#define VSL_SS_NOT_FULL_RANK_MATRIX 4028
+#define VSL_SS_SEMIDEFINITE_COR 4029
+/*
+// Errors (-4000..-4999)
+*/
+#define VSL_SS_ERROR_ALLOCATION_FAILURE -4000
+#define VSL_SS_ERROR_BAD_DIMEN -4001 +#define VSL_SS_ERROR_BAD_OBSERV_N -4002 +#define VSL_SS_ERROR_STORAGE_NOT_SUPPORTED -4003 +#define VSL_SS_ERROR_BAD_INDC_ADDR -4004 +#define VSL_SS_ERROR_BAD_WEIGHTS -4005 +#define VSL_SS_ERROR_BAD_MEAN_ADDR -4006 +#define VSL_SS_ERROR_BAD_2R_MOM_ADDR -4007 +#define VSL_SS_ERROR_BAD_3R_MOM_ADDR -4008 +#define VSL_SS_ERROR_BAD_4R_MOM_ADDR -4009 +#define VSL_SS_ERROR_BAD_2C_MOM_ADDR -4010 +#define VSL_SS_ERROR_BAD_3C_MOM_ADDR -4011 +#define VSL_SS_ERROR_BAD_4C_MOM_ADDR -4012 +#define VSL_SS_ERROR_BAD_KURTOSIS_ADDR -4013 +#define VSL_SS_ERROR_BAD_SKEWNESS_ADDR -4014 +#define VSL_SS_ERROR_BAD_MIN_ADDR -4015 +#define VSL_SS_ERROR_BAD_MAX_ADDR -4016 +#define VSL_SS_ERROR_BAD_VARIATION_ADDR -4017 +#define VSL_SS_ERROR_BAD_COV_ADDR -4018 +#define VSL_SS_ERROR_BAD_COR_ADDR -4019 +#define VSL_SS_ERROR_BAD_ACCUM_WEIGHT_ADDR -4020 +#define VSL_SS_ERROR_BAD_QUANT_ORDER_ADDR -4021 +#define VSL_SS_ERROR_BAD_QUANT_ORDER -4022 +#define VSL_SS_ERROR_BAD_QUANT_ADDR -4023 +#define VSL_SS_ERROR_BAD_ORDER_STATS_ADDR -4024 +#define VSL_SS_ERROR_MOMORDER_NOT_SUPPORTED -4025 +#define VSL_SS_ERROR_ALL_OBSERVS_OUTLIERS -4026 +#define VSL_SS_ERROR_BAD_ROBUST_COV_ADDR -4027 +#define VSL_SS_ERROR_BAD_ROBUST_MEAN_ADDR -4028 +#define VSL_SS_ERROR_METHOD_NOT_SUPPORTED -4029 +#define VSL_SS_ERROR_BAD_GROUP_INDC_ADDR -4030 +#define VSL_SS_ERROR_NULL_TASK_DESCRIPTOR -4031 +#define VSL_SS_ERROR_BAD_OBSERV_ADDR -4032 +#define VSL_SS_ERROR_SINGULAR_COV -4033 +#define VSL_SS_ERROR_BAD_POOLED_COV_ADDR -4034 +#define VSL_SS_ERROR_BAD_POOLED_MEAN_ADDR -4035 +#define VSL_SS_ERROR_BAD_GROUP_COV_ADDR -4036 +#define VSL_SS_ERROR_BAD_GROUP_MEAN_ADDR -4037 +#define VSL_SS_ERROR_BAD_GROUP_INDC -4038 +#define VSL_SS_ERROR_BAD_OUTLIERS_PARAMS_ADDR -4039 +#define VSL_SS_ERROR_BAD_OUTLIERS_PARAMS_N_ADDR -4040 +#define VSL_SS_ERROR_BAD_OUTLIERS_WEIGHTS_ADDR -4041 +#define VSL_SS_ERROR_BAD_ROBUST_COV_PARAMS_ADDR -4042 +#define VSL_SS_ERROR_BAD_ROBUST_COV_PARAMS_N_ADDR -4043 +#define VSL_SS_ERROR_BAD_STORAGE_ADDR -4044 +#define VSL_SS_ERROR_BAD_PARTIAL_COV_IDX_ADDR -4045 +#define VSL_SS_ERROR_BAD_PARTIAL_COV_ADDR -4046 +#define VSL_SS_ERROR_BAD_PARTIAL_COR_ADDR -4047 +#define VSL_SS_ERROR_BAD_MI_PARAMS_ADDR -4048 +#define VSL_SS_ERROR_BAD_MI_PARAMS_N_ADDR -4049 +#define VSL_SS_ERROR_BAD_MI_BAD_PARAMS_N -4050 +#define VSL_SS_ERROR_BAD_MI_PARAMS -4051 +#define VSL_SS_ERROR_BAD_MI_INIT_ESTIMATES_N_ADDR -4052 +#define VSL_SS_ERROR_BAD_MI_INIT_ESTIMATES_ADDR -4053 +#define VSL_SS_ERROR_BAD_MI_SIMUL_VALS_ADDR -4054 +#define VSL_SS_ERROR_BAD_MI_SIMUL_VALS_N_ADDR -4055 +#define VSL_SS_ERROR_BAD_MI_ESTIMATES_N_ADDR -4056 +#define VSL_SS_ERROR_BAD_MI_ESTIMATES_ADDR -4057 +#define VSL_SS_ERROR_BAD_MI_SIMUL_VALS_N -4058 +#define VSL_SS_ERROR_BAD_MI_ESTIMATES_N -4059 +#define VSL_SS_ERROR_BAD_MI_OUTPUT_PARAMS -4060 +#define VSL_SS_ERROR_BAD_MI_PRIOR_N_ADDR -4061 +#define VSL_SS_ERROR_BAD_MI_PRIOR_ADDR -4062 +#define VSL_SS_ERROR_BAD_MI_MISSING_VALS_N -4063 +#define VSL_SS_ERROR_BAD_STREAM_QUANT_PARAMS_N_ADDR -4064 +#define VSL_SS_ERROR_BAD_STREAM_QUANT_PARAMS_ADDR -4065 +#define VSL_SS_ERROR_BAD_STREAM_QUANT_PARAMS_N -4066 +#define VSL_SS_ERROR_BAD_STREAM_QUANT_PARAMS -4067 +#define VSL_SS_ERROR_BAD_STREAM_QUANT_ORDER_ADDR -4068 +#define VSL_SS_ERROR_BAD_STREAM_QUANT_ORDER -4069 +#define VSL_SS_ERROR_BAD_STREAM_QUANT_ADDR -4070 +#define VSL_SS_ERROR_BAD_PARAMTR_COR_ADDR -4071 +#define VSL_SS_ERROR_BAD_COR -4072 +#define VSL_SS_ERROR_BAD_PARTIAL_COV_IDX -4073 +#define VSL_SS_ERROR_BAD_SUM_ADDR -4074 +#define 
VSL_SS_ERROR_BAD_2R_SUM_ADDR -4075
+#define VSL_SS_ERROR_BAD_3R_SUM_ADDR -4076
+#define VSL_SS_ERROR_BAD_4R_SUM_ADDR -4077
+#define VSL_SS_ERROR_BAD_2C_SUM_ADDR -4078
+#define VSL_SS_ERROR_BAD_3C_SUM_ADDR -4079
+#define VSL_SS_ERROR_BAD_4C_SUM_ADDR -4080
+#define VSL_SS_ERROR_BAD_CP_ADDR -4081
+#define VSL_SS_ERROR_BAD_MDAD_ADDR -4082
+#define VSL_SS_ERROR_BAD_MNAD_ADDR -4083
+#define VSL_SS_ERROR_BAD_SORTED_OBSERV_ADDR -4084
+#define VSL_SS_ERROR_INDICES_NOT_SUPPORTED -4085
+
+
+/*
+// Internal errors caused by internal routines of the functions
+*/
+#define VSL_SS_ERROR_ROBCOV_INTERN_C1 -5000
+#define VSL_SS_ERROR_PARTIALCOV_INTERN_C1 -5010
+#define VSL_SS_ERROR_PARTIALCOV_INTERN_C2 -5011
+#define VSL_SS_ERROR_MISSINGVALS_INTERN_C1 -5021
+#define VSL_SS_ERROR_MISSINGVALS_INTERN_C2 -5022
+#define VSL_SS_ERROR_MISSINGVALS_INTERN_C3 -5023
+#define VSL_SS_ERROR_MISSINGVALS_INTERN_C4 -5024
+#define VSL_SS_ERROR_MISSINGVALS_INTERN_C5 -5025
+#define VSL_SS_ERROR_PARAMTRCOR_INTERN_C1 -5030
+#define VSL_SS_ERROR_COVRANK_INTERNAL_ERROR_C1 -5040
+#define VSL_SS_ERROR_INVCOV_INTERNAL_ERROR_C1 -5041
+#define VSL_SS_ERROR_INVCOV_INTERNAL_ERROR_C2 -5042
+
+
+/*
+// CONV/CORR RELATED MACRO DEFINITIONS
+*/
+#define VSL_CONV_MODE_AUTO 0
+#define VSL_CORR_MODE_AUTO 0
+#define VSL_CONV_MODE_DIRECT 1
+#define VSL_CORR_MODE_DIRECT 1
+#define VSL_CONV_MODE_FFT 2
+#define VSL_CORR_MODE_FFT 2
+#define VSL_CONV_PRECISION_SINGLE 1
+#define VSL_CORR_PRECISION_SINGLE 1
+#define VSL_CONV_PRECISION_DOUBLE 2
+#define VSL_CORR_PRECISION_DOUBLE 2
+
+/*
+//++
+// BASIC RANDOM NUMBER GENERATOR (BRNG) RELATED MACRO DEFINITIONS
+//--
+*/
+
+/*
+// MAX NUMBER OF BRNGS CAN BE REGISTERED IN VSL
+// No more than VSL_MAX_REG_BRNGS basic generators can be registered in VSL
+// (including predefined basic generators).
+//
+// Change this number to increase/decrease number of BRNGs can be registered.
+*/
+#define VSL_MAX_REG_BRNGS 512
+
+/*
+// PREDEFINED BRNG NAMES
+*/
+#define VSL_BRNG_SHIFT 20
+#define VSL_BRNG_INC (1<<VSL_BRNG_SHIFT)
+
+#define VSL_BRNG_MCG31 (VSL_BRNG_INC)
+#define VSL_BRNG_R250 (VSL_BRNG_MCG31 +VSL_BRNG_INC)
+#define VSL_BRNG_MRG32K3A (VSL_BRNG_R250 +VSL_BRNG_INC)
+#define VSL_BRNG_MCG59 (VSL_BRNG_MRG32K3A +VSL_BRNG_INC)
+#define VSL_BRNG_WH (VSL_BRNG_MCG59 +VSL_BRNG_INC)
+#define VSL_BRNG_SOBOL (VSL_BRNG_WH +VSL_BRNG_INC)
+#define VSL_BRNG_NIEDERR (VSL_BRNG_SOBOL +VSL_BRNG_INC)
+#define VSL_BRNG_MT19937 (VSL_BRNG_NIEDERR +VSL_BRNG_INC)
+#define VSL_BRNG_MT2203 (VSL_BRNG_MT19937 +VSL_BRNG_INC)
+#define VSL_BRNG_IABSTRACT (VSL_BRNG_MT2203 +VSL_BRNG_INC)
+#define VSL_BRNG_DABSTRACT (VSL_BRNG_IABSTRACT+VSL_BRNG_INC)
+#define VSL_BRNG_SABSTRACT (VSL_BRNG_DABSTRACT+VSL_BRNG_INC)
+#define VSL_BRNG_SFMT19937 (VSL_BRNG_SABSTRACT+VSL_BRNG_INC)
+#define VSL_BRNG_NONDETERM (VSL_BRNG_SFMT19937+VSL_BRNG_INC)
+#define VSL_BRNG_ARS5 (VSL_BRNG_NONDETERM+VSL_BRNG_INC)
+#define VSL_BRNG_PHILOX4X32X10 (VSL_BRNG_ARS5 +VSL_BRNG_INC)
+
+/*
+//++
+// ACCURACY FLAG FOR DISTRIBUTION GENERATORS
+//--
+*/
+#define VSL_RNG_METHOD_ACCURACY_FLAG (1<<30)
+
+/*
+//++
+// METHOD NAMES FOR DISTRIBUTION RANDOM NUMBER GENERATORS
+// Method names have the form
+//
+//   VSL_RNG_METHOD_<distribution>_<method>
+//
+// where
+//
+//   <distribution> - probability distribution
+//   <method> - method name
+//
+//   VSL_RNG_METHOD_<distribution>_<method> should be used with
+//   vsl<precision>Rng<distribution> function only, where
+//
+//   <precision> - s (single) or d (double)
+//   <distribution> - probability distribution
+//--
+*/
+
+/*
+// Uniform
+//
+//
+// STD standard method. Currently there is only one method for this
+// distribution generator
+*/
+#define VSL_RNG_METHOD_UNIFORM_STD 0 /* vsl{s,d,i}RngUniform */
+
+#define VSL_RNG_METHOD_UNIFORM_STD_ACCURATE \
+    VSL_RNG_METHOD_UNIFORM_STD | VSL_RNG_METHOD_ACCURACY_FLAG
+    /* accurate mode of vsl{d,s}RngUniform */
+
+/*
+// Uniform Bits
+//
+//
+// STD standard method. Currently there is only one method for this
+// distribution generator
+*/
+#define VSL_RNG_METHOD_UNIFORMBITS_STD 0 /* vsliRngUniformBits */
+
+/*
+// Uniform Bits 32
+//
+//
+// STD standard method. Currently there is only one method for this
+// distribution generator
+*/
+#define VSL_RNG_METHOD_UNIFORMBITS32_STD 0 /* vsliRngUniformBits32 */
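+
+/*
+// Illustrative sketch (not part of this header): the BRNG and method macros
+// above are consumed by the generator routines declared in
+// mkl_vsl_functions.h, e.g. 1000 single-precision uniforms on [0,1):
+//
+//   VSLStreamStatePtr stream;
+//   float r[1000];
+//   vslNewStream(&stream, VSL_BRNG_MT19937, 777);
+//   vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1000, r, 0.0f, 1.0f);
+//   vslDeleteStream(&stream);
+*/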
+
+/*
+// Uniform Bits 64
+//
+//
+// STD standard method. Currently there is only one method for this
+// distribution generator
+*/
+#define VSL_RNG_METHOD_UNIFORMBITS64_STD 0 /* vsliRngUniformBits64 */
+
+/*
+// Gaussian
+//
+//
+// BOXMULLER generates normally distributed random number x thru the pair of
+//           uniformly distributed numbers u1 and u2 according to the formula:
+//
+//           x=sqrt(-ln(u1))*sin(2*Pi*u2)
+//
+// BOXMULLER2 generates pair of normally distributed random numbers x1 and x2
+//            thru the pair of uniformly distributed numbers u1 and u2
+//            according to the formula
+//
+//            x1=sqrt(-ln(u1))*sin(2*Pi*u2)
+//            x2=sqrt(-ln(u1))*cos(2*Pi*u2)
+//
+//            NOTE: implementation correctly works with odd vector lengths
+//
+// ICDF inverse cumulative distribution function method
+*/
+#define VSL_RNG_METHOD_GAUSSIAN_BOXMULLER 0 /* vsl{d,s}RngGaussian */
+#define VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2 1 /* vsl{d,s}RngGaussian */
+#define VSL_RNG_METHOD_GAUSSIAN_ICDF 2 /* vsl{d,s}RngGaussian */
+
+/*
+// GaussianMV - multivariate (correlated) normal
+// Multivariate (correlated) normal random number generator is based on
+// uncorrelated Gaussian random number generator (see vslsRngGaussian and
+// vsldRngGaussian functions):
+//
+//
+// BOXMULLER generates normally distributed random number x thru the pair of
+//           uniformly distributed numbers u1 and u2 according to the formula:
+//
+//           x=sqrt(-ln(u1))*sin(2*Pi*u2)
+//
+// BOXMULLER2 generates pair of normally distributed random numbers x1 and x2
+//            thru the pair of uniformly distributed numbers u1 and u2
+//            according to the formula
+//
+//            x1=sqrt(-ln(u1))*sin(2*Pi*u2)
+//            x2=sqrt(-ln(u1))*cos(2*Pi*u2)
+//
+//            NOTE: implementation correctly works with odd vector lengths
+//
+// ICDF inverse cumulative distribution function method
+*/
+#define VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER 0 /* vsl{d,s}RngGaussianMV */
+#define VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER2 1 /* vsl{d,s}RngGaussianMV */
+#define VSL_RNG_METHOD_GAUSSIANMV_ICDF 2 /* vsl{d,s}RngGaussianMV */
+
+/*
+// Exponential
+//
+//
+// ICDF inverse cumulative distribution function method
+*/
+#define VSL_RNG_METHOD_EXPONENTIAL_ICDF 0 /* vsl{d,s}RngExponential */
+
+#define VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE \
+    VSL_RNG_METHOD_EXPONENTIAL_ICDF | VSL_RNG_METHOD_ACCURACY_FLAG
+    /* accurate mode of vsl{d,s}RngExponential */
+
+/*
+// Laplace
+//
+//
+// ICDF - inverse cumulative distribution function method:
+//
+//        x=+/-ln(u) with probability 1/2,
+//
+//        where
+//
+//        x - random number with Laplace distribution,
+//        u - uniformly distributed random number
+*/
+#define VSL_RNG_METHOD_LAPLACE_ICDF 0 /* vsl{d,s}RngLaplace */
+
+/*
+// Weibull
+//
+//
+// ICDF inverse cumulative distribution function method
+*/
+#define VSL_RNG_METHOD_WEIBULL_ICDF 0 /* vsl{d,s}RngWeibull */
+
+#define VSL_RNG_METHOD_WEIBULL_ICDF_ACCURATE \
+    VSL_RNG_METHOD_WEIBULL_ICDF | VSL_RNG_METHOD_ACCURACY_FLAG
+    /* accurate mode of vsl{d,s}RngWeibull */
+
+
+/*
+// Cauchy
+//
+//
+// ICDF inverse cumulative distribution function method
+*/
+#define VSL_RNG_METHOD_CAUCHY_ICDF 0 /* vsl{d,s}RngCauchy */
+
+/*
+// Rayleigh
+//
+//
+// ICDF inverse cumulative distribution function method
+*/
+#define VSL_RNG_METHOD_RAYLEIGH_ICDF 0 /* vsl{d,s}RngRayleigh */
+
+#define VSL_RNG_METHOD_RAYLEIGH_ICDF_ACCURATE \
+    VSL_RNG_METHOD_RAYLEIGH_ICDF | VSL_RNG_METHOD_ACCURACY_FLAG
+    /* accurate mode of vsl{d,s}RngRayleigh */
+
+/*
+// Lognormal
+//
+//
+// BOXMULLER2 Box-Muller 2 algorithm based method
+*/
+#define
VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2 0 /* vsl{d,s}RngLognormal */ +#define VSL_RNG_METHOD_LOGNORMAL_ICDF 1 /* vsl{d,s}RngLognormal */ + +#define VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2_ACCURATE \ + VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2 | VSL_RNG_METHOD_ACCURACY_FLAG + /* accurate mode of vsl{d,s}RngLognormal */ + +#define VSL_RNG_METHOD_LOGNORMAL_ICDF_ACCURATE \ + VSL_RNG_METHOD_LOGNORMAL_ICDF | VSL_RNG_METHOD_ACCURACY_FLAG + /* accurate mode of vsl{d,s}RngLognormal */ + + +/* +// Gumbel +// +// +// ICDF inverse cumulative distribution function method +*/ +#define VSL_RNG_METHOD_GUMBEL_ICDF 0 /* vsl{d,s}RngGumbel */ + +/* +// Gamma +// +// Comments: +// alpha>1 - algorithm of Marsaglia is used, nonlinear +// transformation of gaussian numbers based on +// acceptance/rejection method with squeezes; +// alpha>=0.6, alpha<1 - rejection from the Weibull distribution is used; +// alpha<0.6 - transformation of exponential power distribution +// (EPD) is used, EPD random numbers are generated +// by means of acceptance/rejection technique; +// alpha=1 - gamma distribution reduces to exponential +// distribution +*/ +#define VSL_RNG_METHOD_GAMMA_GNORM 0 /* vsl{d,s}RngGamma */ + +#define VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE \ + VSL_RNG_METHOD_GAMMA_GNORM | VSL_RNG_METHOD_ACCURACY_FLAG + /* accurate mode of vsl{d,s}RngGamma */ + + +/* +// Beta +// +// Comments: +// CJA - stands for first letters of Cheng, Johnk, and Atkinson. +// Cheng - for min(p,q) > 1 method of Cheng, +// generation of beta random numbers of the second kind +// based on acceptance/rejection technique and its +// transformation to beta random numbers of the first kind; +// Johnk - for max(p,q) < 1 methods of Johnk and Atkinson: +// if q + K*p^2+C<=0, K=0.852..., C=-0.956... +// algorithm of Johnk: +// beta distributed random number is generated as +// u1^(1/p) / (u1^(1/p)+u2^(1/q)), if u1^(1/p)+u2^(1/q)<=1; +// otherwise switching algorithm of Atkinson: interval (0,1) +// is divided into two domains (0,t) and (t,1), on each interval +// acceptance/rejection technique with convenient majorizing +// function is used; +// Atkinson - for min(p,q)<1, max(p,q)>1 switching algorithm of Atkinson +// is used (with another point t, see short description above); +// ICDF - inverse cumulative distribution function method according +// to formulas x=1-u^(1/q) for p = 1, and x = u^(1/p) for q=1, +// where x is beta distributed random number, +// u - uniformly distributed random number. +// for p=q=1 beta distribution reduces to uniform distribution. +// +*/ +#define VSL_RNG_METHOD_BETA_CJA 0 /* vsl{d,s}RngBeta */ + +#define VSL_RNG_METHOD_BETA_CJA_ACCURATE \ + VSL_RNG_METHOD_BETA_CJA | VSL_RNG_METHOD_ACCURACY_FLAG + /* accurate mode of vsl{d,s}RngBeta */ + +/* +// Bernoulli +// +// +// ICDF inverse cumulative distribution function method +*/ +#define VSL_RNG_METHOD_BERNOULLI_ICDF 0 /* vsliRngBernoulli */ + +/* +// Geometric +// +// +// ICDF inverse cumulative distribution function method +*/ +#define VSL_RNG_METHOD_GEOMETRIC_ICDF 0 /* vsliRngGeometric */ + +/* +// Binomial +// +// +// BTPE for ntrial*min(p,1-p)>30 acceptance/rejection method with +// decomposition onto 4 regions: +// +// * 2 parallelograms; +// * triangle; +// * left exponential tail; +// * right exponential tail. 
+//
+// otherwise table lookup method is used
+*/
+#define VSL_RNG_METHOD_BINOMIAL_BTPE 0 /* vsliRngBinomial */
+
+/*
+// Hypergeometric
+//
+//
+// H2PE if mode of distribution is large, acceptance/rejection method is
+//      used with decomposition onto 3 regions:
+//
+//      * rectangular;
+//      * left exponential tail;
+//      * right exponential tail.
+//
+// otherwise table lookup method is used
+*/
+#define VSL_RNG_METHOD_HYPERGEOMETRIC_H2PE 0 /* vsliRngHypergeometric */
+
+/*
+// Poisson
+//
+//
+// PTPE if lambda>=27, acceptance/rejection method is used with
+//      decomposition onto 4 regions:
+//
+//      * 2 parallelograms;
+//      * triangle;
+//      * left exponential tail;
+//      * right exponential tail.
+//
+// otherwise table lookup method is used
+//
+// POISNORM for lambda>=1 method is based on Poisson inverse CDF
+//          approximation by Gaussian inverse CDF; for lambda<1
+//          table lookup method is used.
+*/
+#define VSL_RNG_METHOD_POISSON_PTPE 0 /* vsliRngPoisson */
+#define VSL_RNG_METHOD_POISSON_POISNORM 1 /* vsliRngPoisson */
+
+/*
+// PoissonV
+//
+//
+// POISNORM for lambda>=1 method is based on Poisson inverse CDF
+//          approximation by Gaussian inverse CDF; for lambda<1
+//          ICDF method is used.
+*/
+#define VSL_RNG_METHOD_POISSONV_POISNORM 0 /* vsliRngPoissonV */
+
+/*
+// Negbinomial
+//
+//
+// NBAR if (a-1)*(1-p)/p>=100, acceptance/rejection method is used with
+//      decomposition onto 5 regions:
+//
+//      * rectangular;
+//      * 2 trapezoids;
+//      * left exponential tail;
+//      * right exponential tail.
+//
+// otherwise table lookup method is used.
+*/
+#define VSL_RNG_METHOD_NEGBINOMIAL_NBAR 0 /* vsliRngNegbinomial */
+
+/*
+//++
+// MATRIX STORAGE SCHEMES
+//--
+*/
+
+/*
+// Some multivariate random number generators, e.g. GaussianMV, operate
+// with matrix parameters. To optimize matrix parameters usage VSL offers
+// the following matrix storage schemes. (See VSL documentation for more
+// details.)
+//
+// FULL     - whole matrix is stored
+// PACKED   - lower/higher triangular matrix is packed in 1-dimensional array
+// DIAGONAL - diagonal elements are packed in 1-dimensional array
+*/
+#define VSL_MATRIX_STORAGE_FULL 0
+#define VSL_MATRIX_STORAGE_PACKED 1
+#define VSL_MATRIX_STORAGE_DIAGONAL 2
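+
+/*
+// Illustrative sketch (not part of this header): the storage scheme is passed
+// to the multivariate generator together with a method macro, e.g. 100 draws
+// from a 3-dimensional correlated Gaussian with mean vector `mean` and a
+// fully stored Cholesky factor `T`:
+//
+//   vsRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER2, stream, 100, r,
+//                   3, VSL_MATRIX_STORAGE_FULL, mean, T);
+*/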
+
+
+/*
+// SUMMARY STATISTICS (SS) RELATED MACRO DEFINITIONS
+*/
+
+/*
+//++
+// MATRIX STORAGE SCHEMES
+//--
+*/
+/*
+// SS routines work with matrix parameters, e.g. matrix of observations,
+// variance-covariance matrix. To optimize work with matrices the library
+// provides the following matrix storage schemes.
+*/
+/*
+// Matrix of observations:
+// ROWS - observations of the random vector are stored in rows, that
+//        is, i-th row of the matrix of observations contains values
+//        of i-th component of the random vector
+// COLS - observations of the random vector are stored in columns, that
+//        is, i-th column of the matrix of observations contains values
+//        of i-th component of the random vector
+*/
+#define VSL_SS_MATRIX_STORAGE_ROWS 0x00010000
+#define VSL_SS_MATRIX_STORAGE_COLS 0x00020000
+
+/*
+// Variance-covariance/correlation matrix:
+// FULL     - whole matrix is stored
+// L_PACKED - lower triangular matrix is stored as 1-dimensional array
+// U_PACKED - upper triangular matrix is stored as 1-dimensional array
+*/
+#define VSL_SS_MATRIX_STORAGE_FULL 0x00000000
+#define VSL_SS_MATRIX_STORAGE_L_PACKED 0x00000001
+#define VSL_SS_MATRIX_STORAGE_U_PACKED 0x00000002
+
+
+/*
+//++
+// SUMMARY STATISTICS LIBRARY METHODS
+//--
+*/
+/*
+// SS routines provide computation of basic statistical estimates
+// (central/raw moments up to 4th order, variance-covariance,
+// minimum, maximum, skewness/kurtosis) using the following methods
+// - FAST - estimates are computed for price of one or two passes over
+//          observations using highly optimized MKL routines
+// - 1PASS - estimate is computed for price of one pass of the observations
+// - FAST_USER_MEAN - estimates are computed for price of one or two passes
+//                    over observations given user defined mean for central
+//                    moments, covariance and correlation
+// - CP_TO_COVCOR - convert cross-product matrix to variance-covariance/
+//                  correlation matrix
+// - SUM_TO_MOM - convert raw/central sums to raw/central moments
+//
+*/
+#define VSL_SS_METHOD_FAST 0x00000001
+#define VSL_SS_METHOD_1PASS 0x00000002
+#define VSL_SS_METHOD_FAST_USER_MEAN 0x00000100
+#define VSL_SS_METHOD_CP_TO_COVCOR 0x00000200
+#define VSL_SS_METHOD_SUM_TO_MOM 0x00000400
+
+/*
+// SS provides routine for parametrization of correlation matrix using
+// SPECTRAL DECOMPOSITION (SD) method
+*/
+#define VSL_SS_METHOD_SD 0x00000004
+
+/*
+// SS routine for robust estimation of variance-covariance matrix
+// and mean supports Rocke algorithm, TBS-estimator
+*/
+#define VSL_SS_METHOD_TBS 0x00000008
+
+/*
+// SS routine for estimation of missing values
+// supports Multiple Imputation (MI) method
+*/
+#define VSL_SS_METHOD_MI 0x00000010
+
+/*
+// SS provides routine for detection of outliers, BACON method
+*/
+#define VSL_SS_METHOD_BACON 0x00000020
+
+/*
+// SS supports routine for estimation of quantiles for streaming data
+// using the following methods:
+// - ZW - intermediate estimates of quantiles during processing
+//        the next block are computed
+// - ZW_FAST - intermediate estimates of quantiles during processing
+//             the next block are not computed
+*/
+#define VSL_SS_METHOD_SQUANTS_ZW 0x00000040
+#define VSL_SS_METHOD_SQUANTS_ZW_FAST 0x00000080
+
+
+/*
+// Input of BACON algorithm is a set of 3 parameters:
+// - Initialization method of the algorithm
+// - Parameter alfa such that 1-alfa is percentile of Chi2 distribution
+// - Stopping criterion
+*/
+/*
+// Number of BACON algorithm parameters
+*/
+#define VSL_SS_BACON_PARAMS_N 3
+
+/*
+// SS implementation of BACON algorithm supports two initialization methods:
+// - Mahalanobis distance based method
+// - Median based method
+*/
+#define VSL_SS_METHOD_BACON_MAHALANOBIS_INIT 0x00000001
+#define VSL_SS_METHOD_BACON_MEDIAN_INIT 0x00000002
+
+/*
+// SS routine for sorting data, RADIX method
+*/
+#define VSL_SS_METHOD_RADIX 0x00100000
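+
+/*
+// Illustrative sketch (not part of this header): the method macros above are
+// combined with the estimate and editor macros below through the SS task API,
+// e.g. a FAST mean over n observations of dimension dim (see the sketch just
+// before the editor macros):
+//
+//   VSLSSTaskPtr task;
+//   vsldSSNewTask(&task, &dim, &n, &xstorage, x, 0, 0);
+//   vsldSSEditTask(task, VSL_SS_ED_MEAN, mean);
+//   vsldSSCompute(task, VSL_SS_MEAN, VSL_SS_METHOD_FAST);
+//   vslSSDeleteTask(&task);
+*/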
+
+/*
+// Input of TBS algorithm is a set of 4 parameters:
+// - Breakdown point
+// - Asymptotic rejection probability
+// - Stopping criterion
+// - Maximum number of iterations
+*/
+/*
+// Number of TBS algorithm parameters
+*/
+#define VSL_SS_TBS_PARAMS_N 4
+
+/*
+// Input of MI algorithm is a set of 5 parameters:
+// - Maximal number of iterations for EM algorithm
+// - Maximal number of iterations for DA algorithm
+// - Stopping criterion
+// - Number of sets to impute
+// - Total number of missing values in dataset
+*/
+/*
+// Number of MI algorithm parameters
+*/
+#define VSL_SS_MI_PARAMS_SIZE 5
+
+/*
+// SS MI algorithm expects that missing values are
+// marked with NANs
+*/
+#define VSL_SS_DNAN 0xFFF8000000000000
+#define VSL_SS_SNAN 0xFFC00000
+
+/*
+// Input of ZW algorithm is 1 parameter:
+// - accuracy of quantile estimation
+*/
+/*
+// Number of ZW algorithm parameters
+*/
+#define VSL_SS_SQUANTS_ZW_PARAMS_N 1
+
+
+/*
+//++
+// MACROS USED BY SS EDIT AND COMPUTE ROUTINES
+//--
+*/
+
+/*
+// SS EditTask routine is a way to edit input and output parameters of the
+// task, e.g., pointers to arrays which hold observations, weights of
+// observations, arrays of mean estimates or covariance estimates.
+// Macros below define parameters available for modification
+*/
+#define VSL_SS_ED_DIMEN 1
+#define VSL_SS_ED_OBSERV_N 2
+#define VSL_SS_ED_OBSERV 3
+#define VSL_SS_ED_OBSERV_STORAGE 4
+#define VSL_SS_ED_INDC 5
+#define VSL_SS_ED_WEIGHTS 6
+#define VSL_SS_ED_MEAN 7
+#define VSL_SS_ED_2R_MOM 8
+#define VSL_SS_ED_3R_MOM 9
+#define VSL_SS_ED_4R_MOM 10
+#define VSL_SS_ED_2C_MOM 11
+#define VSL_SS_ED_3C_MOM 12
+#define VSL_SS_ED_4C_MOM 13
+#define VSL_SS_ED_SUM 67
+#define VSL_SS_ED_2R_SUM 68
+#define VSL_SS_ED_3R_SUM 69
+#define VSL_SS_ED_4R_SUM 70
+#define VSL_SS_ED_2C_SUM 71
+#define VSL_SS_ED_3C_SUM 72
+#define VSL_SS_ED_4C_SUM 73
+#define VSL_SS_ED_KURTOSIS 14
+#define VSL_SS_ED_SKEWNESS 15
+#define VSL_SS_ED_MIN 16
+#define VSL_SS_ED_MAX 17
+#define VSL_SS_ED_VARIATION 18
+#define VSL_SS_ED_COV 19
+#define VSL_SS_ED_COV_STORAGE 20
+#define VSL_SS_ED_COR 21
+#define VSL_SS_ED_COR_STORAGE 22
+#define VSL_SS_ED_CP 74
+#define VSL_SS_ED_CP_STORAGE 75
+#define VSL_SS_ED_ACCUM_WEIGHT 23
+#define VSL_SS_ED_QUANT_ORDER_N 24
+#define VSL_SS_ED_QUANT_ORDER 25
+#define VSL_SS_ED_QUANT_QUANTILES 26
+#define VSL_SS_ED_ORDER_STATS 27
+#define VSL_SS_ED_GROUP_INDC 28
+#define VSL_SS_ED_POOLED_COV_STORAGE 29
+#define VSL_SS_ED_POOLED_MEAN 30
+#define VSL_SS_ED_POOLED_COV 31
+#define VSL_SS_ED_GROUP_COV_INDC 32
+#define VSL_SS_ED_REQ_GROUP_INDC 32
+#define VSL_SS_ED_GROUP_MEAN 33
+#define VSL_SS_ED_GROUP_COV_STORAGE 34
+#define VSL_SS_ED_GROUP_COV 35
+#define VSL_SS_ED_ROBUST_COV_STORAGE 36
+#define VSL_SS_ED_ROBUST_COV_PARAMS_N 37
+#define VSL_SS_ED_ROBUST_COV_PARAMS 38
+#define VSL_SS_ED_ROBUST_MEAN 39
+#define VSL_SS_ED_ROBUST_COV 40
+#define VSL_SS_ED_OUTLIERS_PARAMS_N 41
+#define VSL_SS_ED_OUTLIERS_PARAMS 42
+#define VSL_SS_ED_OUTLIERS_WEIGHT 43
+#define VSL_SS_ED_ORDER_STATS_STORAGE 44
+#define VSL_SS_ED_PARTIAL_COV_IDX 45
+#define VSL_SS_ED_PARTIAL_COV 46
+#define VSL_SS_ED_PARTIAL_COV_STORAGE 47
+#define VSL_SS_ED_PARTIAL_COR 48
+#define VSL_SS_ED_PARTIAL_COR_STORAGE 49
+#define VSL_SS_ED_MI_PARAMS_N 50
+#define VSL_SS_ED_MI_PARAMS 51
+#define VSL_SS_ED_MI_INIT_ESTIMATES_N 52
+#define VSL_SS_ED_MI_INIT_ESTIMATES 53
+#define VSL_SS_ED_MI_SIMUL_VALS_N 54
+#define VSL_SS_ED_MI_SIMUL_VALS 55
+#define VSL_SS_ED_MI_ESTIMATES_N 56
+#define VSL_SS_ED_MI_ESTIMATES 57
+#define VSL_SS_ED_MI_PRIOR_N 58
+#define
VSL_SS_ED_MI_PRIOR 59 +#define VSL_SS_ED_PARAMTR_COR 60 +#define VSL_SS_ED_PARAMTR_COR_STORAGE 61 +#define VSL_SS_ED_STREAM_QUANT_PARAMS_N 62 +#define VSL_SS_ED_STREAM_QUANT_PARAMS 63 +#define VSL_SS_ED_STREAM_QUANT_ORDER_N 64 +#define VSL_SS_ED_STREAM_QUANT_ORDER 65 +#define VSL_SS_ED_STREAM_QUANT_QUANTILES 66 +#define VSL_SS_ED_MDAD 76 +#define VSL_SS_ED_MNAD 77 +#define VSL_SS_ED_SORTED_OBSERV 78 +#define VSL_SS_ED_SORTED_OBSERV_STORAGE 79 + + +/* +// SS Compute routine calculates estimates supported by the library +// Macros below define estimates to compute +*/ +#define VSL_SS_MEAN 0x0000000000000001 +#define VSL_SS_2R_MOM 0x0000000000000002 +#define VSL_SS_3R_MOM 0x0000000000000004 +#define VSL_SS_4R_MOM 0x0000000000000008 +#define VSL_SS_2C_MOM 0x0000000000000010 +#define VSL_SS_3C_MOM 0x0000000000000020 +#define VSL_SS_4C_MOM 0x0000000000000040 +#define VSL_SS_SUM 0x0000000002000000 +#define VSL_SS_2R_SUM 0x0000000004000000 +#define VSL_SS_3R_SUM 0x0000000008000000 +#define VSL_SS_4R_SUM 0x0000000010000000 +#define VSL_SS_2C_SUM 0x0000000020000000 +#define VSL_SS_3C_SUM 0x0000000040000000 +#define VSL_SS_4C_SUM 0x0000000080000000 +#define VSL_SS_KURTOSIS 0x0000000000000080 +#define VSL_SS_SKEWNESS 0x0000000000000100 +#define VSL_SS_VARIATION 0x0000000000000200 +#define VSL_SS_MIN 0x0000000000000400 +#define VSL_SS_MAX 0x0000000000000800 +#define VSL_SS_COV 0x0000000000001000 +#define VSL_SS_COR 0x0000000000002000 +#define VSL_SS_CP 0x0000000100000000 +#define VSL_SS_POOLED_COV 0x0000000000004000 +#define VSL_SS_GROUP_COV 0x0000000000008000 +#define VSL_SS_POOLED_MEAN 0x0000000800000000 +#define VSL_SS_GROUP_MEAN 0x0000001000000000 +#define VSL_SS_QUANTS 0x0000000000010000 +#define VSL_SS_ORDER_STATS 0x0000000000020000 +#define VSL_SS_SORTED_OBSERV 0x0000008000000000 +#define VSL_SS_ROBUST_COV 0x0000000000040000 +#define VSL_SS_OUTLIERS 0x0000000000080000 +#define VSL_SS_PARTIAL_COV 0x0000000000100000 +#define VSL_SS_PARTIAL_COR 0x0000000000200000 +#define VSL_SS_MISSING_VALS 0x0000000000400000 +#define VSL_SS_PARAMTR_COR 0x0000000000800000 +#define VSL_SS_STREAM_QUANTS 0x0000000001000000 +#define VSL_SS_MDAD 0x0000000200000000 +#define VSL_SS_MNAD 0x0000000400000000 + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MKL_VSL_DEFINES_H__ */ diff --git a/python/ideep4py/include/mkl/mkl_vsl_functions.h b/python/ideep4py/include/mkl/mkl_vsl_functions.h new file mode 100644 index 00000000..b09229bb --- /dev/null +++ b/python/ideep4py/include/mkl/mkl_vsl_functions.h @@ -0,0 +1,854 @@ +/* file: mkl_vsl_functions.h */ +/******************************************************************************* +* Copyright (c) 2006-2017, Intel Corporation +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of Intel Corporation nor the names of its contributors +* may be used to endorse or promote products derived from this software +* without specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + +/* +//++ +// User-level VSL function declarations +//-- +*/ + +#ifndef __MKL_VSL_FUNCTIONS_H__ +#define __MKL_VSL_FUNCTIONS_H__ + +#include "mkl_vsl_types.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* +//++ +// EXTERNAL API MACROS. +// Used to construct VSL function declaration. Change them if you are going to +// provide different API for VSL functions. +//-- +*/ + +#if !defined(_Mkl_Api) +#define _Mkl_Api(rtype,name,arg) extern rtype name arg; +#endif + +#if !defined(_mkl_api) +#define _mkl_api(rtype,name,arg) extern rtype name##_ arg; +#endif + +#if !defined(_MKL_API) +#define _MKL_API(rtype,name,arg) extern rtype name##_ arg; +#endif + +/* +//++ +// VSL CONTINUOUS DISTRIBUTION GENERATOR FUNCTION DECLARATIONS. +//-- +*/ +/* Cauchy distribution */ +_Mkl_Api(int,vdRngCauchy,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double )) +_MKL_API(int,VDRNGCAUCHY,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_mkl_api(int,vdrngcauchy,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_Mkl_Api(int,vsRngCauchy,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float )) +_MKL_API(int,VSRNGCAUCHY,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) +_mkl_api(int,vsrngcauchy,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) + +/* Uniform distribution */ +_Mkl_Api(int,vdRngUniform,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double )) +_MKL_API(int,VDRNGUNIFORM,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_mkl_api(int,vdrnguniform,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_Mkl_Api(int,vsRngUniform,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float )) +_MKL_API(int,VSRNGUNIFORM,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) +_mkl_api(int,vsrnguniform,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) + +/* Gaussian distribution */ +_Mkl_Api(int,vdRngGaussian,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double )) +_MKL_API(int,VDRNGGAUSSIAN,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_mkl_api(int,vdrnggaussian,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const 
double *, const double *)) +_Mkl_Api(int,vsRngGaussian,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float )) +_MKL_API(int,VSRNGGAUSSIAN,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) +_mkl_api(int,vsrnggaussian,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) + +/* GaussianMV distribution */ +_Mkl_Api(int,vdRngGaussianMV,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const MKL_INT , const MKL_INT , const double *, const double *)) +_MKL_API(int,VDRNGGAUSSIANMV,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const MKL_INT *, const MKL_INT *, const double *, const double *)) +_mkl_api(int,vdrnggaussianmv,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const MKL_INT *, const MKL_INT *, const double *, const double *)) +_Mkl_Api(int,vsRngGaussianMV,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const MKL_INT , const MKL_INT , const float *, const float * )) +_MKL_API(int,VSRNGGAUSSIANMV,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const MKL_INT *, const MKL_INT *, const float *, const float * )) +_mkl_api(int,vsrnggaussianmv,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const MKL_INT *, const MKL_INT *, const float *, const float * )) + +/* Exponential distribution */ +_Mkl_Api(int,vdRngExponential,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double )) +_MKL_API(int,VDRNGEXPONENTIAL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_mkl_api(int,vdrngexponential,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_Mkl_Api(int,vsRngExponential,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float )) +_MKL_API(int,VSRNGEXPONENTIAL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) +_mkl_api(int,vsrngexponential,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) + +/* Laplace distribution */ +_Mkl_Api(int,vdRngLaplace,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double )) +_MKL_API(int,VDRNGLAPLACE,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_mkl_api(int,vdrnglaplace,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_Mkl_Api(int,vsRngLaplace,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float )) +_MKL_API(int,VSRNGLAPLACE,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) +_mkl_api(int,vsrnglaplace,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) + +/* Weibull distribution */ +_Mkl_Api(int,vdRngWeibull,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double , const double )) +_MKL_API(int,VDRNGWEIBULL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *)) +_mkl_api(int,vdrngweibull,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *)) +_Mkl_Api(int,vsRngWeibull,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float , const 
float )) +_MKL_API(int,VSRNGWEIBULL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float * )) +_mkl_api(int,vsrngweibull,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float * )) + +/* Rayleigh distribution */ +_Mkl_Api(int,vdRngRayleigh,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double )) +_MKL_API(int,VDRNGRAYLEIGH,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_mkl_api(int,vdrngrayleigh,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_Mkl_Api(int,vsRngRayleigh,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float )) +_MKL_API(int,VSRNGRAYLEIGH,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) +_mkl_api(int,vsrngrayleigh,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) + +/* Lognormal distribution */ +_Mkl_Api(int,vdRngLognormal,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double , const double , const double )) +_MKL_API(int,VDRNGLOGNORMAL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *, const double *)) +_mkl_api(int,vdrnglognormal,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *, const double *)) +_Mkl_Api(int,vsRngLognormal,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float , const float , const float )) +_MKL_API(int,VSRNGLOGNORMAL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float *, const float * )) +_mkl_api(int,vsrnglognormal,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float *, const float * )) + +/* Gumbel distribution */ +_Mkl_Api(int,vdRngGumbel,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double )) +_MKL_API(int,VDRNGGUMBEL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_mkl_api(int,vdrnggumbel,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *)) +_Mkl_Api(int,vsRngGumbel,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float )) +_MKL_API(int,VSRNGGUMBEL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) +_mkl_api(int,vsrnggumbel,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float * )) + +/* Gamma distribution */ +_Mkl_Api(int,vdRngGamma,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double , const double )) +_MKL_API(int,VDRNGGAMMA,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *)) +_mkl_api(int,vdrnggamma,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *)) +_Mkl_Api(int,vsRngGamma,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float , const float )) +_MKL_API(int,VSRNGGAMMA,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float * )) 
+_mkl_api(int,vsrnggamma,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float * )) + +/* Beta distribution */ +_Mkl_Api(int,vdRngBeta,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , double [], const double , const double , const double , const double )) +_MKL_API(int,VDRNGBETA,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *, const double *)) +_mkl_api(int,vdrngbeta,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, double [], const double *, const double *, const double *, const double *)) +_Mkl_Api(int,vsRngBeta,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , float [], const float , const float , const float , const float )) +_MKL_API(int,VSRNGBETA,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float *, const float * )) +_mkl_api(int,vsrngbeta,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, float [], const float *, const float *, const float *, const float * )) + +/* +//++ +// VSL DISCRETE DISTRIBUTION GENERATOR FUNCTION DECLARATIONS. +//-- +*/ +/* Bernoulli distribution */ +_Mkl_Api(int,viRngBernoulli,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const double )) +_MKL_API(int,VIRNGBERNOULLI,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *)) +_mkl_api(int,virngbernoulli,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *)) + +/* Uniform distribution */ +_Mkl_Api(int,viRngUniform,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const int , const int )) +_MKL_API(int,VIRNGUNIFORM,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const int *, const int *)) +_mkl_api(int,virnguniform,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const int *, const int *)) + +/* UniformBits distribution */ +_Mkl_Api(int,viRngUniformBits,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , unsigned int [])) +_MKL_API(int,VIRNGUNIFORMBITS,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, unsigned int [])) +_mkl_api(int,virnguniformbits,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, unsigned int [])) + +/* UniformBits32 distribution */ +_Mkl_Api(int,viRngUniformBits32,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , unsigned int [])) +_MKL_API(int,VIRNGUNIFORMBITS32,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, unsigned int [])) +_mkl_api(int,virnguniformbits32,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, unsigned int [])) + +/* UniformBits64 distribution */ +_Mkl_Api(int,viRngUniformBits64,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , unsigned MKL_INT64 [])) +_MKL_API(int,VIRNGUNIFORMBITS64,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, unsigned MKL_INT64 [])) +_mkl_api(int,virnguniformbits64,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, unsigned MKL_INT64 [])) + +/* Geometric distribution */ +_Mkl_Api(int,viRngGeometric,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const double )) +_MKL_API(int,VIRNGGEOMETRIC,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *)) +_mkl_api(int,virnggeometric,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *)) + +/* Binomial distribution */ +_Mkl_Api(int,viRngBinomial,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const int , const double )) +_MKL_API(int,VIRNGBINOMIAL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT 
*, int [], const int *, const double *))
+_mkl_api(int,virngbinomial,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const int *, const double *))
+
+/* Hypergeometric distribution */
+_Mkl_Api(int,viRngHypergeometric,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const int , const int , const int ))
+_MKL_API(int,VIRNGHYPERGEOMETRIC,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const int *, const int *, const int *))
+_mkl_api(int,virnghypergeometric,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const int *, const int *, const int *))
+
+/* Negbinomial distribution */
+_Mkl_Api(int,viRngNegbinomial,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const double , const double ))
+_Mkl_Api(int,viRngNegBinomial,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const double , const double ))
+_MKL_API(int,VIRNGNEGBINOMIAL,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *, const double *))
+_mkl_api(int,virngnegbinomial,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *, const double *))
+
+/* Poisson distribution */
+_Mkl_Api(int,viRngPoisson,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const double ))
+_MKL_API(int,VIRNGPOISSON,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *))
+_mkl_api(int,virngpoisson,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double *))
+
+/* PoissonV distribution */
+_Mkl_Api(int,viRngPoissonV,(const MKL_INT , VSLStreamStatePtr , const MKL_INT , int [], const double []))
+_MKL_API(int,VIRNGPOISSONV,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double []))
+_mkl_api(int,virngpoissonv,(const MKL_INT *, VSLStreamStatePtr *, const MKL_INT *, int [], const double []))
+
+
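+/*
+// A minimal usage sketch for the generators declared above (illustrative
+// only). The BRNG and method constants come from mkl_vsl_defines.h, and
+// vslNewStream / vslDeleteStream are declared in the service section below.
+// This draws 1000 standard Gaussian doubles from an MT19937 stream seeded
+// with 777; each call returns VSL_ERROR_OK (0) on success:
+//
+//   VSLStreamStatePtr stream;
+//   double r[1000];
+//   int err;
+//   err = vslNewStream(&stream, VSL_BRNG_MT19937, 777);
+//   err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream,
+//                       1000, r, 0.0, 1.0);            /* mean 0, sigma 1 */
+//   err = vslDeleteStream(&stream);
+*/
+
+/*
+//++
+// VSL SERVICE FUNCTION DECLARATIONS.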
+//-- +*/ +/* NewStream - stream creation/initialization */ +_Mkl_Api(int,vslNewStream,(VSLStreamStatePtr* , const MKL_INT , const MKL_UINT )) +_mkl_api(int,vslnewstream,(VSLStreamStatePtr* , const MKL_INT *, const MKL_UINT *)) +_MKL_API(int,VSLNEWSTREAM,(VSLStreamStatePtr* , const MKL_INT *, const MKL_UINT *)) + +/* NewStreamEx - advanced stream creation/initialization */ +_Mkl_Api(int,vslNewStreamEx,(VSLStreamStatePtr* , const MKL_INT , const MKL_INT , const unsigned int[])) +_mkl_api(int,vslnewstreamex,(VSLStreamStatePtr* , const MKL_INT *, const MKL_INT *, const unsigned int[])) +_MKL_API(int,VSLNEWSTREAMEX,(VSLStreamStatePtr* , const MKL_INT *, const MKL_INT *, const unsigned int[])) + +_Mkl_Api(int,vsliNewAbstractStream,(VSLStreamStatePtr* , const MKL_INT , const unsigned int[], const iUpdateFuncPtr)) +_mkl_api(int,vslinewabstractstream,(VSLStreamStatePtr* , const MKL_INT *, const unsigned int[], const iUpdateFuncPtr)) +_MKL_API(int,VSLINEWABSTRACTSTREAM,(VSLStreamStatePtr* , const MKL_INT *, const unsigned int[], const iUpdateFuncPtr)) + +_Mkl_Api(int,vsldNewAbstractStream,(VSLStreamStatePtr* , const MKL_INT , const double[], const double , const double , const dUpdateFuncPtr)) +_mkl_api(int,vsldnewabstractstream,(VSLStreamStatePtr* , const MKL_INT *, const double[], const double *, const double *, const dUpdateFuncPtr)) +_MKL_API(int,VSLDNEWABSTRACTSTREAM,(VSLStreamStatePtr* , const MKL_INT *, const double[], const double *, const double *, const dUpdateFuncPtr)) + +_Mkl_Api(int,vslsNewAbstractStream,(VSLStreamStatePtr* , const MKL_INT , const float[], const float , const float , const sUpdateFuncPtr)) +_mkl_api(int,vslsnewabstractstream,(VSLStreamStatePtr* , const MKL_INT *, const float[], const float *, const float *, const sUpdateFuncPtr)) +_MKL_API(int,VSLSNEWABSTRACTSTREAM,(VSLStreamStatePtr* , const MKL_INT *, const float[], const float *, const float *, const sUpdateFuncPtr)) + +/* DeleteStream - delete stream */ +_Mkl_Api(int,vslDeleteStream,(VSLStreamStatePtr*)) +_mkl_api(int,vsldeletestream,(VSLStreamStatePtr*)) +_MKL_API(int,VSLDELETESTREAM,(VSLStreamStatePtr*)) + +/* CopyStream - copy all stream information */ +_Mkl_Api(int,vslCopyStream,(VSLStreamStatePtr*, const VSLStreamStatePtr)) +_mkl_api(int,vslcopystream,(VSLStreamStatePtr*, const VSLStreamStatePtr)) +_MKL_API(int,VSLCOPYSTREAM,(VSLStreamStatePtr*, const VSLStreamStatePtr)) + +/* CopyStreamState - copy stream state only */ +_Mkl_Api(int,vslCopyStreamState,(VSLStreamStatePtr , const VSLStreamStatePtr )) +_mkl_api(int,vslcopystreamstate,(VSLStreamStatePtr *, const VSLStreamStatePtr *)) +_MKL_API(int,VSLCOPYSTREAMSTATE,(VSLStreamStatePtr *, const VSLStreamStatePtr *)) + +/* LeapfrogStream - leapfrog method */ +_Mkl_Api(int,vslLeapfrogStream,(VSLStreamStatePtr , const MKL_INT , const MKL_INT )) +_mkl_api(int,vslleapfrogstream,(VSLStreamStatePtr *, const MKL_INT *, const MKL_INT *)) +_MKL_API(int,VSLLEAPFROGSTREAM,(VSLStreamStatePtr *, const MKL_INT *, const MKL_INT *)) + +/* SkipAheadStream - skip-ahead method */ +_Mkl_Api(int,vslSkipAheadStream,(VSLStreamStatePtr , const long long int )) +_mkl_api(int,vslskipaheadstream,(VSLStreamStatePtr *, const long long int *)) +_MKL_API(int,VSLSKIPAHEADSTREAM,(VSLStreamStatePtr *, const long long int *)) + +/* GetStreamStateBrng - get BRNG associated with given stream */ +_Mkl_Api(int,vslGetStreamStateBrng,(const VSLStreamStatePtr )) +_mkl_api(int,vslgetstreamstatebrng,(const VSLStreamStatePtr *)) +_MKL_API(int,VSLGETSTREAMSTATEBRNG,(const VSLStreamStatePtr *)) + +/* 
GetNumRegBrngs - get number of registered BRNGs */
+_Mkl_Api(int,vslGetNumRegBrngs,(void))
+_mkl_api(int,vslgetnumregbrngs,(void))
+_MKL_API(int,VSLGETNUMREGBRNGS,(void))
+
+/* RegisterBrng - register new BRNG */
+_Mkl_Api(int,vslRegisterBrng,(const VSLBRngProperties* ))
+_mkl_api(int,vslregisterbrng,(const VSLBRngProperties* ))
+_MKL_API(int,VSLREGISTERBRNG,(const VSLBRngProperties* ))
+
+/* GetBrngProperties - get BRNG properties */
+_Mkl_Api(int,vslGetBrngProperties,(const int , VSLBRngProperties* ))
+_mkl_api(int,vslgetbrngproperties,(const int *, VSLBRngProperties* ))
+_MKL_API(int,VSLGETBRNGPROPERTIES,(const int *, VSLBRngProperties* ))
+
+/* SaveStreamF - save random stream descriptive data to file */
+_Mkl_Api(int,vslSaveStreamF,(const VSLStreamStatePtr , const char* ))
+_mkl_api(int,vslsavestreamf,(const VSLStreamStatePtr *, const char* , const int ))
+_MKL_API(int,VSLSAVESTREAMF,(const VSLStreamStatePtr *, const char* , const int ))
+
+/* LoadStreamF - load random stream descriptive data from file */
+_Mkl_Api(int,vslLoadStreamF,(VSLStreamStatePtr *, const char* ))
+_mkl_api(int,vslloadstreamf,(VSLStreamStatePtr *, const char* , const int ))
+_MKL_API(int,VSLLOADSTREAMF,(VSLStreamStatePtr *, const char* , const int ))
+
+/* SaveStreamM - save random stream descriptive data to memory */
+_Mkl_Api(int,vslSaveStreamM,(const VSLStreamStatePtr , char* ))
+_mkl_api(int,vslsavestreamm,(const VSLStreamStatePtr *, char* ))
+_MKL_API(int,VSLSAVESTREAMM,(const VSLStreamStatePtr *, char* ))
+
+/* LoadStreamM - load random stream descriptive data from memory */
+_Mkl_Api(int,vslLoadStreamM,(VSLStreamStatePtr *, const char* ))
+_mkl_api(int,vslloadstreamm,(VSLStreamStatePtr *, const char* ))
+_MKL_API(int,VSLLOADSTREAMM,(VSLStreamStatePtr *, const char* ))
+
+/* GetStreamSize - get size of random stream descriptive data */
+_Mkl_Api(int,vslGetStreamSize,(const VSLStreamStatePtr))
+_mkl_api(int,vslgetstreamsize,(const VSLStreamStatePtr))
+_MKL_API(int,VSLGETSTREAMSIZE,(const VSLStreamStatePtr))
+
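+/*
+// Sketch: checkpointing a stream with the file routines above (file name
+// illustrative). vslSaveStreamF writes the complete stream state to a file
+// and vslLoadStreamF recreates it later, so a simulation can resume with an
+// identical random sequence:
+//
+//   err = vslSaveStreamF(stream, "mystream.bin");   /* checkpoint */
+//   ...
+//   err = vslLoadStreamF(&stream, "mystream.bin");  /* restore    */
+*/
+
+/*
+//++
+// VSL CONVOLUTION AND CORRELATION FUNCTION DECLARATIONS.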
+//-- +*/ + +_Mkl_Api(int,vsldConvNewTask,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vsldconvnewtask,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLDCONVNEWTASK,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + +_Mkl_Api(int,vslsConvNewTask,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vslsconvnewtask,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLSCONVNEWTASK,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + +_Mkl_Api(int,vslzConvNewTask,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vslzconvnewtask,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLZCONVNEWTASK,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + +_Mkl_Api(int,vslcConvNewTask,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vslcconvnewtask,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLCCONVNEWTASK,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + +_Mkl_Api(int,vsldCorrNewTask,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vsldcorrnewtask,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLDCORRNEWTASK,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + +_Mkl_Api(int,vslsCorrNewTask,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vslscorrnewtask,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLSCORRNEWTASK,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + +_Mkl_Api(int,vslzCorrNewTask,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vslzcorrnewtask,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLZCORRNEWTASK,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + +_Mkl_Api(int,vslcCorrNewTask,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_mkl_api(int,vslccorrnewtask,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) +_MKL_API(int,VSLCCORRNEWTASK,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [])) + + +_Mkl_Api(int,vsldConvNewTask1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vsldconvnewtask1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const 
MKL_INT* )) +_MKL_API(int,VSLDCONVNEWTASK1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + +_Mkl_Api(int,vslsConvNewTask1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vslsconvnewtask1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) +_MKL_API(int,VSLSCONVNEWTASK1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + +_Mkl_Api(int,vslzConvNewTask1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vslzconvnewtask1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) +_MKL_API(int,VSLZCONVNEWTASK1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + +_Mkl_Api(int,vslcConvNewTask1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vslcconvnewtask1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) +_MKL_API(int,VSLCCONVNEWTASK1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + +_Mkl_Api(int,vsldCorrNewTask1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vsldcorrnewtask1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) +_MKL_API(int,VSLDCORRNEWTASK1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + +_Mkl_Api(int,vslsCorrNewTask1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vslscorrnewtask1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) +_MKL_API(int,VSLSCORRNEWTASK1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + +_Mkl_Api(int,vslzCorrNewTask1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vslzcorrnewtask1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) +_MKL_API(int,VSLZCORRNEWTASK1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + +_Mkl_Api(int,vslcCorrNewTask1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT )) +_mkl_api(int,vslccorrnewtask1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) +_MKL_API(int,VSLCCORRNEWTASK1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* )) + + +_Mkl_Api(int,vsldConvNewTaskX,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const double [], const MKL_INT [])) +_mkl_api(int,vsldconvnewtaskx,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const double [], const MKL_INT [])) +_MKL_API(int,VSLDCONVNEWTASKX,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const double [], const MKL_INT [])) + +_Mkl_Api(int,vslsConvNewTaskX,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const float [], const MKL_INT [])) +_mkl_api(int,vslsconvnewtaskx,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const float [], const MKL_INT [])) 
+_MKL_API(int,VSLSCONVNEWTASKX,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const float [], const MKL_INT [])) + +_Mkl_Api(int,vslzConvNewTaskX,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [])) +_mkl_api(int,vslzconvnewtaskx,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [])) +_MKL_API(int,VSLZCONVNEWTASKX,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [])) + +_Mkl_Api(int,vslcConvNewTaskX,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [])) +_mkl_api(int,vslcconvnewtaskx,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [])) +_MKL_API(int,VSLCCONVNEWTASKX,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [])) + +_Mkl_Api(int,vsldCorrNewTaskX,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const double [], const MKL_INT [])) +_mkl_api(int,vsldcorrnewtaskx,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const double [], const MKL_INT [])) +_MKL_API(int,VSLDCORRNEWTASKX,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const double [], const MKL_INT [])) + +_Mkl_Api(int,vslsCorrNewTaskX,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const float [], const MKL_INT [])) +_mkl_api(int,vslscorrnewtaskx,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const float [], const MKL_INT [])) +_MKL_API(int,VSLSCORRNEWTASKX,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const float [], const MKL_INT [])) + +_Mkl_Api(int,vslzCorrNewTaskX,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [])) +_mkl_api(int,vslzcorrnewtaskx,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [])) +_MKL_API(int,VSLZCORRNEWTASKX,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [])) + +_Mkl_Api(int,vslcCorrNewTaskX,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [])) +_mkl_api(int,vslccorrnewtaskx,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [])) +_MKL_API(int,VSLCCORRNEWTASKX,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT [], const MKL_INT [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [])) + + +_Mkl_Api(int,vsldConvNewTaskX1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const double [], const MKL_INT )) 
+_mkl_api(int,vsldconvnewtaskx1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const double [], const MKL_INT* )) +_MKL_API(int,VSLDCONVNEWTASKX1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const double [], const MKL_INT* )) + +_Mkl_Api(int,vslsConvNewTaskX1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const float [], const MKL_INT )) +_mkl_api(int,vslsconvnewtaskx1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const float [], const MKL_INT* )) +_MKL_API(int,VSLSCONVNEWTASKX1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const float [], const MKL_INT* )) + +_Mkl_Api(int,vslzConvNewTaskX1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_Complex16 [], const MKL_INT )) +_mkl_api(int,vslzconvnewtaskx1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_Complex16 [], const MKL_INT* )) +_MKL_API(int,VSLZCONVNEWTASKX1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_Complex16 [], const MKL_INT* )) + +_Mkl_Api(int,vslcConvNewTaskX1D,(VSLConvTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_Complex8 [], const MKL_INT )) +_mkl_api(int,vslcconvnewtaskx1d,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_Complex8 [], const MKL_INT* )) +_MKL_API(int,VSLCCONVNEWTASKX1D,(VSLConvTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_Complex8 [], const MKL_INT* )) + +_Mkl_Api(int,vsldCorrNewTaskX1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const double [], const MKL_INT )) +_mkl_api(int,vsldcorrnewtaskx1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const double [], const MKL_INT* )) +_MKL_API(int,VSLDCORRNEWTASKX1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const double [], const MKL_INT* )) + +_Mkl_Api(int,vslsCorrNewTaskX1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const float [], const MKL_INT )) +_mkl_api(int,vslscorrnewtaskx1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const float [], const MKL_INT* )) +_MKL_API(int,VSLSCORRNEWTASKX1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const float [], const MKL_INT* )) + +_Mkl_Api(int,vslzCorrNewTaskX1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_Complex16 [], const MKL_INT )) +_mkl_api(int,vslzcorrnewtaskx1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_Complex16 [], const MKL_INT* )) +_MKL_API(int,VSLZCORRNEWTASKX1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_Complex16 [], const MKL_INT* )) + +_Mkl_Api(int,vslcCorrNewTaskX1D,(VSLCorrTaskPtr* , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_INT , const MKL_Complex8 [], const MKL_INT )) +_mkl_api(int,vslccorrnewtaskx1d,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const MKL_Complex8 [], const MKL_INT* )) +_MKL_API(int,VSLCCORRNEWTASKX1D,(VSLCorrTaskPtr* , const MKL_INT* , const MKL_INT* , const 
MKL_INT* , const MKL_INT* , const MKL_Complex8 [], const MKL_INT* )) + + +_Mkl_Api(int,vslConvDeleteTask,(VSLConvTaskPtr* )) +_mkl_api(int,vslconvdeletetask,(VSLConvTaskPtr* )) +_MKL_API(int,VSLCONVDeleteTask,(VSLConvTaskPtr* )) + +_Mkl_Api(int,vslCorrDeleteTask,(VSLCorrTaskPtr* )) +_mkl_api(int,vslcorrdeletetask,(VSLCorrTaskPtr* )) +_MKL_API(int,VSLCORRDeleteTask,(VSLCorrTaskPtr* )) + + +_Mkl_Api(int,vslConvCopyTask,(VSLConvTaskPtr* , const VSLConvTaskPtr )) +_mkl_api(int,vslconvcopytask,(VSLConvTaskPtr* , const VSLConvTaskPtr* )) +_MKL_API(int,VSLCONVCopyTask,(VSLConvTaskPtr* , const VSLConvTaskPtr* )) + +_Mkl_Api(int,vslCorrCopyTask,(VSLCorrTaskPtr* , const VSLCorrTaskPtr )) +_mkl_api(int,vslcorrcopytask,(VSLCorrTaskPtr* , const VSLCorrTaskPtr* )) +_MKL_API(int,VSLCORRCopyTask,(VSLCorrTaskPtr* , const VSLCorrTaskPtr* )) + + +_Mkl_Api(int,vslConvSetMode,(VSLConvTaskPtr , const MKL_INT )) +_mkl_api(int,vslconvsetmode,(VSLConvTaskPtr* , const MKL_INT* )) +_MKL_API(int,VSLCONVSETMODE,(VSLConvTaskPtr* , const MKL_INT* )) + +_Mkl_Api(int,vslCorrSetMode,(VSLCorrTaskPtr , const MKL_INT )) +_mkl_api(int,vslcorrsetmode,(VSLCorrTaskPtr* , const MKL_INT* )) +_MKL_API(int,VSLCORRSETMODE,(VSLCorrTaskPtr* , const MKL_INT* )) + + +_Mkl_Api(int,vslConvSetInternalPrecision,(VSLConvTaskPtr , const MKL_INT )) +_mkl_api(int,vslconvsetinternalprecision,(VSLConvTaskPtr* , const MKL_INT* )) +_MKL_API(int,VSLCONVSETINTERNALPRECISION,(VSLConvTaskPtr* , const MKL_INT* )) + +_Mkl_Api(int,vslCorrSetInternalPrecision,(VSLCorrTaskPtr , const MKL_INT )) +_mkl_api(int,vslcorrsetinternalprecision,(VSLCorrTaskPtr* , const MKL_INT* )) +_MKL_API(int,VSLCORRSETINTERNALPRECISION,(VSLCorrTaskPtr* , const MKL_INT* )) + + +_Mkl_Api(int,vslConvSetStart,(VSLConvTaskPtr , const MKL_INT [])) +_mkl_api(int,vslconvsetstart,(VSLConvTaskPtr* , const MKL_INT [])) +_MKL_API(int,VSLCONVSETSTART,(VSLConvTaskPtr* , const MKL_INT [])) + +_Mkl_Api(int,vslCorrSetStart,(VSLCorrTaskPtr , const MKL_INT [])) +_mkl_api(int,vslcorrsetstart,(VSLCorrTaskPtr* , const MKL_INT [])) +_MKL_API(int,VSLCORRSETSTART,(VSLCorrTaskPtr* , const MKL_INT [])) + + +_Mkl_Api(int,vslConvSetDecimation,(VSLConvTaskPtr , const MKL_INT [])) +_mkl_api(int,vslconvsetdecimation,(VSLConvTaskPtr* , const MKL_INT [])) +_MKL_API(int,VSLCONVSETDECIMATION,(VSLConvTaskPtr* , const MKL_INT [])) + +_Mkl_Api(int,vslCorrSetDecimation,(VSLCorrTaskPtr , const MKL_INT [])) +_mkl_api(int,vslcorrsetdecimation,(VSLCorrTaskPtr* , const MKL_INT [])) +_MKL_API(int,VSLCORRSETDECIMATION,(VSLCorrTaskPtr* , const MKL_INT [])) + + +_Mkl_Api(int,vsldConvExec,(VSLConvTaskPtr , const double [], const MKL_INT [], const double [], const MKL_INT [], double [], const MKL_INT [])) +_mkl_api(int,vsldconvexec,(VSLConvTaskPtr* , const double [], const MKL_INT [], const double [], const MKL_INT [], double [], const MKL_INT [])) +_MKL_API(int,VSLDCONVEXEC,(VSLConvTaskPtr* , const double [], const MKL_INT [], const double [], const MKL_INT [], double [], const MKL_INT [])) + +_Mkl_Api(int,vslsConvExec,(VSLConvTaskPtr , const float [], const MKL_INT [], const float [], const MKL_INT [], float [], const MKL_INT [])) +_mkl_api(int,vslsconvexec,(VSLConvTaskPtr* , const float [], const MKL_INT [], const float [], const MKL_INT [], float [], const MKL_INT [])) +_MKL_API(int,VSLSCONVEXEC,(VSLConvTaskPtr* , const float [], const MKL_INT [], const float [], const MKL_INT [], float [], const MKL_INT [])) + +_Mkl_Api(int,vslzConvExec,(VSLConvTaskPtr , const MKL_Complex16 [], const MKL_INT [], const MKL_Complex16 [], const 
MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_mkl_api(int,vslzconvexec,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_MKL_API(int,VSLZCONVEXEC,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) + +_Mkl_Api(int,vslcConvExec,(VSLConvTaskPtr , const MKL_Complex8 [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_mkl_api(int,vslcconvexec,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_MKL_API(int,VSLCCONVEXEC,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) + +_Mkl_Api(int,vsldCorrExec,(VSLCorrTaskPtr , const double [], const MKL_INT [], const double [], const MKL_INT [], double [], const MKL_INT [])) +_mkl_api(int,vsldcorrexec,(VSLCorrTaskPtr* , const double [], const MKL_INT [], const double [], const MKL_INT [], double [], const MKL_INT [])) +_MKL_API(int,VSLDCORREXEC,(VSLCorrTaskPtr* , const double [], const MKL_INT [], const double [], const MKL_INT [], double [], const MKL_INT [])) + +_Mkl_Api(int,vslsCorrExec,(VSLCorrTaskPtr , const float [], const MKL_INT [], const float [], const MKL_INT [], float [], const MKL_INT [])) +_mkl_api(int,vslscorrexec,(VSLCorrTaskPtr* , const float [], const MKL_INT [], const float [], const MKL_INT [], float [], const MKL_INT [])) +_MKL_API(int,VSLSCORREXEC,(VSLCorrTaskPtr* , const float [], const MKL_INT [], const float [], const MKL_INT [], float [], const MKL_INT [])) + +_Mkl_Api(int,vslzCorrExec,(VSLCorrTaskPtr , const MKL_Complex16 [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_mkl_api(int,vslzcorrexec,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_MKL_API(int,VSLZCORREXEC,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT [], const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) + +_Mkl_Api(int,vslcCorrExec,(VSLCorrTaskPtr , const MKL_Complex8 [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_mkl_api(int,vslccorrexec,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_MKL_API(int,VSLCCORREXEC,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT [], const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) + + +_Mkl_Api(int,vsldConvExec1D,(VSLConvTaskPtr , const double [], const MKL_INT , const double [], const MKL_INT , double [], const MKL_INT )) +_mkl_api(int,vsldconvexec1d,(VSLConvTaskPtr* , const double [], const MKL_INT* , const double [], const MKL_INT* , double [], const MKL_INT* )) +_MKL_API(int,VSLDCONVEXEC1D,(VSLConvTaskPtr* , const double [], const MKL_INT* , const double [], const MKL_INT* , double [], const MKL_INT* )) + +_Mkl_Api(int,vslsConvExec1D,(VSLConvTaskPtr , const float [], const MKL_INT , const float [], const MKL_INT , float [], const MKL_INT )) +_mkl_api(int,vslsconvexec1d,(VSLConvTaskPtr* , const float [], const MKL_INT* , const float [], const MKL_INT* , float [], const MKL_INT* )) +_MKL_API(int,VSLSCONVEXEC1D,(VSLConvTaskPtr* , const float [], const MKL_INT* , const float [], 
const MKL_INT* , float [], const MKL_INT* )) + +_Mkl_Api(int,vslzConvExec1D,(VSLConvTaskPtr , const MKL_Complex16 [], const MKL_INT , const MKL_Complex16 [], const MKL_INT , MKL_Complex16 [], const MKL_INT )) +_mkl_api(int,vslzconvexec1d,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* )) +_MKL_API(int,VSLZCONVEXEC1D,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* )) + +_Mkl_Api(int,vslcConvExec1D,(VSLConvTaskPtr , const MKL_Complex8 [], const MKL_INT , const MKL_Complex8 [], const MKL_INT , MKL_Complex8 [], const MKL_INT )) +_mkl_api(int,vslcconvexec1d,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT* , const MKL_Complex8 [], const MKL_INT* , MKL_Complex8 [], const MKL_INT* )) +_MKL_API(int,VSLCCONVEXEC1D,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT* , const MKL_Complex8 [], const MKL_INT* , MKL_Complex8 [], const MKL_INT* )) + +_Mkl_Api(int,vsldCorrExec1D,(VSLCorrTaskPtr , const double [], const MKL_INT , const double [], const MKL_INT , double [], const MKL_INT )) +_mkl_api(int,vsldcorrexec1d,(VSLCorrTaskPtr* , const double [], const MKL_INT* , const double [], const MKL_INT* , double [], const MKL_INT* )) +_MKL_API(int,VSLDCORREXEC1D,(VSLCorrTaskPtr* , const double [], const MKL_INT* , const double [], const MKL_INT* , double [], const MKL_INT* )) + +_Mkl_Api(int,vslsCorrExec1D,(VSLCorrTaskPtr , const float [], const MKL_INT , const float [], const MKL_INT , float [], const MKL_INT )) +_mkl_api(int,vslscorrexec1d,(VSLCorrTaskPtr* , const float [], const MKL_INT* , const float [], const MKL_INT* , float [], const MKL_INT* )) +_MKL_API(int,VSLSCORREXEC1D,(VSLCorrTaskPtr* , const float [], const MKL_INT* , const float [], const MKL_INT* , float [], const MKL_INT* )) + +_Mkl_Api(int,vslzCorrExec1D,(VSLCorrTaskPtr , const MKL_Complex16 [], const MKL_INT , const MKL_Complex16 [], const MKL_INT , MKL_Complex16 [], const MKL_INT )) +_mkl_api(int,vslzcorrexec1d,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* )) +_MKL_API(int,VSLZCORREXEC1D,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* )) + +_Mkl_Api(int,vslcCorrExec1D,(VSLCorrTaskPtr , const MKL_Complex8 [], const MKL_INT , const MKL_Complex8 [], const MKL_INT , MKL_Complex8 [], const MKL_INT )) +_mkl_api(int,vslccorrexec1d,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT* , const MKL_Complex8 [], const MKL_INT* , MKL_Complex8 [], const MKL_INT* )) +_MKL_API(int,VSLCCORREXEC1D,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT* , const MKL_Complex8 [], const MKL_INT* , MKL_Complex8 [], const MKL_INT* )) + + +_Mkl_Api(int,vsldConvExecX,(VSLConvTaskPtr , const double [], const MKL_INT [], double [], const MKL_INT [])) +_mkl_api(int,vsldconvexecx,(VSLConvTaskPtr* , const double [], const MKL_INT [], double [], const MKL_INT [])) +_MKL_API(int,VSLDCONVEXECX,(VSLConvTaskPtr* , const double [], const MKL_INT [], double [], const MKL_INT [])) + +_Mkl_Api(int,vslsConvExecX,(VSLConvTaskPtr , const float [], const MKL_INT [], float [], const MKL_INT [])) +_mkl_api(int,vslsconvexecx,(VSLConvTaskPtr* , const float [], const MKL_INT [], float [], const MKL_INT [])) +_MKL_API(int,VSLSCONVEXECX,(VSLConvTaskPtr* , const float [], const MKL_INT [], float [], const MKL_INT [])) + 
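+/*
+// Sketch: a complete 1D double-precision convolution through the task API
+// declared above (x and y assumed filled elsewhere). VSL_CONV_MODE_AUTO, from
+// mkl_vsl_defines.h, lets VSL choose between the direct and FFT algorithms;
+// for a full convolution the result length is xshape + yshape - 1:
+//
+//   VSLConvTaskPtr task;
+//   double x[100], y[16], z[115];                 /* 100 + 16 - 1 = 115 */
+//   int err;
+//   err = vsldConvNewTask1D(&task, VSL_CONV_MODE_AUTO, 100, 16, 115);
+//   err = vsldConvExec1D(task, x, 1, y, 1, z, 1); /* unit strides       */
+//   err = vslConvDeleteTask(&task);
+*/
+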
+_Mkl_Api(int,vslzConvExecX,(VSLConvTaskPtr , const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_mkl_api(int,vslzconvexecx,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_MKL_API(int,VSLZCONVEXECX,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) + +_Mkl_Api(int,vslcConvExecX,(VSLConvTaskPtr , const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_mkl_api(int,vslcconvexecx,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_MKL_API(int,VSLCCONVEXECX,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) + +_Mkl_Api(int,vsldCorrExecX,(VSLCorrTaskPtr , const double [], const MKL_INT [], double [], const MKL_INT [])) +_mkl_api(int,vsldcorrexecx,(VSLCorrTaskPtr* , const double [], const MKL_INT [], double [], const MKL_INT [])) +_MKL_API(int,VSLDCORREXECX,(VSLCorrTaskPtr* , const double [], const MKL_INT [], double [], const MKL_INT [])) + +_Mkl_Api(int,vslsCorrExecX,(VSLCorrTaskPtr , const float [], const MKL_INT [], float [], const MKL_INT [])) +_mkl_api(int,vslscorrexecx,(VSLCorrTaskPtr* , const float [], const MKL_INT [], float [], const MKL_INT [])) +_MKL_API(int,VSLSCORREXECX,(VSLCorrTaskPtr* , const float [], const MKL_INT [], float [], const MKL_INT [])) + +_Mkl_Api(int,vslzCorrExecX,(VSLCorrTaskPtr , const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_mkl_api(int,vslzcorrexecx,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) +_MKL_API(int,VSLZCORREXECX,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT [], MKL_Complex16 [], const MKL_INT [])) + +_Mkl_Api(int,vslcCorrExecX,(VSLCorrTaskPtr , const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_mkl_api(int,vslccorrexecx,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) +_MKL_API(int,VSLCCORREXECX,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT [], MKL_Complex8 [], const MKL_INT [])) + + +_Mkl_Api(int,vsldConvExecX1D,(VSLConvTaskPtr , const double [], const MKL_INT , double [], const MKL_INT )) +_mkl_api(int,vsldconvexecx1d,(VSLConvTaskPtr* , const double [], const MKL_INT* , double [], const MKL_INT* )) +_MKL_API(int,VSLDCONVEXECX1D,(VSLConvTaskPtr* , const double [], const MKL_INT* , double [], const MKL_INT* )) + +_Mkl_Api(int,vslsConvExecX1D,(VSLConvTaskPtr , const float [], const MKL_INT , float [], const MKL_INT )) +_mkl_api(int,vslsconvexecx1d,(VSLConvTaskPtr* , const float [], const MKL_INT* , float [], const MKL_INT* )) +_MKL_API(int,VSLSCONVEXECX1D,(VSLConvTaskPtr* , const float [], const MKL_INT* , float [], const MKL_INT* )) + +_Mkl_Api(int,vslzConvExecX1D,(VSLConvTaskPtr , const MKL_Complex16 [], const MKL_INT , MKL_Complex16 [], const MKL_INT )) +_mkl_api(int,vslzconvexecx1d,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* )) +_MKL_API(int,VSLZCONVEXECX1D,(VSLConvTaskPtr* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* )) + +_Mkl_Api(int,vslcConvExecX1D,(VSLConvTaskPtr , const MKL_Complex8 [], const MKL_INT , MKL_Complex8 [], const MKL_INT )) +_mkl_api(int,vslcconvexecx1d,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT* , MKL_Complex8 [], const MKL_INT* )) +_MKL_API(int,VSLCCONVEXECX1D,(VSLConvTaskPtr* , const MKL_Complex8 [], const MKL_INT* , 
MKL_Complex8 [], const MKL_INT* ))
+
+_Mkl_Api(int,vsldCorrExecX1D,(VSLCorrTaskPtr , const double [], const MKL_INT , double [], const MKL_INT ))
+_mkl_api(int,vsldcorrexecx1d,(VSLCorrTaskPtr* , const double [], const MKL_INT* , double [], const MKL_INT* ))
+_MKL_API(int,VSLDCORREXECX1D,(VSLCorrTaskPtr* , const double [], const MKL_INT* , double [], const MKL_INT* ))
+
+_Mkl_Api(int,vslsCorrExecX1D,(VSLCorrTaskPtr , const float [], const MKL_INT , float [], const MKL_INT ))
+_mkl_api(int,vslscorrexecx1d,(VSLCorrTaskPtr* , const float [], const MKL_INT* , float [], const MKL_INT* ))
+_MKL_API(int,VSLSCORREXECX1D,(VSLCorrTaskPtr* , const float [], const MKL_INT* , float [], const MKL_INT* ))
+
+_Mkl_Api(int,vslzCorrExecX1D,(VSLCorrTaskPtr , const MKL_Complex16 [], const MKL_INT , MKL_Complex16 [], const MKL_INT ))
+_mkl_api(int,vslzcorrexecx1d,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* ))
+_MKL_API(int,VSLZCORREXECX1D,(VSLCorrTaskPtr* , const MKL_Complex16 [], const MKL_INT* , MKL_Complex16 [], const MKL_INT* ))
+
+_Mkl_Api(int,vslcCorrExecX1D,(VSLCorrTaskPtr , const MKL_Complex8 [], const MKL_INT , MKL_Complex8 [], const MKL_INT ))
+_mkl_api(int,vslccorrexecx1d,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT* , MKL_Complex8 [], const MKL_INT* ))
+_MKL_API(int,VSLCCORREXECX1D,(VSLCorrTaskPtr* , const MKL_Complex8 [], const MKL_INT* , MKL_Complex8 [], const MKL_INT* ))
+
+
+/*
+//++
+// SUMMARY STATISTICS LIBRARY ROUTINES
+//--
+*/
+
+/*
+// Task constructors
+*/
+_Mkl_Api(int,vsldSSNewTask,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const double [], const double [], const MKL_INT []))
+_mkl_api(int,vsldssnewtask,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const double [], const double [], const MKL_INT []))
+_MKL_API(int,VSLDSSNEWTASK,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const double [], const double [], const MKL_INT []))
+
+_Mkl_Api(int,vslsSSNewTask,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const float [], const float [], const MKL_INT []))
+_mkl_api(int,vslsssnewtask,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const float [], const float [], const MKL_INT []))
+_MKL_API(int,VSLSSSNEWTASK,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const MKL_INT* , const float [], const float [], const MKL_INT []))
+
+
+/*
+// Task editors
+*/
+
+/*
+// Editor to modify a task parameter
+*/
+_Mkl_Api(int,vsldSSEditTask,(VSLSSTaskPtr , const MKL_INT , const double* ))
+_mkl_api(int,vsldssedittask,(VSLSSTaskPtr* , const MKL_INT* , const double* ))
+_MKL_API(int,VSLDSSEDITTASK,(VSLSSTaskPtr* , const MKL_INT* , const double* ))
+
+_Mkl_Api(int,vslsSSEditTask,(VSLSSTaskPtr , const MKL_INT , const float* ))
+_mkl_api(int,vslsssedittask,(VSLSSTaskPtr* , const MKL_INT* , const float* ))
+_MKL_API(int,VSLSSSEDITTASK,(VSLSSTaskPtr* , const MKL_INT* , const float* ))
+
+_Mkl_Api(int,vsliSSEditTask,(VSLSSTaskPtr , const MKL_INT , const MKL_INT* ))
+_mkl_api(int,vslissedittask,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* ))
+_MKL_API(int,VSLISSEDITTASK,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* ))
+
+/*
+// Task specific editors
+*/
+
+/*
+// Editors to modify moments related parameters
+*/
+_Mkl_Api(int,vsldSSEditMoments,(VSLSSTaskPtr , double* , double* , double* , double* , double* , double* , double* ))
+_mkl_api(int,vsldsseditmoments,(VSLSSTaskPtr* , double* , double* , double* , double* , double* , double* ,
double* )) +_MKL_API(int,VSLDSSEDITMOMENTS,(VSLSSTaskPtr* , double* , double* , double* , double* , double* , double* , double* )) + +_Mkl_Api(int,vslsSSEditMoments,(VSLSSTaskPtr , float* , float* , float* , float* , float* , float* , float* )) +_mkl_api(int,vslssseditmoments,(VSLSSTaskPtr* , float* , float* , float* , float* , float* , float* , float* )) +_MKL_API(int,VSLSSSEDITMOMENTS,(VSLSSTaskPtr* , float* , float* , float* , float* , float* , float* , float* )) + + +/* +// Editors to modify sums related parameters +*/ +_Mkl_Api(int,vsldSSEditSums,(VSLSSTaskPtr , double* , double* , double* , double* , double* , double* , double* )) +_mkl_api(int,vsldsseditsums,(VSLSSTaskPtr* , double* , double* , double* , double* , double* , double* , double* )) +_MKL_API(int,VSLDSSEDITSUMS,(VSLSSTaskPtr* , double* , double* , double* , double* , double* , double* , double* )) + +_Mkl_Api(int,vslsSSEditSums,(VSLSSTaskPtr , float* , float* , float* , float* , float* , float* , float* )) +_mkl_api(int,vslssseditsums,(VSLSSTaskPtr* , float* , float* , float* , float* , float* , float* , float* )) +_MKL_API(int,VSLSSSEDITSUMS,(VSLSSTaskPtr* , float* , float* , float* , float* , float* , float* , float* )) + + +/* +// Editors to modify variance-covariance/correlation matrix related parameters +*/ +_Mkl_Api(int,vsldSSEditCovCor,(VSLSSTaskPtr , double* , double* , const MKL_INT* , double* , const MKL_INT* )) +_mkl_api(int,vsldsseditcovcor,(VSLSSTaskPtr* , double* , double* , const MKL_INT* , double* , const MKL_INT* )) +_MKL_API(int,VSLDSSEDITCOVCOR,(VSLSSTaskPtr* , double* , double* , const MKL_INT* , double* , const MKL_INT* )) + +_Mkl_Api(int,vslsSSEditCovCor,(VSLSSTaskPtr , float* , float* , const MKL_INT* , float* , const MKL_INT* )) +_mkl_api(int,vslssseditcovcor,(VSLSSTaskPtr* , float* , float* , const MKL_INT* , float* , const MKL_INT* )) +_MKL_API(int,VSLSSSEDITCOVCOR,(VSLSSTaskPtr* , float* , float* , const MKL_INT* , float* , const MKL_INT* )) + + +/* +// Editors to modify cross-product matrix related parameters +*/ +_Mkl_Api(int,vsldSSEditCP,(VSLSSTaskPtr , double* , double* , double* , const MKL_INT* )) +_mkl_api(int,vsldsseditcp,(VSLSSTaskPtr* , double* , double* , double* , const MKL_INT* )) +_MKL_API(int,VSLDSSEDITCP,(VSLSSTaskPtr* , double* , double* , double* , const MKL_INT* )) + +_Mkl_Api(int,vslsSSEditCP,(VSLSSTaskPtr , float* , float* , float* , const MKL_INT* )) +_mkl_api(int,vslssseditcp,(VSLSSTaskPtr* , float* , float* , float* , const MKL_INT* )) +_MKL_API(int,VSLSSSEDITCP,(VSLSSTaskPtr* , float* , float* , float* , const MKL_INT* )) + + +/* +// Editors to modify partial variance-covariance matrix related parameters +*/ +_Mkl_Api(int,vsldSSEditPartialCovCor,(VSLSSTaskPtr , const MKL_INT [], const double* , const MKL_INT* , const double* , const MKL_INT* , double* , const MKL_INT* , double* , const MKL_INT* )) +_mkl_api(int,vsldsseditpartialcovcor,(VSLSSTaskPtr* , const MKL_INT [], const double* , const MKL_INT* , const double* , const MKL_INT* , double* , const MKL_INT* , double* , const MKL_INT* )) +_MKL_API(int,VSLDSSEDITPARTIALCOVCOR,(VSLSSTaskPtr* , const MKL_INT [], const double* , const MKL_INT* , const double* , const MKL_INT* , double* , const MKL_INT* , double* , const MKL_INT* )) + +_Mkl_Api(int,vslsSSEditPartialCovCor,(VSLSSTaskPtr , const MKL_INT [], const float* , const MKL_INT* , const float* , const MKL_INT* , float* , const MKL_INT* , float* , const MKL_INT* )) +_mkl_api(int,vslssseditpartialcovcor,(VSLSSTaskPtr* , const MKL_INT [], const float* , const MKL_INT* , 
const float* , const MKL_INT* , float* , const MKL_INT* , float* , const MKL_INT* )) +_MKL_API(int,VSLSSSEDITPARTIALCOVCOR,(VSLSSTaskPtr* , const MKL_INT [], const float* , const MKL_INT* , const float* , const MKL_INT* , float* , const MKL_INT* , float* , const MKL_INT* )) + + +/* +// Editors to modify quantiles related parameters +*/ +_Mkl_Api(int,vsldSSEditQuantiles,(VSLSSTaskPtr , const MKL_INT* , const double* , double* , double* , const MKL_INT* )) +_mkl_api(int,vsldsseditquantiles,(VSLSSTaskPtr* , const MKL_INT* , const double* , double* , double* , const MKL_INT* )) +_MKL_API(int,VSLDSSEDITQUANTILES,(VSLSSTaskPtr* , const MKL_INT* , const double* , double* , double* , const MKL_INT* )) + +_Mkl_Api(int,vslsSSEditQuantiles,(VSLSSTaskPtr , const MKL_INT* , const float* , float* , float* , const MKL_INT* )) +_mkl_api(int,vslssseditquantiles,(VSLSSTaskPtr* , const MKL_INT* , const float* , float* , float* , const MKL_INT* )) +_MKL_API(int,VSLSSSEDITQUANTILES,(VSLSSTaskPtr* , const MKL_INT* , const float* , float* , float* , const MKL_INT* )) + + +/* +// Editors to modify stream data quantiles related parameters +*/ +_Mkl_Api(int,vsldSSEditStreamQuantiles,(VSLSSTaskPtr , const MKL_INT* , const double* , double* , const MKL_INT* , const double* )) +_mkl_api(int,vsldsseditstreamquantiles,(VSLSSTaskPtr* , const MKL_INT* , const double* , double* , const MKL_INT* , const double* )) +_MKL_API(int,VSLDSSEDITSTREAMQUANTILES,(VSLSSTaskPtr* , const MKL_INT* , const double* , double* , const MKL_INT* , const double* )) + +_Mkl_Api(int,vslsSSEditStreamQuantiles,(VSLSSTaskPtr , const MKL_INT* , const float* , float* , const MKL_INT* , const float* )) +_mkl_api(int,vslssseditstreamquantiles,(VSLSSTaskPtr* , const MKL_INT* , const float* , float* , const MKL_INT* , const float* )) +_MKL_API(int,VSLSSSEDITSTREAMQUANTILES,(VSLSSTaskPtr* , const MKL_INT* , const float* , float* , const MKL_INT* , const float* )) + +/* +// Editors to modify pooled/group variance-covariance matrix related parameters +*/ +_Mkl_Api(int,vsldSSEditPooledCovariance,(VSLSSTaskPtr , const MKL_INT* , double* , double* , const MKL_INT* , double* , double* )) +_mkl_api(int,vsldsseditpooledcovariance,(VSLSSTaskPtr* , const MKL_INT* , double* , double* , const MKL_INT* , double* , double* )) +_MKL_API(int,VSLDSSEDITPOOLEDCOVARIANCE,(VSLSSTaskPtr* , const MKL_INT* , double* , double* , const MKL_INT* , double* , double* )) + +_Mkl_Api(int,vslsSSEditPooledCovariance,(VSLSSTaskPtr , const MKL_INT* , float* , float* , const MKL_INT* , float* , float* )) +_mkl_api(int,vslssseditpooledcovariance,(VSLSSTaskPtr* , const MKL_INT* , float* , float* , const MKL_INT* , float* , float* )) +_MKL_API(int,VSLSSSEDITPOOLEDCOVARIANCE,(VSLSSTaskPtr* , const MKL_INT* , float* , float* , const MKL_INT* , float* , float* )) + + +/* +// Editors to modify robust variance-covariance matrix related parameters +*/ +_Mkl_Api(int,vsldSSEditRobustCovariance,(VSLSSTaskPtr , const MKL_INT* , const MKL_INT* , const double* , double* , double* )) +_mkl_api(int,vsldsseditrobustcovariance,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const double* , double* , double* )) +_MKL_API(int,VSLDSSEDITROBUSTCOVARIANCE,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const double* , double* , double* )) + +_Mkl_Api(int,vslsSSEditRobustCovariance,(VSLSSTaskPtr , const MKL_INT* , const MKL_INT* , const float* , float* , float* )) +_mkl_api(int,vslssseditrobustcovariance,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const float* , float* , float* )) 
+_MKL_API(int,VSLSSSEDITROBUSTCOVARIANCE,(VSLSSTaskPtr* , const MKL_INT* , const MKL_INT* , const float* , float* , float* ))
+
+
+/*
+// Editors to modify outliers detection parameters
+*/
+_Mkl_Api(int,vsldSSEditOutliersDetection,(VSLSSTaskPtr , const MKL_INT* , const double* , double* ))
+_mkl_api(int,vsldsseditoutliersdetection,(VSLSSTaskPtr* , const MKL_INT* , const double* , double* ))
+_MKL_API(int,VSLDSSEDITOUTLIERSDETECTION,(VSLSSTaskPtr* , const MKL_INT* , const double* , double* ))
+
+_Mkl_Api(int,vslsSSEditOutliersDetection,(VSLSSTaskPtr , const MKL_INT* , const float* , float* ))
+_mkl_api(int,vslssseditoutliersdetection,(VSLSSTaskPtr* , const MKL_INT* , const float* , float* ))
+_MKL_API(int,VSLSSSEDITOUTLIERSDETECTION,(VSLSSTaskPtr* , const MKL_INT* , const float* , float* ))
+
+/*
+// Editors to modify missing values support parameters
+*/
+_Mkl_Api(int,vsldSSEditMissingValues,(VSLSSTaskPtr , const MKL_INT* , const double* , const MKL_INT* , const double* , const MKL_INT* , const double* , const MKL_INT* , double* , const MKL_INT* , double* ))
+_mkl_api(int,vsldsseditmissingvalues,(VSLSSTaskPtr* , const MKL_INT* , const double* , const MKL_INT* , const double* , const MKL_INT* , const double* , const MKL_INT* , double* , const MKL_INT* , double* ))
+_MKL_API(int,VSLDSSEDITMISSINGVALUES,(VSLSSTaskPtr* , const MKL_INT* , const double* , const MKL_INT* , const double* , const MKL_INT* , const double* , const MKL_INT* , double* , const MKL_INT* , double* ))
+
+_Mkl_Api(int,vslsSSEditMissingValues,(VSLSSTaskPtr , const MKL_INT* , const float* , const MKL_INT* , const float* , const MKL_INT* , const float* , const MKL_INT* , float* , const MKL_INT* , float* ))
+_mkl_api(int,vslssseditmissingvalues,(VSLSSTaskPtr* , const MKL_INT* , const float* , const MKL_INT* , const float* , const MKL_INT* , const float* , const MKL_INT* , float* , const MKL_INT* , float* ))
+_MKL_API(int,VSLSSSEDITMISSINGVALUES,(VSLSSTaskPtr* , const MKL_INT* , const float* , const MKL_INT* , const float* , const MKL_INT* , const float* , const MKL_INT* , float* , const MKL_INT* , float* ))
+
+/*
+// Editors to modify matrix parameterization parameters
+*/
+_Mkl_Api(int,vsldSSEditCorParameterization,(VSLSSTaskPtr , const double* , const MKL_INT* , double* , const MKL_INT* ))
+_mkl_api(int,vsldsseditcorparameterization,(VSLSSTaskPtr* , const double* , const MKL_INT* , double* , const MKL_INT* ))
+_MKL_API(int,VSLDSSEDITCORPARAMETERIZATION,(VSLSSTaskPtr* , const double* , const MKL_INT* , double* , const MKL_INT* ))
+
+_Mkl_Api(int,vslsSSEditCorParameterization,(VSLSSTaskPtr , const float* , const MKL_INT* , float* , const MKL_INT* ))
+_mkl_api(int,vslssseditcorparameterization,(VSLSSTaskPtr* , const float* , const MKL_INT* , float* , const MKL_INT* ))
+_MKL_API(int,VSLSSSEDITCORPARAMETERIZATION,(VSLSSTaskPtr* , const float* , const MKL_INT* , float* , const MKL_INT* ))
+
+
+/*
+// Compute routines
+*/
+_Mkl_Api(int,vsldSSCompute,(VSLSSTaskPtr , const unsigned MKL_INT64 , const MKL_INT ))
+_mkl_api(int,vsldsscompute,(VSLSSTaskPtr* , const unsigned MKL_INT64* , const MKL_INT* ))
+_MKL_API(int,VSLDSSCOMPUTE,(VSLSSTaskPtr* , const unsigned MKL_INT64* , const MKL_INT* ))
+
+_Mkl_Api(int,vslsSSCompute,(VSLSSTaskPtr , const unsigned MKL_INT64 , const MKL_INT ))
+_mkl_api(int,vslssscompute,(VSLSSTaskPtr* , const unsigned MKL_INT64* , const MKL_INT* ))
+_MKL_API(int,VSLSSSCOMPUTE,(VSLSSTaskPtr* , const unsigned MKL_INT64* , const MKL_INT* ))
+
+
+/*
+// Task destructor
+*/
+_Mkl_Api(int,vslSSDeleteTask,(VSLSSTaskPtr* ))
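The editors above, together with the compute entry points and the task destructor, make up the whole summary-statistics lifecycle (the lowercase `_mkl_api` and uppercase `_MKL_API` rows declare the same routines with pointer arguments for Fortran-style linkage). A minimal sketch of that flow, assuming the task-creation and generic-editor routines (`vsldSSNewTask`, `vsldSSEditTask`) declared elsewhere in this header:

```cpp
// Hedged sketch: mean of n observations of a dim-dimensional variable.
// vsldSSNewTask/vsldSSEditTask are VSL routines declared outside this excerpt.
#include "mkl_vsl.h"

int compute_mean(const double *x, MKL_INT dim, MKL_INT n, double *mean) {
    VSLSSTaskPtr task;
    MKL_INT xstorage = VSL_SS_MATRIX_STORAGE_ROWS;
    int status = vsldSSNewTask(&task, &dim, &n, &xstorage, x, 0, 0);
    if (status != VSL_STATUS_OK) return status;
    vsldSSEditTask(task, VSL_SS_ED_MEAN, mean);     // register result buffer
    status = vsldSSCompute(task, VSL_SS_MEAN, VSL_SS_METHOD_FAST);
    vslSSDeleteTask(&task);                         // always release the task
    return status;
}
```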
+_mkl_api(int,vslssdeletetask,(VSLSSTaskPtr* )) +_MKL_API(int,VSLSSDELETETASK,(VSLSSTaskPtr* )) + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MKL_VSL_FUNCTIONS_H__ */ diff --git a/python/ideep4py/include/mkl/mkl_vsl_types.h b/python/ideep4py/include/mkl/mkl_vsl_types.h new file mode 100644 index 00000000..48c4e38e --- /dev/null +++ b/python/ideep4py/include/mkl/mkl_vsl_types.h @@ -0,0 +1,126 @@ +/* file: mkl_vsl_types.h */ +/******************************************************************************* +* Copyright (c) 2006-2017, Intel Corporation +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of Intel Corporation nor the names of its contributors +* may be used to endorse or promote products derived from this software +* without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + +/* +//++ +// This file contains user-level type definitions. +//-- +*/ + +#ifndef __MKL_VSL_TYPES_H__ +#define __MKL_VSL_TYPES_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include "mkl_types.h" + +/* +//++ +// TYPEDEFS +//-- +*/ + +/* +// POINTER TO STREAM STATE STRUCTURE +// This is a void pointer to hide implementation details. 
+*/ +typedef void* VSLStreamStatePtr; +typedef void* VSLConvTaskPtr; +typedef void* VSLCorrTaskPtr; +typedef void* VSLSSTaskPtr; + +/* +// POINTERS TO BASIC RANDOM NUMBER GENERATOR FUNCTIONS +// Each BRNG must have following implementations: +// +// * Stream initialization (InitStreamPtr) +// * Integer-value recurrence implementation (iBRngPtr) +// * Single precision implementation (sBRngPtr) - for random number generation +// uniformly distributed on the [a,b] interval +// * Double precision implementation (dBRngPtr) - for random number generation +// uniformly distributed on the [a,b] interval +*/ +typedef int (*InitStreamPtr)( int method, VSLStreamStatePtr stream, \ + int n, const unsigned int params[] ); +typedef int (*sBRngPtr)( VSLStreamStatePtr stream, int n, float r[], \ + float a, float b ); +typedef int (*dBRngPtr)( VSLStreamStatePtr stream, int n, double r[], \ + double a, double b ); +typedef int (*iBRngPtr)( VSLStreamStatePtr stream, int n, unsigned int r[] ); + +/*********** Pointers to callback functions for abstract streams *************/ +typedef int (*iUpdateFuncPtr)( VSLStreamStatePtr stream, int* n, \ + unsigned int ibuf[], int* nmin, int* nmax, int* idx ); +typedef int (*dUpdateFuncPtr)( VSLStreamStatePtr stream, int* n, + double dbuf[], int* nmin, int* nmax, int* idx ); +typedef int (*sUpdateFuncPtr)( VSLStreamStatePtr stream, int* n, \ + float sbuf[], int* nmin, int* nmax, int* idx ); + + +/* +// BASIC RANDOM NUMBER GENERATOR PROPERTIES STRUCTURE +// The structure describes the properties of given basic generator, e.g. size +// of the stream state structure, pointers to function implementations, etc. +// +// BRNG properties structure fields: +// StreamStateSize - size of the stream state structure (in bytes) +// WordSize - size of base word (in bytes). Typically this is 4 bytes. +// NSeeds - number of words necessary to describe generator's state +// NBits - number of bits actually used in base word. For example, +// only 31 least significant bits are actually used in +// basic random number generator MCG31m1 with 4-byte base +// word. NBits field is useful while interpreting random +// words as a sequence of random bits. +// IncludesZero - FALSE if 0 cannot be generated in integer-valued +// implementation; TRUE if 0 can be potentially generated in +// integer-valued implementation. +// InitStream - pointer to stream state initialization function +// sBRng - pointer to single precision implementation +// dBRng - pointer to double precision implementation +// iBRng - pointer to integer-value implementation +*/ +typedef struct _VSLBRngProperties { + int StreamStateSize; /* Stream state size (in bytes) */ + int NSeeds; /* Number of seeds */ + int IncludesZero; /* Zero flag */ + int WordSize; /* Size (in bytes) of base word */ + int NBits; /* Number of actually used bits */ + InitStreamPtr InitStream; /* Pointer to InitStream func */ + sBRngPtr sBRng; /* Pointer to S func */ + dBRngPtr dBRng; /* Pointer to D func */ + iBRngPtr iBRng; /* Pointer to I func */ +} VSLBRngProperties; + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MKL_VSL_TYPES_H__ */ diff --git a/python/ideep4py/include/mm/mem.h b/python/ideep4py/include/mm/mem.h new file mode 100644 index 00000000..88784bc5 --- /dev/null +++ b/python/ideep4py/include/mm/mem.h @@ -0,0 +1,193 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
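To make the typedefs and the properties structure above concrete, here is a hedged sketch of the glue a user-defined basic generator would supply; the recurrence is a placeholder, the field values are illustrative, and registration itself goes through `vslRegisterBrng`, which is declared elsewhere in the VSL API:

```cpp
// Hedged sketch: callbacks matching InitStreamPtr/iBRngPtr, wired into a
// VSLBRngProperties record in the field order declared above.
#include "mkl_vsl.h"

static int my_init(int method, VSLStreamStatePtr stream, int n,
                   const unsigned int params[]) { return 0; }

static int my_ibrng(VSLStreamStatePtr stream, int n, unsigned int r[]) {
    for (int i = 0; i < n; i++) r[i] = 12345u;  // placeholder recurrence
    return 0;
}

static VSLBRngProperties props = {
    /*StreamStateSize=*/64, /*NSeeds=*/1, /*IncludesZero=*/1,
    /*WordSize=*/4, /*NBits=*/32,
    /*InitStream=*/my_init, /*sBRng=*/nullptr, /*dBRng=*/nullptr,
    /*iBRng=*/my_ibrng,
};
```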
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#pragma once
+
+// NOTE: the original header names were lost in extraction; this set is
+// inferred from the standard-library facilities used below.
+#include <cstdlib>
+#include <functional>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include "utils.h"
+
+using namespace std;
+static constexpr int DEFAULT_ALIGNMENT = 64;
+
+typedef enum {
+    MPOOL_ANON,
+    MPOOL_REORDER,
+    MPOOL_ELTWISE_FWD,
+    MPOOL_ELTWISE_BWD,
+    MPOOL_BN_FWD,
+    MPOOL_BN_BWD,
+    MPOOL_LRN_FWD,
+    MPOOL_LRN_BWD,
+    MPOOL_CONV_FWD,
+    MPOOL_CONV_BWD,
+    MPOOL_POOLING_FWD,
+    MPOOL_POOLING_BWD,
+    MPOOL_IP_FWD,
+    MPOOL_IP_BWD,
+    MPOOL_CONCAT_FWD,
+    MPOOL_CONCAT_BWD,
+} mem_pool_t;
+
+// Size-bucketed, mutex-guarded free-list allocator. The template parameter
+// (reconstructed; it was stripped in extraction) is the byte alignment of
+// the pointers handed back to callers.
+template <std::size_t ALIGNMENT = DEFAULT_ALIGNMENT>
+class Memory {
+public:
+    Memory() : alloc_size_(0), free_size_(0), seq_(0) {}
+    Memory(const char *name) : alloc_size_(0), free_size_(0)
+        , seq_(0), name_(name) {}
+    virtual ~Memory() {
+        //std::cout << name_ << " alloc size " << alloc_size_ << " free size "
+        //    << free_size_ << std::endl;
+    }
+
+    void* malloc(size_t size) {
+        std::lock_guard<std::mutex> lock(mutex_);
+        void *ptr;
+        int idx = to_index(size);
+
+        // First try to recycle a cached block of exactly this size.
+        if (!free_hashline_[idx].empty()) {
+            block_t *block = nullptr;
+            std::list<block_t *> &list = free_hashline_[idx];
+            typename std::list<block_t *>::iterator it;
+            for (it = list.begin(); it != list.end(); ++it) {
+                if ((*it)->header_.size_ == size) {
+                    block = *it;
+                    break;
+                }
+            }
+            if (block) {
+                list.erase(it);
+                void *ptr = static_cast<void *>(block);
+                free_size_ -= size;
+                //std::cout << name_ << " cache alloc seq " << block->header_.seq_ << " size " << block->header_.size_ << std::endl;
+                return GET_PTR(void, ptr, ALIGNMENT);
+            }
+        }
+        // No cached memory: over-allocate by ALIGNMENT so the block header
+        // fits in front of the aligned pointer returned to the caller.
+        size_t len = size + ALIGNMENT;
+        int rc = ::posix_memalign(&ptr, ALIGNMENT, len);
+        if (rc != 0) {
+            throw std::invalid_argument("Out of memory");
+        }
+        block_t *block = static_cast<block_t *>(ptr);
+        block->header_.size_ = size;
+        alloc_size_ += size;
+        //std::cout << name_ << " system alloc seq " << seq_ << " size " << size << std::endl;
+        block->header_.seq_ = seq_++;
+        return GET_PTR(void, ptr, ALIGNMENT);
+    }
+
+    void free(void* ptr) {
+        std::lock_guard<std::mutex> lock(mutex_);
+        // Step back over the header that precedes the user pointer.
+        block_t *block = GET_PTR(block_t, ptr, -ALIGNMENT);
+        int idx = to_index(block->header_.size_);
+        free_hashline_[idx].push_back(block);
+        free_size_ += block->header_.size_;
+        //std::cout << name_ << " free seq " << block->header_.seq_ << " size " << block->header_.size_ << std::endl;
+    }
+
+    void epoch() {
+    }
+private:
+    int to_index(std::size_t size) {
+        std::string str = long_to_string(size);
+        std::size_t hash = std::hash<std::string>{}(str);
+        int idx = hash % MAX_ENTRY;
+        return idx;
+    }
+
+    typedef union _header_str {
+        struct {
+            std::size_t size_;
+            int seq_;
+        };
+        char pad_[ALIGNMENT];
+    } header_t;
+
+    typedef struct _block_str {
+        header_t header_;
+        char data_[];
+    } block_t;
+
+    static constexpr int MAX_ENTRY = 512;
+
+    std::size_t alloc_size_;
+    std::size_t free_size_;
+    std::list<block_t *> free_hashline_[MAX_ENTRY];
+    std::mutex mutex_;
+    int seq_;
+    std::string name_;
+};
+
+void* dnn_malloc(size_t size, mem_pool_t pool=MPOOL_ANON);
+void dnn_free(void *p, mem_pool_t pool=MPOOL_ANON);
+
+// Just grab it from MKL-DNN
+namespace avx {
+#if 1
+    inline void* malloc(size_t size, int alignment) {
+        return ::dnn_malloc(size);
+    }
+    inline void free(void* p) { ::dnn_free(p); }
+#else
+    inline void* malloc(size_t size, int alignment) {
+        void *ptr;
+        int rc = ::posix_memalign(&ptr, alignment, size);
+        return (rc == 0) ? ptr : 0;
+    }
+    inline void free(void* p) { ::free(p); }
+#endif
+
+    struct compatible {
+        enum { default_alignment = DEFAULT_ALIGNMENT };
+        static void* operator new(size_t sz) {
+            return malloc(sz, default_alignment);
+        }
+        static void* operator new(size_t sz, void* p) { (void)sz; return p; }
+        static void* operator new[](size_t sz) {
+            return malloc(sz, default_alignment);
+        }
+        static void operator delete(void* p) {
+            free(p); }
+        static void operator delete[](void* p) {
+            free(p); }
+    };
+
+    struct byte: public compatible {
+        char q;
+    };
+}
+
+// Template arguments below were stripped in extraction and are reconstructed
+// from usage elsewhere in this tree (buffers are avx::byte, dims are int).
+class Allocator {
+  public:
+    static std::shared_ptr<avx::byte> malloc(size_t len, mem_pool_t mpool);
+    static std::shared_ptr<avx::byte> malloc(vector<int> dims, int element_sz, mem_pool_t mpool);
+};
diff --git a/python/ideep4py/include/mm/mkldnn_ex.h b/python/ideep4py/include/mm/mkldnn_ex.h
new file mode 100644
index 00000000..d9380a9d
--- /dev/null
+++ b/python/ideep4py/include/mm/mkldnn_ex.h
@@ -0,0 +1,92 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
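The pool above hands out cached, aligned blocks bucketed by request size, and `dnn_malloc`/`dnn_free` are the plain-function entry points over it. A minimal usage sketch, assuming the header is included as `mem.h`:

```cpp
// Hedged sketch: borrow a pooled scratch buffer for a convolution forward
// pass and hand it back so the block is recycled on the free list.
#include "mem.h"

void scratch_example() {
    void *buf = dnn_malloc(1024 * sizeof(float), MPOOL_CONV_FWD);
    // ... use buf as a 1024-float workspace ...
    dnn_free(buf, MPOOL_CONV_FWD);
}
```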
+ * + */ + + +#pragma once +#include "mkldnn.hpp" +#include "reorder.h" + +inline static mkldnn::memory reorder_if_must(mkldnn::memory user + , mkldnn::memory::primitive_desc expect + , std::unique_ptr &mreorder + , std::vector *dag) { + + if (user.get_primitive_desc() != expect) { + mkldnn::memory interm(expect); +#if 0 + auto user_mpd = user.get_primitive_desc(); + mkldnn::memory::format user_fmt = static_cast( + user_mpd.desc().data.format); + mkldnn::memory::format mkl_fmt = static_cast( + expect.desc().data.format); + mkldnn::memory::data_type dtype = static_cast( + expect.desc().data.data_type); + + if ((user_fmt == mkldnn::memory::format::nChw16c && + mkl_fmt == mkldnn::memory::format::nChw8c) || + (mkl_fmt == mkldnn::memory::format::nChw16c && + user_fmt == mkldnn::memory::format::nChw8c)) { + auto m = expect.desc().data; + int n = m.dims[0], c = m.dims[1], h = m.dims[2], w = m.dims[3]; + mkldnn::memory::dims tz = {n, c, h, w}; + mreorder.reset(new mkldnn::memory({{{ tz }, dtype, mkldnn::memory::format::nchw }, expect.get_engine()})); + //auto mreorder = new mkldnn::memory({{{ tz }, dtype, mkldnn::memory::format::nchw }, expect.get_engine()}); + auto rep1 = mkldnn::reorder(user, *mreorder); + auto rep2 = mkldnn::reorder(*mreorder, interm); + dag->push_back(rep1); + dag->push_back(rep2); + //static int spl_nr = 0; + //printf("\n %d *Reorder(split) iutput from:%d, to:%d\n", spl_nr++, user_fmt, mkl_fmt); + } else { + dag->push_back(mkldnn::reorder(user, interm)); + } +#else + dag->push_back(mkldnn::reorder(user, interm)); +#endif + return interm; + } + + return user; +} + +template +inline static void axpby(Tensor *dst, T a, Tensor *x, T b, Tensor *y) { + std::vector prims; + std::unique_ptr mreorder; + + /// Reorder to x's format + auto mid = reorder_if_must(y->mkldnn_memory(), x->mkldnn_memory().get_primitive_desc() + , mreorder, &prims); + + mkldnn::sum::primitive_desc sum_pd(std::vector({(float)a, (float)b}) + , {x->mkldnn_memory().get_primitive_desc(), mid.get_primitive_desc()}); + + std::vector inputs_at {x->mkldnn_memory(), mid}; + + mkldnn::sum sum_prim(sum_pd, inputs_at, dst->mkldnn_memory()); + prims.push_back(sum_prim); + + mkldnn::stream s(mkldnn::stream::kind::eager); + s.submit(prims).wait(); +} + diff --git a/python/ideep4py/include/mm/reorder.h b/python/ideep4py/include/mm/reorder.h new file mode 100644 index 00000000..76d58a0f --- /dev/null +++ b/python/ideep4py/include/mm/reorder.h @@ -0,0 +1,202 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#pragma once + +#include "mkldnn.hpp" +#include "tensor.h" + +extern engine cpu_engine; +static constexpr int MAX_NDIM = 12; //XXX: For now + +class Reorderer { +//protected: +public: + bool non_trivial_; + mkldnn::memory dst_; + std::shared_ptr data_; + + int ndims_; + int size_; + char format_[4]; + Py_ssize_t itemsize_; + Py_ssize_t strides_[MAX_NDIM]; + Py_ssize_t shape_[MAX_NDIM]; + + void _collect_buffer_info() { + auto md = dst_.get_primitive_desc().desc(); + int ndims = md.data.ndims; + + ndims_ = ndims; + switch(static_cast(md.data.data_type)) { + case mkldnn::memory::f32: + strcpy(format_, "f"); + itemsize_ = 4; + break; + case mkldnn::memory::s32: + strcpy(format_, "i"); + itemsize_ = 4; + break; + case mkldnn::memory::s16: + strcpy(format_, "h"); + itemsize_ = 2; + break; + case mkldnn::memory::s8: + strcpy(format_, "b"); + itemsize_ = 1; + break; + case mkldnn::memory::u8: + strcpy(format_, "B"); + itemsize_ = 1; + break; + default: + break; + } + + for (int i = 0; i < ndims; i ++) { + shape_[i] = md.data.dims[i]; + } + + Py_ssize_t sd = itemsize_; + + for (int i = ndims -1; i >= 0; --i) { + strides_[i] = sd; + sd *= shape_[i]; + } + } + + inline avx::byte *data() const { return data_.get(); } + +public: +#if 0 + Reorderer(const py_handle in) + :Reorderer(in.get()) {} +#endif + Reorderer(const Tensor *src) + : non_trivial_(src->incompatible()), dst_([src] () { + if (src->incompatible()) { + auto md_data = src->desc().data; + + mkldnn::memory::dims adims(md_data.dims + , md_data.dims + md_data.ndims); + + mkldnn::memory::primitive_desc pd ({adims + , static_cast(md_data.data_type) + , static_cast(::public_format(md_data.format))} + , src->get_engine()); + + // XXX: magic number 4 is a hack + return mkldnn::memory(pd, reinterpret_cast(4)); + } else { + return src->mkldnn_memory(); + }} ()), size_(src->size()) { + if (src->incompatible()) { + auto pd = dst_.get_primitive_desc(); + + data_ = std::shared_ptr(new avx::byte [pd.get_size()] + , [](avx::byte *p) {delete [] p;}); + + dst_.set_data_handle(data_.get()); + + } else { + data_ = src->share_data(); + } + + _collect_buffer_info(); + } + + mkldnn::reorder fire(const Tensor *src) { + mkldnn::reorder reorder(src->mkldnn_memory(), dst_); + mkldnn::stream s(mkldnn::stream::eager); + + s.submit({reorder}).wait(); + return reorder; + } + + mkldnn::reorder sync(const Tensor *src) { + mkldnn::reorder reorder(dst_, src->mkldnn_memory()); + mkldnn::stream s(mkldnn::stream::eager); + + s.submit({reorder}).wait(); + return reorder; + } + + inline bool non_trivial() const { + return non_trivial_; + } + +#if 0 + // PEP 3118 interface + int build_view(Py_buffer *view, int flags) { + view->buf = data_.get(); + view->itemsize = itemsize_; + view->readonly = 0; + view->internal = nullptr; + view->len = size_ * itemsize_; + + if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { + view->format = format_; + } else { + view->format = nullptr; + } + + if ((flags & PyBUF_ND) == PyBUF_ND) { + view->ndim = ndims_; + view->shape = shape_; + } else { + view->ndim = 0; + view->shape = nullptr; + } + + if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) { + view->strides = strides_; + } else { + view->strides = nullptr; + } + + view->suboffsets = nullptr; + + return 0; + } + + // Array 
protocol + PyArrayInterface *build_array_struct(void) { + auto arrstr = new PyArrayInterface(); + + arrstr->two = 2; + arrstr->nd = ndims_; + arrstr->typekind = *((char *)format_); + arrstr->itemsize = itemsize_; + arrstr->flags = NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_NOTSWAPPED | + NPY_ARRAY_ALIGNED | NPY_ARRAY_WRITEABLE; + arrstr->flags &= ~(NPY_ARRAY_UPDATEIFCOPY | NPY_ARRAY_OWNDATA); + arrstr->shape = shape_; + arrstr->strides = strides_; + arrstr->data = data_.get(); + arrstr->descr = nullptr; + + return arrstr; + } +#endif +}; diff --git a/python/ideep4py/include/mm/tensor.h b/python/ideep4py/include/mm/tensor.h new file mode 100644 index 00000000..6e4a9423 --- /dev/null +++ b/python/ideep4py/include/mm/tensor.h @@ -0,0 +1,513 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#pragma once + +#include +#include +#include "mkldnn.hpp" +#include "mem.h" +#include "utils.h" +using namespace std; +using namespace mkldnn; +extern engine cpu_engine; + +typedef size_t size_type; +enum data_type_t { + UNKNOWN_TYPE = 0, + FLOAT32, + SINT32, + SINT16, + SINT8, + UINT8, +}; + +inline int type2size(data_type_t type) { + int size = 0; + switch (type) { + case FLOAT32: + size = 4; + break; + case SINT32: + size = 4; + break; + case SINT16: + size = 2; + break; + case SINT8: + size = 1; + break; + case UINT8: + size = 1; + break; + default: + break; + } + return size; +} + +inline size_t prod(vectordims, int ndims) +{ + size_t prod = 1; + for (int i = (ndims - 1); i >= 0; i--) { + prod *= dims[i]; + } + return prod; +} + +//input_type:'d'-->data, 'w'-->weight +inline mkldnn_memory_format_t ndims2format(int ndims, char input_type = 'd') +{ + mkldnn_memory_format_t fmt = mkldnn_any; + switch (ndims) { + case 1: + fmt = mkldnn_x; + break; + case 2: + fmt = (input_type == 'd') ? mkldnn_nc : mkldnn_oi; + break; + case 4: + fmt = (input_type == 'd') ? mkldnn_nchw : mkldnn_oihw; + break; + default: + throw mkldnn::error(mkldnn_invalid_arguments + , "MKLDNN does not support dimensions" + + ndims); + } + + return fmt; +} + + +inline mkldnn_memory_format_t ndims2format_preferred(int ndims, vector dims, char input_type = 'd') +{ + mkldnn_memory_format_t fmt = mkldnn_any; + switch (ndims) { + case 1: + fmt = mkldnn_x; + break; + case 2: + fmt = (input_type == 'd') ? 
mkldnn_nc : mkldnn_oi; + break; + case 4: + if (input_type == 'd') { + fmt = (mkldnn_memory_format_t)get_desired_format(dims[1]); + } else if (input_type == 'w') { + fmt = (mkldnn_memory_format_t)get_desired_format_weight(dims[0], dims[1]); + } + break; + default: + throw mkldnn::error(mkldnn_invalid_arguments + , "MKLDNN does not support dimensions" + + ndims); + } + + return fmt; +} + + + +inline mkldnn_memory_format_t public_format(mkldnn_memory_format_t origin) +{ + mkldnn_memory_format_t ret; + // review this relations carefully + switch(origin) { + case mkldnn_nchw: + case mkldnn_nhwc: + case mkldnn_chwn: + case mkldnn_nChw8c: + case mkldnn_nChw16c: + ret = mkldnn_nchw; + break; + case mkldnn_oihw: + case mkldnn_ihwo: + case mkldnn_hwio: + case mkldnn_OIhw8i8o: + case mkldnn_OIhw16i16o: + case mkldnn_OIhw8o8i: + case mkldnn_OIhw16o16i: + case mkldnn_OIhw8i16o2i: + case mkldnn_OIhw8o16i2o: + case mkldnn_Oihw8o: + case mkldnn_Oihw16o: + case mkldnn_Ohwi8o: + case mkldnn_Ohwi16o: + case mkldnn_OhIw16o4i: + ret = mkldnn_oihw; + break; + default: + ret = origin; + break; + } + + return ret; +} + +inline mkldnn_memory_format_t format_2_as_4(mkldnn_memory_format_t origin) +{ + mkldnn_memory_format_t ret; + // review this relations carefully + switch(origin) { + case mkldnn_nc: + ret = mkldnn_nchw; + break; + case mkldnn_oi: + ret = mkldnn_oihw; + break; + default: + ret = origin; + break; + } + return ret; +} + +class Tensor { +public: + // Allocate memory in constructor + Tensor() : ndims_(0), type_(UNKNOWN_TYPE), size_(0), data_(nullptr) {} + virtual ~Tensor() = default; + + Tensor(int ndims, vector dims, data_type_t type) + : ndims_(ndims), dims_(dims), type_(type) { + size_ = std::accumulate(dims.begin(), dims.begin() + ndims, 1 + , std::multiplies()); + data_ = std::shared_ptr(new avx::byte [len()] + , [] (avx::byte *p) {delete [] p;}); + mm_fmt_ = ndims2format(ndims); + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + // input_type: 'd': data, 'w': weight + Tensor(int ndims, vector dims, void *data, data_type_t type, char input_type='d') + : ndims_(ndims), dims_(dims), type_(type) { + size_ = std::accumulate(dims.begin(), dims.begin() + ndims, 1 + , std::multiplies()); + data_ = std::shared_ptr(new avx::byte [len()] + , [] (avx::byte *p) {delete [] p;}); + //memcpy(data_.get(), data, len()); + memory::data_type dt = to_mkldnn_type(); + if (dt == memory::data_type::f32 && len() > 0) { //currently, mkldnn only support most f32 currently, may add int8 in future? 
+ auto mm_fmt_i = ndims2format(ndims, input_type); + mm_fmt_ = ndims2format_preferred(ndims, dims, input_type); + auto mem_i = mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_i) } + , cpu_engine }, data); + + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + auto reorder_prim = reorder(mem_i, *mem_); + std::vector prims = {reorder_prim}; + mkldnn::stream s(mkldnn::stream::kind::eager); + s.submit(prims).wait(); + } else { + mm_fmt_ = ndims2format(ndims, input_type); + fast_memcpy((char*)data_.get(), (char*)data, len()); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + } + + Tensor(int ndims, vector dims, std::shared_ptr data, data_type_t type) + : ndims_(ndims), dims_(dims), type_(type) { + size_ = std::accumulate(dims.begin(), dims.begin() + ndims, 1 + , std::multiplies()); + data_ = data; + mm_fmt_ = ndims2format(ndims); + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + + Tensor(int ndims, vector dims, std::shared_ptr data + , mkldnn_memory_format_t mm_fmt, data_type_t type) + : ndims_(ndims), dims_(dims), type_(type) { + size_ = std::accumulate(dims.begin(), dims.begin() + ndims, 1 + , std::multiplies()); + data_ = data; + mm_fmt_ = mm_fmt; + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + + Tensor(int ndims, vector dims, + mkldnn_memory_format_t mm_fmt, data_type_t type) + : Tensor(ndims, dims, type) { + mm_fmt_ = mm_fmt; + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + +#if 0 + Tensor(int ndims, vector dims, void *data, + mkldnn_memory_format_t mm_fmt, data_type_t type=FLOAT32) + : Tensor(ndims, dims, data, type) { + mm_fmt_ = mm_fmt; + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } +#endif + + Tensor(mkldnn::memory::dims dims + , mkldnn_data_type_t dt + , mkldnn::memory::format format + , shared_ptr data) + : ndims_(dims.size()), dims_(dims) { + type_ = to_tensor_type(dt); + size_ = std::accumulate(dims.begin(), dims.end(), 1 + , std::multiplies()); + data_ = data; + mm_fmt_ = mkldnn_memory_format_t(format); + mem_.reset(new mkldnn::memory( + { { { dims_ }, static_cast(dt) + , static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + + } + + Tensor(mkldnn::memory::dims dims + , mkldnn::memory::data_type dt + , mkldnn::memory::format format + , const mkldnn::engine &engine) + : Tensor({{std::move(dims), dt, format}, engine}) {} + + Tensor(mkldnn::memory::primitive_desc pd) { + auto md = pd.desc().data; + ndims_ = md.ndims; + dims_.assign(md.dims, md.dims + md.ndims); + type_ = to_tensor_type(md.data_type); + size_ = std::accumulate(md.dims, md.dims + md.ndims, 1 + , std::multiplies()); + data_ = std::shared_ptr(new avx::byte [len()] + , [] (avx::byte *p) {delete [] p;}); + mm_fmt_ = md.format; + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt , static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + + inline void reset_memory(mkldnn_memory_format_t mkldnn_mfmt, avx::byte *data) { + mm_fmt_ = mkldnn_mfmt; + data_.reset(data); + memory::data_type dt = to_mkldnn_type(); + 
mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + + inline void reset_memory(mkldnn_memory_format_t mkldnn_mfmt, shared_ptr data) { + mm_fmt_ = mkldnn_mfmt; + data_ = data; + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + + inline void reset_memory(mkldnn_memory_format_t mkldnn_mfmt, vector dims) { + mm_fmt_ = mkldnn_mfmt; + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + } + + inline size_t len() { + return size_ * type2size(type_); + } + + inline bool incompatible() const { + return (public_format(mm_fmt_) != mm_fmt_); + } + + inline memory::data_type to_mkldnn_type() const { + memory::data_type type; + switch (type_) { + case FLOAT32: + type = memory::data_type::f32; + break; + case SINT32: + type = memory::data_type::s32; + break; + case SINT16: + type = memory::data_type::s16; + break; + case SINT8: + type = memory::data_type::s8; + break; + case UINT8: + type = memory::data_type::u8; + break; + default: + type = memory::data_undef; + break; + } + return type; + } + + inline data_type_t to_tensor_type(mkldnn_data_type_t type) const { + data_type_t dt; + switch (type) { + case mkldnn_f32: + dt = FLOAT32; + break; + case mkldnn_s32: + dt = SINT32; + break; + case mkldnn_s16: + dt = SINT16; + break; + case mkldnn_s8: + dt = SINT8; + break; + case mkldnn_u8: + dt = UINT8; + break; + default: + dt = UNKNOWN_TYPE; + break; + } + return dt; + } + + inline void *data() const { return data_.get(); } + inline std::shared_ptr share_data() const { + return data_; + } + + inline size_type size() const { return size_; } + inline mkldnn::engine get_engine() const { + return cpu_engine; + } + + inline int ndims() const { + return ndims_; + } + + inline vector dims() const { + return dims_; + } + + inline data_type_t type() const { + return type_; + } + + inline mkldnn::memory mkldnn_memory() const { + return *(to_mkldnn_memory()); + } + + inline memory::desc desc() const { + return to_mkldnn_memory()->get_primitive_desc().desc(); + } + + inline mkldnn_memory_format_t format() const { + return mm_fmt_; + } + + inline mkldnn::memory::format cxx_format() const { + return static_cast(mm_fmt_); + } + + inline mkldnn::memory::dims cxx_dims() const { + mkldnn::memory::dims ret(dims_.begin(), dims_.begin() + ndims_); + return ret; + } + + inline mkldnn::memory::data_type cxx_data_type() const { + return static_cast(to_mkldnn_type()); + } + + inline Tensor *reshape(vector dims) { + int ndims = dims.size(); + // Reorder to public format + mkldnn_memory_format_t public_fmt = public_format(mm_fmt_); + if (public_fmt != mm_fmt_) { + //printf("reorder----\n"); + memory::data_type dt = to_mkldnn_type(); + auto data = new avx::byte [len()]; + auto mem = mkldnn::memory( + { { { dims_ }, dt, static_cast(public_fmt) } + , cpu_engine }, data); + + auto reorder_prim = reorder(*mem_, mem); + std::vector prims = { reorder_prim }; + mkldnn::stream s(mkldnn::stream::kind::eager); + s.submit(prims).wait(); + + reset_memory(public_fmt, data); + } + + return new Tensor(ndims, dims, data_, type_); + } + + inline bool copyto(Tensor *src) { + if ((src->type() != type()) || (src->dims() != dims())) { + return false; + } + mm_fmt_ = src->format(); + fast_memcpy((char*)data_.get(), (char*)src->data(), len()); + memory::data_type dt = to_mkldnn_type(); + 
mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + return true; + } + + inline void copyto(char *src) { + mm_fmt_ = public_format(mm_fmt_); + fast_memcpy((char*)data_.get(), src, len()); + memory::data_type dt = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, dt, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); + return; + } + + Tensor * sum(vector axis); + +protected: + int ndims_; + vector dims_; + data_type_t type_; + size_t size_; + std::shared_ptr data_; + + mkldnn_memory_format_t mm_fmt_; + std::shared_ptr mem_; +private: + inline shared_ptr to_mkldnn_memory() const { + return mem_; + } +}; diff --git a/python/ideep4py/include/primitives/bn.h b/python/ideep4py/include/primitives/bn.h new file mode 100644 index 00000000..e9c3da82 --- /dev/null +++ b/python/ideep4py/include/primitives/bn.h @@ -0,0 +1,55 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _BN_H_ +#define _BN_H_ + +#include +#include +#include +#include "layer.h" +#include "tensor.h" + +template +class batch_normalization : public Layer +{ +public: + batch_normalization() {}; + ~batch_normalization() {}; + + static std::vector Forward(Tensor *src, + Tensor *w, + Tensor *mean, + Tensor *var, + float eps); + + static std::vector Backward(Tensor *src, + Tensor *diff_dst, + Tensor *mean, + Tensor *var, + Tensor *w, + float eps); +}; + +#endif diff --git a/python/ideep4py/include/primitives/concat.h b/python/ideep4py/include/primitives/concat.h new file mode 100644 index 00000000..ea449ca2 --- /dev/null +++ b/python/ideep4py/include/primitives/concat.h @@ -0,0 +1,62 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
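The `Tensor` constructors above take a raw buffer plus a layout hint and, for f32 data, reorder into MKL-DNN's preferred blocked format up front. A hedged sketch (the element type of `dims` is assumed to be `int`, matching `cxx_dims()`):

```cpp
// Hedged sketch: wrap an existing NCHW float buffer. input_type 'd' marks
// it as data (as opposed to 'w' for weights), selecting the nchw source
// format before the reorder into the preferred layout.
#include "tensor.h"

Tensor *wrap_nchw(float *data) {
    vector<int> dims = {2, 16, 28, 28};            // N, C, H, W
    return new Tensor(4, dims, data, FLOAT32, 'd');
}
```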
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _CONCAT_H_
+#define _CONCAT_H_
+
+// NOTE: the original header names were lost in extraction; inferred from usage.
+#include <mkldnn.hpp>
+#include <vector>
+#include <memory>
+#include "layer.h"
+#include "op_param.h"
+#include "tensor.h"
+
+// Template arguments below were stripped in extraction and are reconstructed
+// from usage.
+template <typename T>
+class Concat : public Layer<T>
+{
+public:
+    Concat();
+    ~Concat();
+
+    /*
+     * Concat Forward
+     * params:
+     * src: vector of input tensors
+     * axis: axis along which the inputs are concatenated
+     */
+    static Tensor *Forward(std::vector<Tensor *> src, int axis);
+
+    /*
+     * Concat backward
+     * params:
+     * diff_dst: gradient of the concatenated output
+     * offsets: split points along the axis, one per input
+     * axis: axis along which the gradient is split back
+     */
+    static std::vector<Tensor *> Backward(Tensor *diff_dst, std::vector<int> offsets, int axis);
+};
+
+#endif // _CONCAT_H_
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/include/primitives/conv.h b/python/ideep4py/include/primitives/conv.h
new file mode 100644
index 00000000..7369eefd
--- /dev/null
+++ b/python/ideep4py/include/primitives/conv.h
@@ -0,0 +1,98 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
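A hedged usage sketch of the `Concat` interface just declared, instantiated for `float` (the header leaves the template argument to the caller):

```cpp
// Hedged sketch: join two 4-D tensors along the channel axis (axis 1).
#include "concat.h"

Tensor *join_channels(Tensor *a, Tensor *b) {
    std::vector<Tensor *> inputs = {a, b};
    return Concat<float>::Forward(inputs, /*axis=*/1);
}
```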
+ *
+ */
+
+
+#ifndef _CONV_H_
+#define _CONV_H_
+
+// NOTE: the original header names were lost in extraction; inferred from usage.
+#include <mkldnn.hpp>
+#include <vector>
+#include <memory>
+#include "layer.h"
+#include "op_param.h"
+#include "tensor.h"
+
+// Template arguments below were stripped in extraction and are reconstructed
+// from usage.
+template <typename T>
+class Convolution2D : public Layer<T>
+{
+public:
+    Convolution2D();
+    ~Convolution2D();
+
+    /*
+     * Convolution Forward
+     * Y = W*X + b
+     * params:
+     * src: input, x
+     * weights: weights, w
+     * dst: output, y
+     * bias: bias, b
+     * cp: convolution parameters
+     */
+    static Tensor *Forward(Tensor *src,
+                           Tensor *weights,
+                           Tensor *bias,
+                           conv_param_t *cp);
+
+    /*
+     * Convolution backward weights
+     * gW = gy*x
+     * params:
+     * src: input, x
+     * diff_dst: diff dst, gy
+     * cp: convolution parameters
+     */
+    static Tensor *BackwardWeights(Tensor *src,
+                                   Tensor *diff_dst,
+                                   conv_param_t *cp);
+
+    /*
+     * Convolution backward weights & bias
+     * gW = gy*x, gb = sum(gy)
+     * params:
+     * src: input, x
+     * diff_dst: diff dst, gy
+     * cp: convolution parameters
+     */
+    static std::vector<Tensor *> BackwardWeightsBias(Tensor *src,
+                                                     Tensor *diff_dst,
+                                                     conv_param_t *cp);
+
+    /*
+     * Convolution backward data
+     * gx = gy*w
+     * params:
+     * weights: weights, w
+     * diff_dst: diff dst, gy
+     * cp: convolution parameters
+     */
+    static Tensor *BackwardData(Tensor *weights,
+                                Tensor *diff_dst,
+                                conv_param_t *cp);
+
+};
+
+#endif // _CONV_H_
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/include/primitives/dropout.h b/python/ideep4py/include/primitives/dropout.h
new file mode 100644
index 00000000..24af68c6
--- /dev/null
+++ b/python/ideep4py/include/primitives/dropout.h
@@ -0,0 +1,58 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#pragma once
+
+// NOTE: the original header names were lost in extraction; inferred from usage.
+#include <mkldnn.hpp>
+#include <vector>
+#include <memory>
+#include "layer.h"
+#include "tensor.h"
+
+// Template arguments reconstructed from usage.
+template <typename T>
+class Dropout : public Layer<T> {
+public:
+    /*
+     * Dropout Forward
+     * params:
+     * x: input
+     * ratio: input, dropout ratio
+     * y: output, vector. [0]: mask, [1]: y
+     * y = mask*x
+     */
+    static std::vector<Tensor *> Forward(Tensor* x, float ratio);
+
+    /*
+     * Dropout backward
+     * params:
+     * mask: input, dropout mask generated in the forward
+     * gy: input
+     * gx: output
+     * gx = mask*gy
+     */
+    static Tensor* Backward(Tensor* mask, Tensor* gy);
+};
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/include/primitives/eltwise.h b/python/ideep4py/include/primitives/eltwise.h
new file mode 100644
index 00000000..71d5fdd6
--- /dev/null
+++ b/python/ideep4py/include/primitives/eltwise.h
@@ -0,0 +1,80 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
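A hedged sketch of one `Convolution2D` forward call. The `conv_param_t` fields follow `op_param.h` further below; the shapes are illustrative, with `out_dims` chosen for a 3x3, stride-1, pad-1 convolution that preserves spatial size:

```cpp
// Hedged sketch: fill conv_param_t by hand and run the forward pass.
#include "conv.h"

Tensor *conv3x3(Tensor *src, Tensor *weights, Tensor *bias) {
    conv_param_t cp;
    cp.out_dims = {2, 32, 28, 28};   // N, O, H, W for a 2x16x28x28 input
    cp.kh = cp.kw = 3;               // kernel
    cp.sy = cp.sx = 1;               // stride
    cp.pad_lh = cp.pad_lw = cp.pad_rh = cp.pad_rw = 1;
    return Convolution2D<float>::Forward(src, weights, bias, &cp);
}
```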
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#pragma once + +#include +#include +#include +#include "layer.h" +#include "tensor.h" + +typedef enum _eltwise_algorithm { + ELTWISE_RELU = mkldnn::eltwise_relu, + ELTWISE_TANH = mkldnn::eltwise_tanh, + ELTWISE_ELU = mkldnn::eltwise_elu, + ELTWISE_SQUARE = mkldnn::eltwise_square, + ELTWISE_ABS = mkldnn::eltwise_abs, + ELTWISE_SQRT = mkldnn::eltwise_sqrt, + ELTWISE_LINEAR = mkldnn::eltwise_linear, + ELTWISE_BOUNDED_RELU = mkldnn::eltwise_bounded_relu, + ELTWISE_SOFT_RELU = mkldnn::eltwise_soft_relu, + ELTWISE_LOGISTIC = mkldnn::eltwise_logistic, +} eltwise_algorithm_t; + + +static inline mkldnn::algorithm ideepy2mkldnn_eltwise_algorithm(eltwise_algorithm_t alg_kind) { + return (mkldnn::algorithm)alg_kind; +} + +template class Eltwise; +template +class Eltwise : public Layer +{ +public: + Eltwise(); + ~Eltwise(); + + /* + * Eltwise Forward + * params: + * src: input, x + * dst: output, y + * y = max(x, 0) + */ + static Tensor *Forward(Tensor *src, eltwise_algorithm_t alg_kind, T2 alpha, T2 beta); + + /* + * Eltwise backward data + * params: + * src: input, x + * diff_dst: input, gy + * dst: output, gx + * gx = gy*y + */ + static Tensor *Backward(Tensor *src, Tensor *diff_dst, eltwise_algorithm_t alg_kind, T2 alpha, T2 beta); +}; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/layer.h b/python/ideep4py/include/primitives/layer.h new file mode 100644 index 00000000..6ab412dd --- /dev/null +++ b/python/ideep4py/include/primitives/layer.h @@ -0,0 +1,40 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _LAYER_H_ +#define _LAYER_H_ + +#include +#include + +template +class Layer { +public: + virtual ~Layer() {} +}; + +#endif // _LAYER_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/linear.h b/python/ideep4py/include/primitives/linear.h new file mode 100644 index 00000000..f329118f --- /dev/null +++ b/python/ideep4py/include/primitives/linear.h @@ -0,0 +1,74 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _LINEAR_H_ +#define _LINEAR_H_ + +#include +#include +#include +#include "layer.h" +#include "op_param.h" +#include "tensor.h" + +template +class Linear : public Layer +{ +public: + Linear(); + ~Linear(); + /* + *Linear forward + * Y = W*X + b + * params: + * src: input, x + * weights: weights, w + * dst: output, y + * bias: bias, b + */ + static Tensor *Forward( Tensor* src, + Tensor* weights, + Tensor* bias); + /* + * Linear backward weights + * gW = gy*x + * params: + * src: input, x + * diff_dst: diff dst, gy + */ + static std::vector BackwardWeights(Tensor* src, + Tensor* diff_dst, + bool need_bias); + /* + * Linear backward data + * gx = gy*w + * param: + * weights: weights, w + * diff_dst: diff dst, gy + */ + static Tensor *BackwardData(Tensor* weights, + Tensor* diff_dst); +}; +#endif //_LINEAR_H_ + diff --git a/python/ideep4py/include/primitives/lrn.h b/python/ideep4py/include/primitives/lrn.h new file mode 100755 index 00000000..293193b0 --- /dev/null +++ b/python/ideep4py/include/primitives/lrn.h @@ -0,0 +1,69 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
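A hedged sketch of the `Linear` interface above: the forward product `y = W*x + b`, then both backward passes:

```cpp
// Hedged sketch: one forward/backward round trip through Linear.
#include "linear.h"

void linear_round_trip(Tensor *x, Tensor *w, Tensor *b, Tensor *gy) {
    Tensor *y  = Linear<float>::Forward(x, w, b);
    Tensor *gx = Linear<float>::BackwardData(w, gy);
    std::vector<Tensor *> gw_gb =
        Linear<float>::BackwardWeights(x, gy, /*need_bias=*/true);
    (void)y; (void)gx; (void)gw_gb;
}
```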
+ * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _LRN_H_ +#define _LRN_H_ + +#include +#include +#include +#include "layer.h" +#include "op_param.h" +#include "tensor.h" + +template +class LocalResponseNormalization : public Layer +{ +public: + LocalResponseNormalization(); + ~LocalResponseNormalization(); + + /* + * Lrn Forward + * params: + * src: input, x + * pp: lrn parameters + * + * ret + * vector: + * return dst and workspace + */ + static std::vector Forward(Tensor *src, lrn_param_t *pp); + + /* + * Lrn backward + * param: + * src: x + * diff_dst: diff dst, gy + * pp: lrn parameters + * return diff_src gx + */ + static Tensor *Backward(Tensor* src, Tensor *diff_dst, Tensor *ws, lrn_param_t* pp); + +}; + +#endif // _LRN_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/op_param.h b/python/ideep4py/include/primitives/op_param.h new file mode 100644 index 00000000..dff288f4 --- /dev/null +++ b/python/ideep4py/include/primitives/op_param.h @@ -0,0 +1,66 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
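A hedged sketch of the LRN interface above, using AlexNet-style parameters; `Forward` returns the output together with the workspace that `Backward` later consumes:

```cpp
// Hedged sketch: fill lrn_param_t (defined in op_param.h below) and run
// the forward pass; the returned vector holds dst and the workspace.
#include "lrn.h"

std::vector<Tensor *> lrn_fwd(Tensor *x) {
    lrn_param_t pp;
    pp.n = 5;                        // local size
    pp.k = 2.0;
    pp.alpha = 1e-4;
    pp.beta = 0.75;
    pp.algo_kind = lrn_param_t::lrn_across_channels;
    return LocalResponseNormalization<float>::Forward(x, &pp);
}
```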
+ * + */ + + +#ifndef _OP_PARAM_H_ +#define _OP_PARAM_H_ + +#include + +struct conv_param_t { + std::vector out_dims; + int kh, kw; // kernel size + int dilate_y = 0, dilate_x = 0; // in MKL-DNN, common conv is treated as 0 dilate + int sy, sx; // stride + int pad_lh, pad_lw, pad_rh, pad_rw; //padding +}; + +struct pooling_param_t { + std::vector out_dims; + int kh, kw; // kernel size + int sy, sx; // stride + int pad_lh, pad_lw, pad_rh, pad_rw; //padding + + enum algorithm { + pooling_max, + pooling_avg, + pooling_avg_include_padding, + pooling_avg_exclude_padding, + } algo_kind; +}; + +struct lrn_param_t { + int n; // local size + double k; + double alpha; + double beta; + + enum algorithm { + lrn_across_channels, + lrn_within_channel, + } algo_kind; +}; +#endif // _OP_PARAM_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/bn_bwd.h b/python/ideep4py/include/primitives/ops/bn_bwd.h new file mode 100644 index 00000000..bd355129 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/bn_bwd.h @@ -0,0 +1,95 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
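The parameter structs above carry `out_dims` explicitly rather than deriving them, so the caller does the size arithmetic. A hedged helper (not part of the header) with the conventional formula, matching the `dilate = 0` convention noted in `conv_param_t`:

```cpp
// Hedged helper: conventional output-size arithmetic for filling out_dims.
inline int conv_out_size(int in, int k, int stride,
                         int pad_l, int pad_r, int dilate) {
    int kernel_extent = (k - 1) * (dilate + 1) + 1;  // dilate 0 => extent k
    return (in + pad_l + pad_r - kernel_extent) / stride + 1;
}
// e.g. conv_out_size(28, 3, 1, 1, 1, 0) == 28: a 3x3 stride-1 pad-1
// convolution preserves a 28-pixel edge.
```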
+ * + */ + + +#ifndef _BN_BWD_H_ +#define _BN_BWD_H_ + +#include +#include +#include "op.h" + +template +class batch_normalization_bwd : public Op { +public: + batch_normalization_bwd(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + float eps, bool scale_shift) : + flags_(0), bn_size_(src_d[1]), + bn_bwd_(nullptr), src_mem_(nullptr), + diff_dst_mem_(nullptr), mean_mem_(nullptr), + var_mem_(nullptr), w_mem_(nullptr), + diff_src_mem_(nullptr), diff_w_mem_(nullptr), + bwd_stream_(new mkldnn::stream(mkldnn::stream::kind::eager)) { + setup(src_d, diff_dst_d, eps, scale_shift); + } + + ~batch_normalization_bwd() {} + + void setup(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + float eps, bool scale_shift); + + void execute(void *src, void *diff_dst, void *mean, + void *var, void *w, void *diff_src, void *diff_w); + +public: + mkldnn_memory_format_t get_src_fmt() { + return (*src_mem_).get_primitive_desc().desc().data.format; + } + + mkldnn_memory_format_t get_diff_dst_fmt() { + return (*diff_dst_mem_).get_primitive_desc().desc().data.format; + } + + mkldnn_memory_format_t get_diff_src_fmt() { + return (*diff_src_mem_).get_primitive_desc().desc().data.format; + } + + mkldnn_memory_format_t get_diff_w_fmt() { + return (*diff_w_mem_).get_primitive_desc().desc().data.format; + } + +private: + unsigned long flags_; + int bn_size_; + + std::shared_ptr bn_bwd_; + + std::shared_ptr src_mem_; + std::shared_ptr diff_dst_mem_; + std::shared_ptr mean_mem_; + std::shared_ptr var_mem_; + std::shared_ptr w_mem_; + std::shared_ptr diff_src_mem_; + std::shared_ptr diff_w_mem_; + + std::vector bwd_primitives_; + std::shared_ptr bwd_stream_; + + mkldnn::memory::desc get_desc_data(mkldnn::memory m) { + return m.get_primitive_desc().desc().data; + } +}; + +#endif // _BN_BWD_H_ diff --git a/python/ideep4py/include/primitives/ops/bn_fwd.h b/python/ideep4py/include/primitives/ops/bn_fwd.h new file mode 100644 index 00000000..4fcb0ee5 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/bn_fwd.h @@ -0,0 +1,113 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
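Reviewer note: a usage sketch for `batch_normalization_bwd` as declared above. The shapes and buffer names (`x`, `gy`, `mean`, `var`, `w`, `gx`, `gw`) are hypothetical; each buffer is caller-owned and expected in the layout reported by the matching `get_*_fmt()` accessor:

```cpp
mkldnn::memory::dims src_d      = {32, 64, 28, 28}; // (n, c, h, w)
mkldnn::memory::dims diff_dst_d = {32, 64, 28, 28};

batch_normalization_bwd<float> bn_bwd(src_d, diff_dst_d,
                                      /*eps=*/1e-5f, /*scale_shift=*/true);

// x, gy, mean, var, w (scale/shift), gx, gw: float buffers owned by the caller
bn_bwd.execute(x, gy, mean, var, w, gx, gw);
```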
+ * + */ + + +#ifndef _BN_FWD_H_ +#define _BN_FWD_H_ + +#include +#include +#include "op.h" + +template +class batch_normalization_fwd : public Op { +public: + batch_normalization_fwd(mkldnn::memory::dims src_d, + float eps, + bool scale_shift, + bool global_stats, + bool training) : + flags_(0), pkind_(mkldnn::forward_training), + bn_size_(src_d[1]), bn_fwd_(nullptr), src_mem_(nullptr), + w_mem_(nullptr), dst_mem_(nullptr), + mean_mem_(nullptr), var_mem_(nullptr), + fwd_stream_(new mkldnn::stream(mkldnn::stream::kind::eager)) { + setup(src_d, eps, scale_shift, global_stats, training); + } + + ~batch_normalization_fwd() {} + + void setup(mkldnn::memory::dims src_d, float eps, + bool scale_shift, bool global_stats, bool training); + + void execute(void *src, void *w, void *dst, void *mean, void *var); + +public: + mkldnn_memory_format_t get_src_fmt() { + return (*src_mem_).get_primitive_desc().desc().data.format; + } + + mkldnn_memory_format_t get_dst_fmt() { + return (*dst_mem_).get_primitive_desc().desc().data.format; + } + + mkldnn_memory_format_t get_mean_fmt() { + return (*mean_mem_).get_primitive_desc().desc().data.format; + } + + int get_mean_ndims() { + return static_cast((*mean_mem_).get_primitive_desc().desc().data.ndims); + } + + mkldnn::memory::dims get_mean_dims() { + std::vector dims; + dims.push_back(bn_size_); + return dims; + } + + mkldnn_memory_format_t get_var_fmt() { + return (*var_mem_).get_primitive_desc().desc().data.format; + } + + int get_var_ndims() { + return static_cast((*var_mem_).get_primitive_desc().desc().data.ndims); + } + + mkldnn::memory::dims get_var_dims() { + std::vector dims; + dims.push_back(bn_size_); + return dims; + } + +private: + unsigned long flags_; + mkldnn::prop_kind pkind_; + int bn_size_; + + std::shared_ptr bn_fwd_; + + std::shared_ptr src_mem_; + std::shared_ptr w_mem_; + std::shared_ptr dst_mem_; + std::shared_ptr mean_mem_; + std::shared_ptr var_mem_; + + std::vector fwd_primitives_; + std::shared_ptr fwd_stream_; + + mkldnn::memory::desc get_desc_data(mkldnn::memory m) { + return m.get_primitive_desc().desc().data; + } +}; + +#endif // _BN_FWD_H_ diff --git a/python/ideep4py/include/primitives/ops/concat_bwd.h b/python/ideep4py/include/primitives/ops/concat_bwd.h new file mode 100644 index 00000000..e7ae06b8 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/concat_bwd.h @@ -0,0 +1,86 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ *
+ */
+
+
+#ifndef _CONCAT_BWD_H_
+#define _CONCAT_BWD_H_
+
+#include <mkldnn.hpp>
+#include <vector>
+#include <memory>
+#include "op.h"
+
+template <typename T>
+class ConcatBwd : public Op<T>
+{
+public:
+    ConcatBwd(std::vector<mkldnn::memory::dims> diff_src_ds,
+              mkldnn::memory::dims diff_dst_d,
+              int axis);
+    ~ConcatBwd();
+
+    /*
+     * Concat backward primitive setup
+     * Params:
+     *     diff_src_ds: gradients w.r.t. each concatenated input
+     *     diff_dst_d: output, (n, out_c, out_h, out_w)
+     *     axis: axis to concat
+     */
+    void setup(std::vector<mkldnn::memory::dims> diff_src_ds,
+               mkldnn::memory::dims diff_dst_d,
+               int axis);
+
+    /*
+     * Concat backward execute
+     */
+    void execute(std::vector<void *> diff_srcs, void *diff_dst);
+
+public:
+    // expected memory format for this primitive instance
+    // backward
+    std::vector<mkldnn::memory::format> diff_src_fmts_;
+    mkldnn::memory::format diff_dst_fmt_;
+
+private:
+    int axis_;
+
+    // MKLDNN memory
+    std::vector<std::shared_ptr<mkldnn::memory>> diff_src_mems_; // gxs
+
+    std::shared_ptr<mkldnn::memory::desc> diff_dst_md_;            // gy
+    std::shared_ptr<mkldnn::memory::primitive_desc> diff_dst_mpd_; // gy
+    std::shared_ptr<mkldnn::memory> diff_dst_mem_;                 // gy
+
+    // desc & primitive desc
+    std::shared_ptr<mkldnn::reorder::primitive_desc> reorder_pd_;
+    std::shared_ptr<mkldnn::primitive> reorder_prim_;
+
+    std::shared_ptr<mkldnn::stream> bwd_stream_;
+    std::vector<mkldnn::primitive> bwd_primitives_; // bwd primitive vector
+};
+
+#endif // _CONCAT_BWD_H_
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/include/primitives/ops/concat_fwd.h b/python/ideep4py/include/primitives/ops/concat_fwd.h
new file mode 100644
index 00000000..e8484337
--- /dev/null
+++ b/python/ideep4py/include/primitives/ops/concat_fwd.h
@@ -0,0 +1,86 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _CONCAT_FWD_H_
+#define _CONCAT_FWD_H_
+
+#include <mkldnn.hpp>
+#include <vector>
+#include <memory>
+#include "op.h"
+
+template <typename T>
+class ConcatFwd : public Op<T>
+{
+public:
+    ConcatFwd(std::vector<mkldnn::memory::dims> src_ds,
+              mkldnn::memory::dims dst_d, int axis);
+    ~ConcatFwd();
+
+    /*
+     * Concat forward primitive setup
+     * Params:
+     *     src_ds: inputs
+     *     dst_d: output, (n, out_c, out_h, out_w)
+     *     axis: axis to concat
+     */
+    void setup(std::vector<mkldnn::memory::dims> src_ds,
+               mkldnn::memory::dims dst_d,
+               int axis);
+
+    /*
+     * Concat forward execute
+     */
+    void execute(std::vector<void *> src, void *dst);
+
+public:
+    // expected memory format for this primitive instance
+    // forward
+    std::vector<mkldnn::memory::format> src_fmts_;
+    mkldnn::memory::format dst_fmt_;
+
+private:
+    int axis_;
+
+    // MKLDNN memory
+    // forward
+    std::vector<mkldnn::memory::primitive_desc> src_mpds_;  // xs
+    std::vector<std::shared_ptr<mkldnn::memory>> src_mems_; // xs
+    std::vector<mkldnn::primitive::at> src_prim_at_;        // xs
+
+    std::shared_ptr<mkldnn::memory::desc> dst_md_; // y
+    std::shared_ptr<mkldnn::memory> dst_mem_;      // y
+
+    std::shared_ptr<mkldnn::stream> fwd_stream_;
+
+    // desc & primitive desc
+    std::shared_ptr<mkldnn::concat::primitive_desc> concat_pd_;
+    std::shared_ptr<mkldnn::concat> concat_fwd_;
+};
+
+#endif // _CONCAT_FWD_H_
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/include/primitives/ops/conv_bwd_data.h b/python/ideep4py/include/primitives/ops/conv_bwd_data.h
new file mode 100644
index 00000000..765d99f6
--- /dev/null
+++ b/python/ideep4py/include/primitives/ops/conv_bwd_data.h
@@ -0,0 +1,112 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
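Reviewer note: a usage sketch for `ConcatFwd` as declared above, joining two tensors along the channel axis. The shapes and the buffer names `x0`, `x1`, `y` are hypothetical:

```cpp
// Two inputs concatenated along the channel axis (axis 1)
std::vector<mkldnn::memory::dims> src_ds = {
    {32, 16, 28, 28},
    {32, 48, 28, 28},
};
mkldnn::memory::dims dst_d = {32, 64, 28, 28}; // 16 + 48 channels

ConcatFwd<float> concat(src_ds, dst_d, /*axis=*/1);

std::vector<void *> srcs = {x0, x1}; // caller-owned input buffers
concat.execute(srcs, y);
```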
+ *
+ */
+
+
+#ifndef _CONV_BWD_DATA_H_
+#define _CONV_BWD_DATA_H_
+
+#include <mkldnn.hpp>
+#include <vector>
+#include <memory>
+#include "op.h"
+
+template <typename T>
+class Convolution2DBwdData : public Op<T>
+{
+public:
+    Convolution2DBwdData(mkldnn::memory::dims diff_src_d,
+                         mkldnn::memory::dims w_d,
+                         mkldnn::memory::dims diff_dst_d,
+                         int dilate_y, int dilate_x,
+                         int sy, int sx,
+                         int pad_lh, int pad_lw, int pad_rh, int pad_rw);
+    ~Convolution2DBwdData();
+
+    /*
+     * Convolution backward data primitive setup
+     * Params:
+     *     diff_src_d: input, (n, c, h, w)
+     *     w_d: weight, (out_c, in_c, h, w)
+     *     diff_dst_d: output, (n, out_c, out_h, out_w)
+     */
+    void setup(mkldnn::memory::dims diff_src_d,
+               mkldnn::memory::dims w_d,
+               mkldnn::memory::dims diff_dst_d,
+               int dilate_y, int dilate_x,
+               int sy, int sx,
+               int pad_lh, int pad_lw,
+               int pad_rh, int pad_rw);
+
+    /*
+     * Convolution backward data execute
+     */
+    void execute(void* diff_src, void* w, void* diff_dst);
+
+public:
+    // expected memory format for this primitive instance
+    // backward data
+    mkldnn::memory::format diff_src_fmt_;
+    mkldnn::memory::format weights_fmt_;
+    mkldnn::memory::format diff_dst_fmt_;
+
+    // convolution primitive
+    std::shared_ptr<mkldnn::primitive> conv_bwd_data_;
+
+private:
+    // MKLDNN memory
+    // backward data
+    std::shared_ptr<mkldnn::memory> diff_src_mem_; // gx
+    std::shared_ptr<mkldnn::memory> weights_mem_;  // W
+    std::shared_ptr<mkldnn::memory> diff_dst_mem_; // gy
+
+    std::shared_ptr<mkldnn::stream> bwd_data_stream_;
+    std::vector<mkldnn::primitive> bwd_data_primitives_;
+
+    // desc & primitive desc
+    // backward data
+    std::shared_ptr<mkldnn::convolution_backward_data::desc> bwd_data_desc_;
+    std::shared_ptr<mkldnn::convolution_backward_data::primitive_desc> bwd_data_pd_;
+
+    // FIXME
+    // forward hint, will be removed in the future
+    std::shared_ptr<mkldnn::convolution_forward::desc> fwd_desc_;
+    std::shared_ptr<mkldnn::convolution_forward::primitive_desc> fwd_pd_;
+
+    // memory dims
+    mkldnn::memory::dims dilates_;
+    mkldnn::memory::dims strides_;
+    mkldnn::memory::dims padding_l_;
+    mkldnn::memory::dims padding_r_;
+
+    // memory desc
+    // forward & backward can share the same mem desc
+    std::shared_ptr<mkldnn::memory::desc> diff_src_md_; // gx
+    std::shared_ptr<mkldnn::memory::desc> weights_md_;  // W
+    std::shared_ptr<mkldnn::memory::desc> diff_dst_md_; // gy
+};
+
+#endif // _CONV_BWD_DATA_H_
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/include/primitives/ops/conv_bwd_weights.h b/python/ideep4py/include/primitives/ops/conv_bwd_weights.h
new file mode 100644
index 00000000..2f16f276
--- /dev/null
+++ b/python/ideep4py/include/primitives/ops/conv_bwd_weights.h
@@ -0,0 +1,118 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ * + */ + + +#ifndef _CONV_BWD_WEIGHTS_H_ +#define _CONV_BWD_WEIGHTS_H_ + +#include +#include +#include +#include "op.h" + +template +class Convolution2DBwdWeights : public Op +{ +public: + Convolution2DBwdWeights(mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d, + mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw); + ~Convolution2DBwdWeights(); + + /* + * Convolution backward weight primitive setup + * Params: + * src_d: input, (n,c,h,w) + * diff_w_d: diff weight, (out_c, in_c, h, w) + * diff_b_d: diff_bias + * diff_dst_d: output, (n, out_c, out_h, out_w) + */ + void setup(mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d, + mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, + int pad_rh, int pad_rw); + + /* + * Convolution backward weights with bias + */ + void execute(void* src, void* diff_w, void* diff_b, void* diff_dst); + + /* + * Convolution backward weights without bias + */ + void execute(void* src, void* diff_w, void* diff_dst); + +public: + // expected memory format for this primitive instance + // forward + mkldnn::memory::format src_fmt_; + mkldnn::memory::format diff_weights_fmt_; + mkldnn::memory::format diff_dst_fmt_; + + // convolution primitive + std::shared_ptr conv_bwd_weights_; + +private: + //MKLDNN memory + //backward weights + std::shared_ptr src_mem_; // x + std::shared_ptr diff_weights_mem_;// gW + std::shared_ptr diff_bias_mem_;// gb + std::shared_ptr diff_dst_mem_; //gy + + // + std::shared_ptr bwd_weights_stream_; + std::vector bwd_weights_primitives_; + + //desc & prmitive desc + //backward weights + std::shared_ptr bwd_weights_desc_; + std::shared_ptr bwd_weights_pd_; + + // FIXME + // forward hint, will be remove in future + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + //memory dims + mkldnn::memory::dims dilates_; + mkldnn::memory::dims strides_; + mkldnn::memory::dims padding_l_; + mkldnn::memory::dims padding_r_; + + //memory desc + //forward & backward can share same mem desc + std::shared_ptr src_md_; //x + std::shared_ptr diff_weights_md_;// gW + std::shared_ptr diff_bias_md_; // gb + std::shared_ptr diff_dst_md_; // gy +}; + +#endif // _CONV_BWD_WEIGHTS_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/conv_fwd.h b/python/ideep4py/include/primitives/ops/conv_fwd.h new file mode 100644 index 00000000..2f2bf216 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/conv_fwd.h @@ -0,0 +1,111 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _CONV_FWD_H_ +#define _CONV_FWD_H_ + +#include +#include +#include +#include "op.h" + +template +class Convolution2DFwd : public Op +{ +public: + Convolution2DFwd(mkldnn::memory::dims src_d, mkldnn::memory::dims w_d, + mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw); + ~Convolution2DFwd(); + + /* + * Convolution forward primitive setup + * Params: + * src_d: input, (n,c,h,w) + * W_d: weight, (out_c, in_c, h, w) + * b_d: bias, if no bias, expected b_d as None dims ({}), not NULL + * dst_d: output, (n, out_c, out_h, out_w) + */ + void setup(mkldnn::memory::dims src_d, mkldnn::memory::dims w_d, + mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d, + int dilate_y, int dilate_x, + int s1, int s2, + int pl1, int pl2, + int pr1, int pr2); + + /* + * Convolution forward execute with bias + */ + void execute(void* src, void* w, void* b, void* dst); + + /* + * Convolution forward execute without bias + */ + void execute(void* src, void* w, void* dst); + +public: + // expected memory format for this primitive instance + // forward + mkldnn::memory::format src_fmt_; + mkldnn::memory::format weights_fmt_; + mkldnn::memory::format dst_fmt_; + + // convolution primitive + std::shared_ptr conv_fwd_; + +private: + //MKLDNN memory + //forward + std::shared_ptr src_mem_; // x + std::shared_ptr weights_mem_;// W + std::shared_ptr bias_mem_;// b + std::shared_ptr dst_mem_; //y + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + //desc & prmitive desc + //forward + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + //memory dims + mkldnn::memory::dims dilates_; + mkldnn::memory::dims strides_; + mkldnn::memory::dims padding_l_; + mkldnn::memory::dims padding_r_; + + //memory desc + std::shared_ptr src_md_; //x + std::shared_ptr weights_md_;// W + std::shared_ptr bias_md_; // b + std::shared_ptr dst_md_; // y +}; + +#endif // _CONV_FWD_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/eltwise_bwd.h b/python/ideep4py/include/primitives/ops/eltwise_bwd.h new file mode 100644 index 00000000..0da8989d --- /dev/null +++ b/python/ideep4py/include/primitives/ops/eltwise_bwd.h @@ -0,0 +1,89 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
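Reviewer note: a usage sketch for `Convolution2DFwd` as declared above; the 7x7/stride-2 shapes and the buffer names `x`, `w`, `b`, `y` are illustrative assumptions:

```cpp
mkldnn::memory::dims src_d = {32, 3, 224, 224};  // x: (n, c, h, w)
mkldnn::memory::dims w_d   = {64, 3, 7, 7};      // W: (out_c, in_c, kh, kw)
mkldnn::memory::dims b_d   = {64};               // pass {} to skip the bias
mkldnn::memory::dims dst_d = {32, 64, 112, 112}; // (224 + 3 + 3 - 7) / 2 + 1 = 112

Convolution2DFwd<float> conv(src_d, w_d, b_d, dst_d,
                             /*dilate_y=*/0, /*dilate_x=*/0,
                             /*sy=*/2, /*sx=*/2,
                             /*pad_lh=*/3, /*pad_lw=*/3,
                             /*pad_rh=*/3, /*pad_rw=*/3);

// Buffers must be laid out as reported by src_fmt_ / weights_fmt_ / dst_fmt_
conv.execute(x, w, b, y);
```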
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#pragma once + +#include +#include +#include "op.h" + +template class EltwiseBwd; +template +class EltwiseBwd : public Op +{ +public: + EltwiseBwd(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format dst_diff_fmt, T2 alpha, T2 beta); + ~EltwiseBwd(); + + /* + * Eltwise backward primitive setup + * Params: + * src_d: input, (n,c,h,w) + * dst_d: output, (n, out_c, out_h, out_w) + */ + void setup(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format dst_diff_fmt, T2 alpha, T2 beta); + + /* + * Eltwise backward execute + */ + void execute(void* src, void* dst_diff, void *src_diff); + +public: + // expected memory format for this primitive instance + // backward + mkldnn::memory::format src_diff_fmt_; + + // Eltwise primitive + std::shared_ptr eltwise_bwd_; + +private: + //MKLDNN memory + //backward + std::shared_ptr src_mem_; // x + std::shared_ptr dst_diff_mem_; //gy + std::shared_ptr src_diff_mem_; //gx + + std::shared_ptr bwd_stream_; + std::vector bwd_primitives_; + + //desc & prmitive desc + //backward + std::shared_ptr bwd_desc_; + std::shared_ptr bwd_pd_; + + //memory desc + std::shared_ptr src_md_; //x + std::shared_ptr dst_diff_md_; // gy + + //memory primitive desc + std::shared_ptr src_mpd_; //x + std::shared_ptr dst_diff_mpd_; //gy + + // fwd primitive desc + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; +}; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/eltwise_fwd.h b/python/ideep4py/include/primitives/ops/eltwise_fwd.h new file mode 100644 index 00000000..12d75d6d --- /dev/null +++ b/python/ideep4py/include/primitives/ops/eltwise_fwd.h @@ -0,0 +1,84 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#pragma once + +#include +#include +#include "op.h" + +template class EltwiseFwd; +template +class EltwiseFwd : public Op +{ +public: + EltwiseFwd(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format src_fmt, T2 alpha, T2 beta); + ~EltwiseFwd(); + + /* + * Eltwise forward primitive setup + * Params: + * src_d: input, (n,c,h,w) + * dst_d: output, (n, out_c, out_h, out_w) + */ + void setup(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format src_fmt, T2 alpha, T2 beta); + + /* + * Eltwise forward execute + */ + void execute(void* src, void* dst); + +public: + // expected memory format for this primitive instance + // forward + mkldnn::memory::format src_fmt_; + mkldnn::memory::format dst_fmt_; + + // Eltwise primitive + std::shared_ptr eltwise_fwd_; + +private: + //MKLDNN memory + //forward + std::shared_ptr src_mem_; // x + std::shared_ptr dst_mem_; //y + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + //desc & prmitive desc + //forward + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + //memory desc + std::shared_ptr src_md_; //x + std::shared_ptr dst_md_; // y + + //memory primitive desc + std::shared_ptr src_mpd_; //x +}; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/linear_bwd_data.h b/python/ideep4py/include/primitives/ops/linear_bwd_data.h new file mode 100644 index 00000000..f4cf7d82 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/linear_bwd_data.h @@ -0,0 +1,125 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
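Reviewer note: a usage sketch for `EltwiseFwd`, assuming its two template parameters are the element type and the alpha/beta type (both `float` here); the shapes and buffers `x`, `y` are hypothetical. For `eltwise_relu`, `alpha` is the negative slope and `beta` is unused:

```cpp
mkldnn::memory::dims src_d = {32, 64, 28, 28};

EltwiseFwd<float, float> relu(src_d, mkldnn::algorithm::eltwise_relu,
                              mkldnn::memory::format::nchw,
                              /*alpha=*/0.0f, /*beta=*/0.0f);
relu.execute(x, y); // x, y: caller-owned float buffers
```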
+ * + */ + + +#ifndef _LINEAR_BWD_DATA_H_ +#define _LINEAR_BWD_DATA_H_ + +#include +#include +#include +#include "op.h" + +template +class LinearBwdData : public Op +{ +public: + LinearBwdData(mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims w_d, + mkldnn::memory::dims diff_dst_d); + ~LinearBwdData(); + /* + * Linear backward data primitive setup + * Params: + * diff_src_d: input, (n,c,h,w) + * w_d: diff weight, (out_c, in_c, h, w) + * diff_dst_d: output, (n, out_c, out_h, out_w) + */ + void setup(mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims w_d, + mkldnn::memory::dims diff_dst_d); + /* + * Linear backward weights without bias + */ + void execute(void* diff_src, void* w, void* diff_dst); +public: + // expected memory format for this primitive instance + // forward + mkldnn::memory::format diff_src_fmt_; + mkldnn::memory::format weights_fmt_; + mkldnn::memory::format diff_dst_fmt_; + + //linear primitive + std::shared_ptr linear_bwd_data_; +private: + //MKLDNN memory + //backward weights + std::shared_ptr diff_src_mem_; // gx + std::shared_ptr weights_mem_;//w + std::shared_ptr diff_dst_mem_; //gy + + // + std::shared_ptr bwd_data_stream_; + std::vector bwd_data_primitives_; + + //desc & primitive desc + //backward weights + std::shared_ptr bwd_data_desc_; + std::shared_ptr bwd_data_pd_; + + //FIXME + //forward hint, will be removed in the future; + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + //memory desc + //forward & backward can share the same mem desc + std::shared_ptr diff_src_md_; //gx + std::shared_ptr weights_md_; // W + std::shared_ptr diff_dst_md_; //gy +}; +#endif //_LINEAR_BWD_DATA_H + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/ideep4py/include/primitives/ops/linear_bwd_weights.h b/python/ideep4py/include/primitives/ops/linear_bwd_weights.h new file mode 100644 index 00000000..62600bd4 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/linear_bwd_weights.h @@ -0,0 +1,116 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ *
+ */
+
+
+#ifndef _LINEAR_BWD_WEIGHTS_H_
+#define _LINEAR_BWD_WEIGHTS_H_
+
+#include <mkldnn.hpp>
+#include <vector>
+#include <memory>
+#include "op.h"
+
+template <typename T>
+class LinearBwdWeights : public Op<T>
+{
+public:
+    LinearBwdWeights(mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d,
+                     mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d);
+    ~LinearBwdWeights();
+    /*
+     * Linear backward weights primitive setup
+     * Params:
+     *     src_d: input, (n, c, h, w)
+     *     diff_w_d: diff weight, (out_c, in_c, h, w)
+     *     diff_b_d: diff bias
+     *     diff_dst_d: output, (n, out_c, out_h, out_w)
+     */
+    void setup(mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d,
+               mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d);
+    /*
+     * Linear backward weights with bias
+     */
+    void execute(void* src, void* diff_w, void* diff_b, void* diff_dst);
+    /*
+     * Linear backward weights without bias
+     */
+    void execute(void* src, void* diff_w, void* diff_dst);
+public:
+    // expected memory format for this primitive instance
+    // backward weights
+    mkldnn::memory::format src_fmt_;
+    mkldnn::memory::format diff_weights_fmt_;
+    mkldnn::memory::format diff_dst_fmt_;
+    // linear primitive
+    std::shared_ptr<mkldnn::primitive> linear_bwd_weights_;
+private:
+    // MKLDNN memory
+    // backward weights
+    std::shared_ptr<mkldnn::memory> src_mem_;          // x
+    std::shared_ptr<mkldnn::memory> diff_weights_mem_; // gW
+    std::shared_ptr<mkldnn::memory> diff_bias_mem_;    // gb
+    std::shared_ptr<mkldnn::memory> diff_dst_mem_;     // gy
+
+    std::shared_ptr<mkldnn::stream> bwd_weights_stream_;
+    std::vector<mkldnn::primitive> bwd_weights_primitives_;
+    // desc & primitive desc
+    // backward weights
+    std::shared_ptr<mkldnn::inner_product_backward_weights::desc> bwd_weights_desc_;
+    std::shared_ptr<mkldnn::inner_product_backward_weights::primitive_desc> bwd_weights_pd_;
+    // FIXME
+    // forward hint, will be removed in the future
+    std::shared_ptr<mkldnn::inner_product_forward::desc> fwd_desc_;
+    std::shared_ptr<mkldnn::inner_product_forward::primitive_desc> fwd_pd_;
+
+    // memory desc
+    // forward & backward can share the same mem desc
+    std::shared_ptr<mkldnn::memory::desc> src_md_;          // x
+    std::shared_ptr<mkldnn::memory::desc> diff_weights_md_; // gW
+    std::shared_ptr<mkldnn::memory::desc> diff_bias_md_;    // gb
+    std::shared_ptr<mkldnn::memory::desc> diff_dst_md_;     // gy
+};
+
+#endif // _LINEAR_BWD_WEIGHTS_H_
+
diff --git a/python/ideep4py/include/primitives/ops/linear_fwd.h b/python/ideep4py/include/primitives/ops/linear_fwd.h
new file mode 100644
index 00000000..ae22eef7
--- /dev/null
+++ b/python/ideep4py/include/primitives/ops/linear_fwd.h
@@ -0,0 +1,116 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ * + */ + + +#ifndef _LINEAR_FWD_H_ +#define _LINEAR_FWD_H_ + +#include +#include +#include +#include "op.h" + +template +class LinearFwd : public Op +{ +public: + LinearFwd(mkldnn::memory::dims src_d, mkldnn::memory::dims w_d, + mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d); + ~LinearFwd(); + /*Linear forward primitive setup + * Params: + * src_d: input, (n, c, h, w) + * W_d: weight, (out_c, in_c, h, w) + * b_d: bias, if no bias, expected b_d as None dims({}), not NULL + * dst_d: output, (n, out_c, out_h, out_w) + */ + void setup(mkldnn::memory::dims src_d, mkldnn::memory::dims w_d, + mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d); + /* + * Linear forward execute with bias + */ + void execute(void *src, void* w, void* b, void* dst); + /* + * Linear forward execute without bias + */ + void execute(void *src, void* w, void* dst); +public: + //expected memory format for this primitive instance + //forward + mkldnn::memory::format src_fmt_; + mkldnn::memory::format weights_fmt_; + mkldnn::memory::format dst_fmt_; + //linear primitive + std::shared_ptr linear_fwd_; +private: + //MKLDNN memory + //forward + std::shared_ptr src_mem_;// x + std::shared_ptr weights_mem_;// W + std::shared_ptr bias_mem_;// b + std::shared_ptr dst_mem_; // y + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + //desc & primitive desc + //forward + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + //memory desc + std::shared_ptr src_md_;//x + std::shared_ptr weights_md_;//W + std::shared_ptr bias_md_;//b + std::shared_ptr dst_md_;// y +}; +#endif //__LINEAR_FWD_H_ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/ideep4py/include/primitives/ops/lrn_bwd.h b/python/ideep4py/include/primitives/ops/lrn_bwd.h new file mode 100755 index 00000000..59e024af --- /dev/null +++ b/python/ideep4py/include/primitives/ops/lrn_bwd.h @@ -0,0 +1,106 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
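Reviewer note: a usage sketch for `LinearFwd` as declared above, for a fully connected layer mapping 512 features to 10; shapes and the buffers `x`, `w`, `b`, `y` are illustrative:

```cpp
mkldnn::memory::dims src_d = {32, 512}; // x: (n, in_c)
mkldnn::memory::dims w_d   = {10, 512}; // W: (out_c, in_c)
mkldnn::memory::dims b_d   = {10};      // pass {} to skip the bias
mkldnn::memory::dims dst_d = {32, 10};  // y: (n, out_c)

LinearFwd<float> fc(src_d, w_d, b_d, dst_d);
fc.execute(x, w, b, y);
```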
+ * + */ + + +#pragma once +#ifndef _LRN_BWD_H_ +#define _LRN_BWD_H_ + +#include +#include +#include +#include +#include "op.h" + +template +class LocalResponseNormalizationBwd: public Op{ +public: + LocalResponseNormalizationBwd(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int n, double k, double alpha, double beta, + mkldnn::algorithm alg_kind); // alg_kind = mkldnn::algorithm::lrn_across_channels + + ~LocalResponseNormalizationBwd(); + + /* + * lrn backward primitive setup + * Params: + * src_d: src + * diff_dst_d: diff dst + */ + void setup(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int n, double k, double alpha, double beta, + mkldnn::algorithm alg_kind); // alg_kind = mkldnn::algorithm::lrn_across_channels + + /* + * lrn backward execute + * params: + * src: + * diff_src: diff_src + * diff_dst: diff_dst + * ws: workspace + */ + void execute(void *src, void *diff_src, void *diff_dst, void *ws=NULL); + +public: + // expected memory format + mkldnn::memory::format src_fmt_; + mkldnn::memory::format diff_src_fmt_; + mkldnn::memory::format diff_dst_fmt_; + mkldnn::memory::format ws_fmt_; + + // algo + mkldnn::algorithm alg_kind_; +private: + // lrn primitive + std::shared_ptr bwd_; + std::shared_ptr bwd_stream_; + + // MKL-DNN memory, just dummy data + std::shared_ptr src_mem_; + std::shared_ptr ws_mem_; + std::shared_ptr diff_src_mem_; + std::shared_ptr diff_dst_mem_; + std::shared_ptr src_md_; + std::shared_ptr diff_src_md_; + std::shared_ptr diff_dst_md_; + + // fwd hint + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + std::shared_ptr bwd_desc_; + std::shared_ptr bwd_pd_; + + std::vector bwd_primitives_; +}; + +#endif // _LRN_BWD_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/lrn_fwd.h b/python/ideep4py/include/primitives/ops/lrn_fwd.h new file mode 100755 index 00000000..ec45443f --- /dev/null +++ b/python/ideep4py/include/primitives/ops/lrn_fwd.h @@ -0,0 +1,97 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#pragma once +#ifndef _LRN_FWD_H_ +#define _LRN_FWD_H_ + +#include +#include +#include +#include +#include "op.h" + +template +class LocalResponseNormalizationFwd: public Op{ +public: + LocalResponseNormalizationFwd(mkldnn::memory::dims src_d, mkldnn::memory::format src_fmt, + int n, double k, double alpha, double beta, + mkldnn::algorithm alg_kind); // alg_kind = mkldnn::algorithm::lrn_across_channels + + ~LocalResponseNormalizationFwd(); + + /* + * lrn forward primitive setup + * Params: + * src_d: input + * dst_d: out_put + */ + void setup(mkldnn::memory::dims src_d, mkldnn::memory::format src_fmt, + int n, double k, double alpha, double beta, + mkldnn::algorithm alg_kind); // alg_kind = mkldnn::algorithm::lrn_across_channels + + /* + * lrn forward execute + * params: + * src: input + * dst: output + * ws: workspace + */ + void execute(void *src, void *dst, void *ws=NULL); + +public: + // expected memory format + mkldnn::memory::format src_fmt_; + mkldnn::memory::format dst_fmt_; + mkldnn::memory::format ws_fmt_; + //workspace size + mkldnn::memory::dims ws_dims_; + mkldnn::memory::data_type ws_dt_; + size_t ws_size_; + + // algo + mkldnn::algorithm alg_kind_; + // int local_size_; +private: + // lrn primitive + std::shared_ptr fwd_; + std::shared_ptr fwd_stream_; + + // MKL-DNN memory, just dummy data + std::shared_ptr ws_mem_; + std::shared_ptr src_mem_; + std::shared_ptr dst_mem_; + std::shared_ptr src_md_; + std::shared_ptr dst_md_; + + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + std::vector fwd_primitives_; +}; + +#endif // _LRN_FWD_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/op.h b/python/ideep4py/include/primitives/ops/op.h new file mode 100644 index 00000000..73020301 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/op.h @@ -0,0 +1,42 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
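Reviewer note: a usage sketch for `LocalResponseNormalizationFwd` as declared above. The AlexNet-style hyperparameters and the buffers `x`, `y`, `ws` are assumptions for illustration; the workspace is sized from the `ws_size_` / `ws_dims_` members the class publishes:

```cpp
mkldnn::memory::dims src_d = {32, 96, 55, 55};

// Classic cross-channel LRN: local size 5, k = 2, alpha = 1e-4, beta = 0.75
LocalResponseNormalizationFwd<float> lrn(
    src_d, mkldnn::memory::format::nchw,
    /*n=*/5, /*k=*/2.0, /*alpha=*/1e-4, /*beta=*/0.75,
    mkldnn::algorithm::lrn_across_channels);

lrn.execute(x, y, ws); // ws: caller-owned scratch buffer for the backward pass
```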
+ * + */ + + +#ifndef _OP_H_ +#define _OP_H_ + +#include +#include + +template +class Op { +public: + virtual ~Op() {} + virtual void execute(){ return; }; + virtual void setup(){ return; }; +}; + +#endif // _OP_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/pooling_bwd.h b/python/ideep4py/include/primitives/ops/pooling_bwd.h new file mode 100644 index 00000000..101c1eec --- /dev/null +++ b/python/ideep4py/include/primitives/ops/pooling_bwd.h @@ -0,0 +1,107 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#pragma once +#ifndef _POOLING_BWD_H_ +#define _POOLING_BWD_H_ + +#include +#include +#include +#include +#include "op.h" + +template +class Pooling2DBwd: public Op{ +public: + Pooling2DBwd(mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind); // alg_kind = pooling_max + // or pooling_avg + ~Pooling2DBwd(); + + /* + * Pooling backward primitive setup + * Params: + * diff_src_d: diff src + * diff_dst_d: diff dst + */ + void setup(mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind); // alg_kind = pooling_max + // or pooling_avg + + /* + * Pooling backward execute + * params: + * diff_src: diff_src + * diff_dst: diff_dst + * ws: workspace + */ + void execute(void *diff_src, void *diff_dst, void *ws=NULL); + +public: + // expected memory format + mkldnn::memory::format diff_src_fmt_; + mkldnn::memory::format diff_dst_fmt_; + mkldnn::memory::format ws_fmt_; + + // algo + mkldnn::algorithm alg_kind_; +private: + // pooling primitive + std::shared_ptr bwd_; + std::shared_ptr bwd_stream_; + + // MKL-DNN memory, just dummy data + std::shared_ptr ws_mem_; + std::shared_ptr diff_src_mem_; + std::shared_ptr diff_dst_mem_; + std::shared_ptr diff_src_md_; + std::shared_ptr diff_dst_md_; + + // fwd hint + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + std::shared_ptr bwd_desc_; + std::shared_ptr bwd_pd_; + + std::vector bwd_primitives_; +}; + +#endif // _POOLING_BWD_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git 
a/python/ideep4py/include/primitives/ops/pooling_fwd.h b/python/ideep4py/include/primitives/ops/pooling_fwd.h new file mode 100644 index 00000000..9c716373 --- /dev/null +++ b/python/ideep4py/include/primitives/ops/pooling_fwd.h @@ -0,0 +1,101 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#pragma once +#ifndef _POOLING_FWD_H_ +#define _POOLING_FWD_H_ + +#include +#include +#include +#include +#include "op.h" + +template +class Pooling2DFwd: public Op{ +public: + Pooling2DFwd(mkldnn::memory::dims src_d, mkldnn::memory::dims dst_d, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind); // alg_kind = pooling_max + // or pooling_avg + ~Pooling2DFwd(); + + /* + * Pooling forward primitive setup + * Params: + * src_d: input + * dst_d: out_put + */ + void setup(mkldnn::memory::dims src_d, mkldnn::memory::dims dst_d, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind); // alg_kind = pooling_max + // or pooling_avg + + /* + * Pooling forward execute + * params: + * src: input + * dst: output + * ws: workspace + */ + void execute(void *src, void *dst, void *ws=NULL); + +public: + // expected memory format + mkldnn::memory::format src_fmt_; + mkldnn::memory::format dst_fmt_; + mkldnn::memory::format ws_fmt_; + //workspace size + mkldnn::memory::dims ws_dims_; + mkldnn::memory::data_type ws_dt_; + size_t ws_size_; + + // algo + mkldnn::algorithm alg_kind_; +private: + // pooling primitive + std::shared_ptr fwd_; + std::shared_ptr fwd_stream_; + + // MKL-DNN memory, just dummy data + std::shared_ptr ws_mem_; + std::shared_ptr src_mem_; + std::shared_ptr dst_mem_; + std::shared_ptr src_md_; + std::shared_ptr dst_md_; + + std::shared_ptr fwd_desc_; + std::shared_ptr fwd_pd_; + + std::vector fwd_primitives_; +}; + +#endif // _POOLING_FWD_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/ops/reorder_op.h b/python/ideep4py/include/primitives/ops/reorder_op.h new file mode 100644 index 00000000..28a548bc --- /dev/null +++ b/python/ideep4py/include/primitives/ops/reorder_op.h @@ -0,0 +1,78 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
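Reviewer note: a usage sketch for `Pooling2DFwd` as declared above, for a 2x2 max pool with stride 2; shapes and the buffers `x`, `y`, `ws` are illustrative:

```cpp
mkldnn::memory::dims src_d = {32, 64, 56, 56};
mkldnn::memory::dims dst_d = {32, 64, 28, 28}; // 2x2 window, stride 2, no padding

Pooling2DFwd<float> pool(src_d, dst_d,
                         /*ker_h=*/2, /*ker_w=*/2,
                         /*sy=*/2, /*sx=*/2,
                         /*pad_lh=*/0, /*pad_lw=*/0,
                         /*pad_rh=*/0, /*pad_rw=*/0,
                         mkldnn::algorithm::pooling_max);

pool.execute(x, y, ws); // ws records the max indices; avg pooling ignores it
```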
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _REORDER_OP_H_ +#define _REORDER_OP_H_ + +#include +#include +#include +#include "op.h" + +template +class ReorderOp : public Op +{ +public: + ReorderOp(mkldnn::memory::dims dims, mkldnn::memory::format src_fmt, mkldnn::memory::format dst_fmt); + ~ReorderOp(); + + /* + * Reorder primitive setup + * Params: + * dims: + * src_fmt: + * dst_fmt: + */ + void setup(mkldnn::memory::dims dims, mkldnn::memory::format src_fmt, mkldnn::memory::format dst_fmt); + + /* + * reorder execute + */ + void execute(void* src, void* dst); + +public: + // expected memory format for this primitive instance + mkldnn::memory::format src_fmt_; + mkldnn::memory::format dst_fmt_; + + // reorder primitive + std::shared_ptr reorder_prim_; + +private: + //MKLDNN memory + //forward + std::shared_ptr src_mem_; // x + std::shared_ptr dst_mem_; //y + + std::shared_ptr reorder_stream_; + + //memory desc + std::shared_ptr src_md_; //x + std::shared_ptr dst_md_; // y +}; + +#endif // _REORDER_OP_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/pooling.h b/python/ideep4py/include/primitives/pooling.h new file mode 100644 index 00000000..8c0403ef --- /dev/null +++ b/python/ideep4py/include/primitives/pooling.h @@ -0,0 +1,71 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
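Reviewer note: a usage sketch for `ReorderOp` as declared above, converting a plain NCHW buffer into MKL-DNN's 8-channel-blocked layout; the buffers `x_nchw` and `x_blocked` are hypothetical:

```cpp
mkldnn::memory::dims d = {32, 64, 28, 28};

// Plain NCHW into the blocked layout favored by vectorized kernels
ReorderOp<float> reorder(d, mkldnn::memory::format::nchw,
                            mkldnn::memory::format::nChw8c);
reorder.execute(x_nchw, x_blocked);
```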
+ * + */ + + +#ifndef _POOLING_H_ +#define _POOLING_H_ + +#include +#include +#include +#include "layer.h" +#include "op_param.h" +#include "tensor.h" + +template +class Pooling2D : public Layer +{ +public: + Pooling2D(); + ~Pooling2D(); + + /* + * Pooling Forward + * params: + * src: input, x + * pp: pooling parameters + * + * ret + * vector: + * Max pooling: return dst and workspace + * Avg pooling: return dst + */ + static std::vector Forward(Tensor *src, + pooling_param_t *pp); + + /* + * Pooling backward + * param: + * diff_dst: diff dst, gy + * pp: pooling parameters + */ + static Tensor *Backward(Tensor *diff_dst, + Tensor *ws, + pooling_param_t *pp); + +}; + +#endif // _POOLING_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/include/primitives/prim_mgr/bn_bwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/bn_bwd_factory.h new file mode 100644 index 00000000..86096a88 --- /dev/null +++ b/python/ideep4py/include/primitives/prim_mgr/bn_bwd_factory.h @@ -0,0 +1,96 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#ifndef _BN_BWD_FACTORY_ +#define _BN_BWD_FACTORY_ + +#include +#include +#include "op.h" +#include "op_factory.h" +#include +#include "utils.h" +#include "bn_bwd.h" + +template +class batch_normalization_bwd_factory : public OpFactory { + +private: + batch_normalization_bwd_factory() {} + ~batch_normalization_bwd_factory() {} + +public: + static batch_normalization_bwd * get(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, float eps, bool scale_shift) { + auto bn_bwd = dynamic_cast*>( + batch_normalization_bwd_factory::get_instance().get_bn_bwd( + src_d, diff_dst_d, eps, scale_shift)); + + if (bn_bwd == nullptr) { + bn_bwd = new batch_normalization_bwd( + src_d, diff_dst_d, eps, scale_shift); + batch_normalization_bwd_factory::get_instance().set_bn_bwd( + src_d, diff_dst_d, eps, scale_shift, bn_bwd); + } + + return bn_bwd; + } + + static batch_normalization_bwd_factory & get_instance() { + static batch_normalization_bwd_factory instance_; + return instance_; + } + +private: +#define BN_BWD_PREFIX "bn_bwd_" + Op * get_bn_bwd(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + float eps, bool scale_shift) { + + std::string key = BN_BWD_PREFIX; + + key += dims_to_string(src_d); + key += dims_to_string(diff_dst_d); + key += float_to_string(eps); + key += bool_to_string(scale_shift); + + return this->get_op(key); + } + + void set_bn_bwd(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + float eps, bool scale_shift, Op *op) { + + std::string key = BN_BWD_PREFIX; + + key += dims_to_string(src_d); + key += dims_to_string(diff_dst_d); + key += float_to_string(eps); + key += bool_to_string(scale_shift); + + this->set_op(key, op); + } +}; + +#endif // _BN_BWD_FACTORY_ diff --git a/python/ideep4py/include/primitives/prim_mgr/bn_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/bn_fwd_factory.h new file mode 100644 index 00000000..d3b36b76 --- /dev/null +++ b/python/ideep4py/include/primitives/prim_mgr/bn_fwd_factory.h @@ -0,0 +1,98 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
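Reviewer note: a sketch of how the factory above memoizes primitives. The first `get()` with a given shape/attribute combination builds the primitive and caches it under a string key composed from the arguments; a second call with identical arguments returns the same cached object. Shapes are illustrative:

```cpp
#include <cassert>

mkldnn::memory::dims src_d      = {32, 64, 28, 28};
mkldnn::memory::dims diff_dst_d = {32, 64, 28, 28};

auto *p1 = batch_normalization_bwd_factory<float>::get(src_d, diff_dst_d,
                                                       1e-5f, /*scale_shift=*/true);
auto *p2 = batch_normalization_bwd_factory<float>::get(src_d, diff_dst_d,
                                                       1e-5f, true);
assert(p1 == p2); // one primitive per unique key
```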
diff --git a/python/ideep4py/include/primitives/prim_mgr/bn_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/bn_fwd_factory.h
new file mode 100644
index 00000000..d3b36b76
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/bn_fwd_factory.h
@@ -0,0 +1,98 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _BN_FWD_FACTORY_
+#define _BN_FWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "bn_fwd.h"
+
+template <typename T>
+class batch_normalization_fwd_factory : public OpFactory<T> {
+
+private:
+    batch_normalization_fwd_factory() {}
+    ~batch_normalization_fwd_factory() {}
+
+public:
+    static batch_normalization_fwd<T> *get(
+        mkldnn::memory::dims src_d, float eps,
+        bool scale_shift, bool global_stats, bool training) {
+
+        auto bn_fwd = dynamic_cast<batch_normalization_fwd<T> *>(
+            batch_normalization_fwd_factory<T>::get_instance().get_bn_fwd(
+                src_d, eps, scale_shift, global_stats, training));
+
+        if (bn_fwd == nullptr) {
+            bn_fwd = new batch_normalization_fwd<T>(
+                src_d, eps, scale_shift, global_stats, training);
+            batch_normalization_fwd_factory<T>::get_instance().set_bn_fwd(
+                src_d, eps, scale_shift, global_stats, training, bn_fwd);
+        }
+
+        return bn_fwd;
+    }
+
+    static batch_normalization_fwd_factory &get_instance() {
+        static batch_normalization_fwd_factory instance_;
+        return instance_;
+    }
+
+private:
+#define BN_FWD_PREFIX "bn_fwd_"
+    Op<T> *get_bn_fwd(mkldnn::memory::dims src_d, float eps, bool scale_shift,
+                      bool global_stats, bool training) {
+
+        std::string key = BN_FWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += float_to_string(eps);
+        key += bool_to_string(scale_shift);
+        key += bool_to_string(global_stats);
+        key += bool_to_string(training);
+
+        return this->get_op(key);
+    }
+
+    void set_bn_fwd(mkldnn::memory::dims src_d, float eps, bool scale_shift,
+                    bool global_stats, bool training, Op<T> *op) {
+
+        std::string key = BN_FWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += float_to_string(eps);
+        key += bool_to_string(scale_shift);
+        key += bool_to_string(global_stats);
+        key += bool_to_string(training);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _BN_FWD_FACTORY_
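Both batch-normalization factories follow the memoized-singleton pattern used throughout this directory: get() folds the shape and hyper-parameters into a string key, consults the OpFactory map, and constructs a new primitive only on a miss. A hypothetical call site (template argument and values are illustrative):

    // Two identical requests yield one primitive instance.
    auto *fwd_a = batch_normalization_fwd_factory<float>::get(
        src_d, /*eps=*/1e-5f, /*scale_shift=*/true,
        /*global_stats=*/false, /*training=*/true);
    auto *fwd_b = batch_normalization_fwd_factory<float>::get(
        src_d, 1e-5f, true, false, true);
    assert(fwd_a == fwd_b);  // reused, provided enable_prim_reuse is true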
diff --git a/python/ideep4py/include/primitives/prim_mgr/concat_bwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/concat_bwd_factory.h
new file mode 100644
index 00000000..ee3607a8
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/concat_bwd_factory.h
@@ -0,0 +1,99 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _CONCAT_BWD_FACTORY_
+#define _CONCAT_BWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "concat_bwd.h"
+
+template <typename T>
+class ConcatBwdFactory : public OpFactory<T>
+{
+private:
+    ConcatBwdFactory() {}
+    ~ConcatBwdFactory() {}
+
+public:
+    static ConcatBwd<T>* get(std::vector<mkldnn::memory::dims> diff_src,
+                             mkldnn::memory::dims diff_dst,
+                             int axis) {
+        ConcatBwd<T>* concat_backward = NULL;
+
+        //try to find a suitable one in pool
+        concat_backward = dynamic_cast<ConcatBwd<T>*>(
+            ConcatBwdFactory<T>::get_instance().get_concat_bwd(diff_src, diff_dst, axis));
+
+        if (concat_backward == NULL) {
+            //LOG(INFO) << "create a new one for concat bwd";
+            concat_backward = new ConcatBwd<T>(diff_src, diff_dst, axis);
+            ConcatBwdFactory<T>::get_instance().set_concat_bwd(diff_src, diff_dst, axis, concat_backward);
+        } else {
+            //LOG(INFO) << "reuse existing one for concat bwd";
+        }
+        return concat_backward;
+    }
+
+    static ConcatBwdFactory& get_instance() {
+        static ConcatBwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define CONCAT_BWD_PREFIX "concat_bwd_"
+    Op<T>* get_concat_bwd(std::vector<mkldnn::memory::dims> diff_src,
+                          mkldnn::memory::dims diff_dst,
+                          int axis) {
+        std::string key = CONCAT_BWD_PREFIX;
+
+        for (int i = 0; i < diff_src.size(); i++) {
+            key += dims_to_string(diff_src[i]);
+        }
+        key += dims_to_string(diff_dst);
+        key += int_to_string(axis);
+
+        return this->get_op(key);
+    }
+
+    void set_concat_bwd(std::vector<mkldnn::memory::dims> diff_src,
+                        mkldnn::memory::dims diff_dst,
+                        int axis,
+                        Op<T> *op) {
+        std::string key = CONCAT_BWD_PREFIX;
+
+        for (int i = 0; i < diff_src.size(); i++) {
+            key += dims_to_string(diff_src[i]);
+        }
+        key += dims_to_string(diff_dst);
+        key += int_to_string(axis);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _CONCAT_BWD_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/concat_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/concat_fwd_factory.h
new file mode 100644
index 00000000..7ced4089
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/concat_fwd_factory.h
@@ -0,0 +1,99 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _CONCAT_FWD_FACTORY_
+#define _CONCAT_FWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "concat_fwd.h"
+
+template <typename T>
+class ConcatFwdFactory : public OpFactory<T>
+{
+private:
+    ConcatFwdFactory() {}
+    ~ConcatFwdFactory() {}
+
+public:
+    static ConcatFwd<T>* get(std::vector<mkldnn::memory::dims> src,
+                             mkldnn::memory::dims dst,
+                             int axis) {
+        ConcatFwd<T>* concat_forward = NULL;
+
+        //try to find a suitable one in pool
+        concat_forward = dynamic_cast<ConcatFwd<T>*>(
+            ConcatFwdFactory<T>::get_instance().get_concat_fwd(src, dst, axis));
+
+        if (concat_forward == NULL) {
+            //LOG(INFO) << "create a new one for concat fwd";
+            concat_forward = new ConcatFwd<T>(src, dst, axis);
+            ConcatFwdFactory<T>::get_instance().set_concat_fwd(src, dst, axis, concat_forward);
+        } else {
+            //LOG(INFO) << "reuse existing one for concat fwd";
+        }
+        return concat_forward;
+    }
+
+    static ConcatFwdFactory& get_instance() {
+        static ConcatFwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define CONCAT_FWD_PREFIX "concat_fwd_"
+    Op<T>* get_concat_fwd(std::vector<mkldnn::memory::dims> src,
+                          mkldnn::memory::dims dst,
+                          int axis) {
+        std::string key = CONCAT_FWD_PREFIX;
+
+        for (int i = 0; i < src.size(); i++) {
+            key += dims_to_string(src[i]);
+        }
+        key += dims_to_string(dst);
+        key += int_to_string(axis);
+
+        return this->get_op(key);
+    }
+
+    void set_concat_fwd(std::vector<mkldnn::memory::dims> src,
+                        mkldnn::memory::dims dst,
+                        int axis,
+                        Op<T> *op) {
+        std::string key = CONCAT_FWD_PREFIX;
+
+        for (int i = 0; i < src.size(); i++) {
+            key += dims_to_string(src[i]);
+        }
+        key += dims_to_string(dst);
+        key += int_to_string(axis);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _CONCAT_FWD_FACTORY_
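Unlike the fixed-arity primitives, the concat factories fold a variable number of input shapes into the cache key, one dims_to_string fragment per input. dims_to_string and int_to_string come from utils.h, which is not shown here, so the exact rendering below is an assumption:

    // Illustrative key for concatenating two 4-D tensors along axis 1:
    //   "concat_fwd_" + dims_to_string({2,16,24,24})
    //                 + dims_to_string({2,16,24,24})
    //                 + dims_to_string({2,32,24,24})   // dst
    //                 + int_to_string(1)               // axis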
diff --git a/python/ideep4py/include/primitives/prim_mgr/conv_bwd_data_factory.h b/python/ideep4py/include/primitives/prim_mgr/conv_bwd_data_factory.h
new file mode 100644
index 00000000..3c092563
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/conv_bwd_data_factory.h
@@ -0,0 +1,120 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _CONV_BWD_DATA_FACTORY_
+#define _CONV_BWD_DATA_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "conv_bwd_data.h"
+
+template <typename T>
+class Convolution2DBwdDataFactory : public OpFactory<T>
+{
+private:
+    Convolution2DBwdDataFactory() {}
+    ~Convolution2DBwdDataFactory() {}
+
+public:
+    static Convolution2DBwdData<T>* get(mkldnn::memory::dims diff_src,
+                                        mkldnn::memory::dims w,
+                                        mkldnn::memory::dims diff_dst,
+                                        int dilate_y, int dilate_x,
+                                        int sy, int sx,
+                                        int pad_lh, int pad_lw, int pad_rh, int pad_rw) {
+        Convolution2DBwdData<T>* conv2d_backward_data = NULL;
+
+        //try to find a suitable one in pool
+        conv2d_backward_data = dynamic_cast<Convolution2DBwdData<T>*>(
+            Convolution2DBwdDataFactory<T>::get_instance().get_conv2d_bwd_data(
+                diff_src, w, diff_dst, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw));
+
+        if (conv2d_backward_data == NULL) {
+            //LOG(INFO) << "create a new one for conv2d bwd data";
+            conv2d_backward_data = new Convolution2DBwdData<T>(
+                diff_src, w, diff_dst, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw);
+            Convolution2DBwdDataFactory<T>::get_instance().set_conv2d_bwd_data(
+                diff_src, w, diff_dst, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw,
+                conv2d_backward_data);
+        } else {
+            //LOG(INFO) << "reuse an existing one for conv2d bwd data";
+        }
+        return conv2d_backward_data;
+    }
+
+    static Convolution2DBwdDataFactory& get_instance() {
+        static Convolution2DBwdDataFactory instance_;
+        return instance_;
+    }
+
+private:
+#define CONVOLUTION2D_BWD_DATA_PREFIX "conv2d_bwd_data_"
+    Op<T>* get_conv2d_bwd_data(mkldnn::memory::dims diff_src,
+                               mkldnn::memory::dims w,
+                               mkldnn::memory::dims diff_dst,
+                               int dilate_y, int dilate_x,
+                               int sy, int sx,
+                               int pad_lh, int pad_lw, int pad_rh, int pad_rw) {
+        std::string key = CONVOLUTION2D_BWD_DATA_PREFIX;
+
+        key += dims_to_string(diff_src);
+        key += dims_to_string(w);
+        key += dims_to_string(diff_dst);
+        key += int_to_string(dilate_y);
+        key += int_to_string(dilate_x);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+
+        return this->get_op(key);
+    }
+
+    void set_conv2d_bwd_data(mkldnn::memory::dims diff_src,
+                             mkldnn::memory::dims w,
+                             mkldnn::memory::dims diff_dst,
+                             int dilate_y, int dilate_x,
+                             int sy, int sx,
+                             int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                             Op<T> *op) {
+        std::string key = CONVOLUTION2D_BWD_DATA_PREFIX;
+
+        key += dims_to_string(diff_src);
+        key += dims_to_string(w);
+        key += dims_to_string(diff_dst);
+        key += int_to_string(dilate_y);
+        key += int_to_string(dilate_x);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _CONV_BWD_DATA_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/conv_bwd_weights_factory.h b/python/ideep4py/include/primitives/prim_mgr/conv_bwd_weights_factory.h
new file mode 100644
index 00000000..b33e42ac
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/conv_bwd_weights_factory.h
@@ -0,0 +1,119 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _CONV_BWD_WEIGHTS_FACTORY_
+#define _CONV_BWD_WEIGHTS_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "conv_bwd_weights.h"
+
+template <typename T>
+class Convolution2DBwdWeightsFactory : public OpFactory<T>
+{
+private:
+    Convolution2DBwdWeightsFactory() {}
+    ~Convolution2DBwdWeightsFactory() {}
+
+public:
+    static Convolution2DBwdWeights<T>* get(mkldnn::memory::dims x, mkldnn::memory::dims diff_w,
+                                           mkldnn::memory::dims diff_b, mkldnn::memory::dims diff_y,
+                                           int dilate_y, int dilate_x,
+                                           int sy, int sx,
+                                           int pad_lh, int pad_lw, int pad_rh, int pad_rw) {
+        Convolution2DBwdWeights<T>* conv2d_backward_weights = NULL;
+
+        //try to find a suitable one in pool
+        conv2d_backward_weights = dynamic_cast<Convolution2DBwdWeights<T>*>(
+            Convolution2DBwdWeightsFactory<T>::get_instance().get_conv2d_bwd_weights(
+                x, diff_w, diff_b, diff_y, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw));
+
+        if (conv2d_backward_weights == NULL) {
+            //LOG(INFO) << "create a new one for conv2d bwd weights";
+            conv2d_backward_weights = new Convolution2DBwdWeights<T>(
+                x, diff_w, diff_b, diff_y, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw);
+            Convolution2DBwdWeightsFactory<T>::get_instance().set_conv2d_bwd_weights(
+                x, diff_w, diff_b, diff_y, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw,
+                conv2d_backward_weights);
+        } else {
+            //LOG(INFO) << "reuse an existing one for conv2d bwd weights";
+        }
+        return conv2d_backward_weights;
+    }
+
+    static Convolution2DBwdWeightsFactory& get_instance() {
+        static Convolution2DBwdWeightsFactory instance_;
+        return instance_;
+    }
+
+private:
+#define CONVOLUTION2D_BWD_WEIGHTS_PREFIX "conv2d_bwd_weights_"
+    Op<T>* get_conv2d_bwd_weights(mkldnn::memory::dims x, mkldnn::memory::dims diff_w,
+                                  mkldnn::memory::dims diff_b, mkldnn::memory::dims diff_y,
+                                  int dilate_y, int dilate_x,
+                                  int sy, int sx,
+                                  int pad_lh, int pad_lw, int pad_rh, int pad_rw) {
+        std::string key = CONVOLUTION2D_BWD_WEIGHTS_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(diff_w);
+        key += dims_to_string(diff_b);
+        key += dims_to_string(diff_y);
+        key += int_to_string(dilate_y);
+        key += int_to_string(dilate_x);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+
+        return this->get_op(key);
+    }
+
+    void set_conv2d_bwd_weights(mkldnn::memory::dims x,
+                                mkldnn::memory::dims diff_w,
+                                mkldnn::memory::dims diff_b, mkldnn::memory::dims diff_y,
+                                int dilate_y, int dilate_x,
+                                int sy, int sx,
+                                int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                                Op<T> *op) {
+        std::string key = CONVOLUTION2D_BWD_WEIGHTS_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(diff_w);
+        key += dims_to_string(diff_b);
+        key += dims_to_string(diff_y);
+        key += int_to_string(dilate_y);
+        key += int_to_string(dilate_x);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _CONV_BWD_WEIGHTS_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/conv_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/conv_fwd_factory.h
new file mode 100644
index 00000000..421b002f
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/conv_fwd_factory.h
@@ -0,0 +1,119 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _CONV_FWD_FACTORY_
+#define _CONV_FWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "conv_fwd.h"
+
+template <typename T>
+class Convolution2DFwdFactory : public OpFactory<T>
+{
+private:
+    Convolution2DFwdFactory() {}
+    ~Convolution2DFwdFactory() {}
+
+public:
+    static Convolution2DFwd<T>* get(mkldnn::memory::dims x, mkldnn::memory::dims w,
+                                    mkldnn::memory::dims b, mkldnn::memory::dims y,
+                                    int dilate_y, int dilate_x,
+                                    int sy, int sx,
+                                    int pad_lh, int pad_lw, int pad_rh, int pad_rw) {
+        Convolution2DFwd<T>* conv2d_forward = NULL;
+
+        //try to find a suitable one in pool
+        conv2d_forward = dynamic_cast<Convolution2DFwd<T>*>(
+            Convolution2DFwdFactory<T>::get_instance().get_conv2d_fwd(
+                x, w, b, y, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw));
+
+        if (conv2d_forward == NULL) {
+            //LOG(INFO) << "create a new one for conv2d fwd";
+            conv2d_forward = new Convolution2DFwd<T>(
+                x, w, b, y, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw);
+            Convolution2DFwdFactory<T>::get_instance().set_conv2d_fwd(
+                x, w, b, y, dilate_y, dilate_x, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw, conv2d_forward);
+        } else {
+            //LOG(INFO) << "reuse existing one for conv2d fwd";
+        }
+        return conv2d_forward;
+    }
+
+    static Convolution2DFwdFactory& get_instance() {
+        static Convolution2DFwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define CONVOLUTION2D_FWD_PREFIX "conv2d_fwd_"
+    Op<T>* get_conv2d_fwd(mkldnn::memory::dims x, mkldnn::memory::dims w,
+                          mkldnn::memory::dims b, mkldnn::memory::dims y,
+                          int dilate_y, int dilate_x,
+                          int sy, int sx,
+                          int pad_lh, int pad_lw, int pad_rh, int pad_rw) {
+        std::string key = CONVOLUTION2D_FWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(w);
+        key += dims_to_string(b);
+        key += dims_to_string(y);
+        key += int_to_string(dilate_y);
+        key += int_to_string(dilate_x);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+
+        return this->get_op(key);
+    }
+
+    void set_conv2d_fwd(mkldnn::memory::dims x, mkldnn::memory::dims w,
+                        mkldnn::memory::dims b, mkldnn::memory::dims y,
+                        int dilate_y, int dilate_x,
+                        int sy, int sx,
+                        int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                        Op<T> *op) {
+        std::string key = CONVOLUTION2D_FWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(w);
+        key += dims_to_string(b);
+        key += dims_to_string(y);
+        key += int_to_string(dilate_y);
+        key += int_to_string(dilate_x);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _CONV_FWD_FACTORY_
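All three convolution factories key on the same geometry tuple: tensor dims plus dilation, stride, and left/right padding. A hypothetical forward lookup (values illustrative; a dilation of 0 means no dilation in MKL-DNN's convention):

    // 32x3x224x224 input, 64x3x7x7 weights, stride 2, symmetric pad 3.
    mkldnn::memory::dims x = {32, 3, 224, 224}, w = {64, 3, 7, 7};
    mkldnn::memory::dims b = {64}, y = {32, 64, 112, 112};
    auto *conv = Convolution2DFwdFactory<float>::get(
        x, w, b, y, /*dilate_y=*/0, /*dilate_x=*/0, /*sy=*/2, /*sx=*/2,
        /*pad_lh=*/3, /*pad_lw=*/3, /*pad_rh=*/3, /*pad_rw=*/3);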
diff --git a/python/ideep4py/include/primitives/prim_mgr/eltwise_bwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/eltwise_bwd_factory.h
new file mode 100644
index 00000000..5fc8a902
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/eltwise_bwd_factory.h
@@ -0,0 +1,90 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#pragma once
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "eltwise_bwd.h"
+
+template <typename T1, typename T2>
+class EltwiseBwdFactory : public OpFactory<T1>
+{
+private:
+    EltwiseBwdFactory() {}
+    ~EltwiseBwdFactory() {}
+
+public:
+    static EltwiseBwd<T1, T2>* get(mkldnn::memory::dims x, mkldnn::algorithm alg_kind, mkldnn::memory::format dst_diff_fmt, T2 alpha, T2 beta) {
+        EltwiseBwd<T1, T2>* eltwise_backward = nullptr;
+
+        //try to find a suitable one in pool
+        eltwise_backward = dynamic_cast<EltwiseBwd<T1, T2>*>(
+            EltwiseBwdFactory<T1, T2>::get_instance().get_eltwise_bwd(x, alg_kind, dst_diff_fmt, alpha, beta));
+
+        if (eltwise_backward == nullptr) {
+            //LOG(INFO) << "create a new one for eltwise bwd";
+            eltwise_backward = new EltwiseBwd<T1, T2>(x, alg_kind, dst_diff_fmt, alpha, beta);
+            EltwiseBwdFactory<T1, T2>::get_instance().set_eltwise_bwd(x, alg_kind, dst_diff_fmt, alpha, beta, eltwise_backward);
+        } else {
+            //LOG(INFO) << "reuse existing one for eltwise bwd";
+        }
+        return eltwise_backward;
+    }
+
+    static EltwiseBwdFactory& get_instance() {
+        static EltwiseBwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define ELTWISE_BWD_PREFIX "eltwise_bwd_"
+    Op<T1>* get_eltwise_bwd(mkldnn::memory::dims x, mkldnn::algorithm alg_kind, mkldnn::memory::format dst_diff_fmt, T2 alpha, T2 beta) {
+        std::string key = ELTWISE_BWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += int_to_string((int)alg_kind);
+        key += float_to_string((float)alpha);
+        key += float_to_string((float)beta);
+        key += int_to_string(dst_diff_fmt);
+
+        return this->get_op(key);
+    }
+
+    void set_eltwise_bwd(mkldnn::memory::dims x, mkldnn::algorithm alg_kind, mkldnn::memory::format dst_diff_fmt, T2 alpha, T2 beta, Op<T1> *op) {
+        std::string key = ELTWISE_BWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += int_to_string((int)alg_kind);
+        key += float_to_string((float)alpha);
+        key += float_to_string((float)beta);
+        key += int_to_string(dst_diff_fmt);
+
+        this->set_op(key, op);
+    }
+};
diff --git a/python/ideep4py/include/primitives/prim_mgr/eltwise_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/eltwise_fwd_factory.h
new file mode 100644
index 00000000..db5bd3e4
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/eltwise_fwd_factory.h
@@ -0,0 +1,93 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#pragma once
+
+#include <mkldnn.hpp>
+#include <string>
+#include <vector>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "eltwise_fwd.h"
+
+template <typename T1, typename T2>
+class EltwiseFwdFactory : public OpFactory<T1>
+{
+private:
+    EltwiseFwdFactory() {}
+    ~EltwiseFwdFactory() {}
+
+public:
+    static EltwiseFwd<T1, T2>* get(mkldnn::memory::dims x, mkldnn::algorithm alg_kind, mkldnn::memory::format src_fmt, T2 alpha, T2 beta) {
+        EltwiseFwd<T1, T2>* eltwise_forward = nullptr;
+
+        //try to find a suitable one in pool
+        eltwise_forward = dynamic_cast<EltwiseFwd<T1, T2>*>(
+            EltwiseFwdFactory<T1, T2>::get_instance().get_eltwise_fwd(x, alg_kind, src_fmt, alpha, beta));
+
+        if (eltwise_forward == nullptr) {
+            //LOG(INFO) << "create a new one for eltwise fwd";
+            eltwise_forward = new EltwiseFwd<T1, T2>(x, alg_kind, src_fmt, alpha, beta);
+            EltwiseFwdFactory<T1, T2>::get_instance().set_eltwise_fwd(x, alg_kind, src_fmt, alpha, beta, eltwise_forward);
+        } else {
+            //LOG(INFO) << "reuse existing one for eltwise fwd";
+        }
+        return eltwise_forward;
+    }
+
+    static EltwiseFwdFactory& get_instance() {
+        static EltwiseFwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define ELTWISE_FWD_PREFIX "eltwise_fwd_"
+    Op<T1>* get_eltwise_fwd(mkldnn::memory::dims x, mkldnn::algorithm alg_kind, mkldnn::memory::format src_fmt, T2 alpha, T2 beta) {
+        std::string key = ELTWISE_FWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += int_to_string((int)alg_kind);
+        // key += typeid(alpha).name();
+        key += float_to_string((float)alpha);
+        key += float_to_string((float)beta);
+        key += int_to_string(src_fmt);
+
+        return this->get_op(key);
+    }
+
+    void set_eltwise_fwd(mkldnn::memory::dims x, mkldnn::algorithm alg_kind, mkldnn::memory::format src_fmt, T2 alpha, T2 beta, Op<T1>* op) {
+        std::string key = ELTWISE_FWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += int_to_string((int)alg_kind);
+        // key += typeid(alpha).name();
+        key += float_to_string((float)alpha);
+        key += float_to_string((float)beta);
+        key += int_to_string(src_fmt);
+
+        this->set_op(key, op);
+    }
+};
diff --git a/python/ideep4py/include/primitives/prim_mgr/linear_bwd_data_factory.h b/python/ideep4py/include/primitives/prim_mgr/linear_bwd_data_factory.h
new file mode 100644
index 00000000..115846a0
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/linear_bwd_data_factory.h
@@ -0,0 +1,92 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _LINEAR_BWD_DATA_FACTORY_
+#define _LINEAR_BWD_DATA_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "linear_bwd_data.h"
+
+template <typename T>
+class LinearBwdDataFactory : public OpFactory<T>
+{
+private:
+    LinearBwdDataFactory() {}
+    ~LinearBwdDataFactory() {}
+
+public:
+    static LinearBwdData<T>* get(mkldnn::memory::dims diff_src,
+                                 mkldnn::memory::dims w, mkldnn::memory::dims diff_dst) {
+        LinearBwdData<T>* linear_backward_data = NULL;
+        //try to find a suitable one in pool
+        linear_backward_data = dynamic_cast<LinearBwdData<T>*>(
+            LinearBwdDataFactory<T>::get_instance().get_linear_bwd_data(diff_src, w, diff_dst));
+        if (linear_backward_data == NULL) {
+            //LOG(INFO) << "create a new one for linear bwd data";
+            linear_backward_data = new LinearBwdData<T>(diff_src, w, diff_dst);
+            LinearBwdDataFactory<T>::get_instance().set_linear_bwd_data(diff_src, w, diff_dst, linear_backward_data);
+        } else {
+            //LOG(INFO) << "reuse an existing one for linear bwd data";
+        }
+        return linear_backward_data;
+    }
+
+    static LinearBwdDataFactory& get_instance() {
+        static LinearBwdDataFactory instance_;
+        return instance_;
+    }
+
+private:
+#define LINEAR_BWD_DATA_PREFIX "linear_bwd_data_"
+    Op<T>* get_linear_bwd_data(mkldnn::memory::dims diff_src,
+                               mkldnn::memory::dims w,
+                               mkldnn::memory::dims diff_dst) {
+        std::string key = LINEAR_BWD_DATA_PREFIX;
+
+        key += dims_to_string(diff_src);
+        key += dims_to_string(w);
+        key += dims_to_string(diff_dst);
+
+        return this->get_op(key);
+    }
+
+    void set_linear_bwd_data(mkldnn::memory::dims diff_src,
+                             mkldnn::memory::dims w,
+                             mkldnn::memory::dims diff_dst,
+                             Op<T> *op) {
+        std::string key = LINEAR_BWD_DATA_PREFIX;
+
+        key += dims_to_string(diff_src);
+        key += dims_to_string(w);
+        key += dims_to_string(diff_dst);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _LINEAR_BWD_DATA_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/linear_bwd_weights_factory.h b/python/ideep4py/include/primitives/prim_mgr/linear_bwd_weights_factory.h
new file mode 100644
index 00000000..c073ff05
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/linear_bwd_weights_factory.h
@@ -0,0 +1,96 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _LINEAR_BWD_WEIGHTS_FACTORY_
+#define _LINEAR_BWD_WEIGHTS_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "linear_bwd_weights.h"
+
+template <typename T>
+class LinearBwdWeightsFactory : public OpFactory<T>
+{
+private:
+    LinearBwdWeightsFactory() {}
+    ~LinearBwdWeightsFactory() {}
+
+public:
+    static LinearBwdWeights<T>* get(mkldnn::memory::dims x, mkldnn::memory::dims diff_w,
+                                    mkldnn::memory::dims diff_b, mkldnn::memory::dims diff_y) {
+        LinearBwdWeights<T>* linear_backward_weights = NULL;
+        //try to find a suitable one in pool
+        linear_backward_weights = dynamic_cast<LinearBwdWeights<T>*>(
+            LinearBwdWeightsFactory<T>::get_instance().get_linear_bwd_weights(x, diff_w, diff_b, diff_y));
+        if (linear_backward_weights == NULL) {
+            //LOG(INFO) << "create a new one for linear bwd weights";
+            linear_backward_weights = new LinearBwdWeights<T>(x, diff_w, diff_b, diff_y);
+            LinearBwdWeightsFactory<T>::get_instance().set_linear_bwd_weights(x, diff_w, diff_b, diff_y, linear_backward_weights);
+        } else {
+            //LOG(INFO) << "reuse an existing one for linear bwd weights";
+        }
+        return linear_backward_weights;
+    }
+
+    static LinearBwdWeightsFactory& get_instance() {
+        static LinearBwdWeightsFactory instance_;
+        return instance_;
+    }
+
+private:
+#define LINEAR_BWD_WEIGHTS_PREFIX "linear_bwd_weights_"
+    Op<T>* get_linear_bwd_weights(mkldnn::memory::dims x,
+                                  mkldnn::memory::dims diff_w,
+                                  mkldnn::memory::dims diff_b,
+                                  mkldnn::memory::dims diff_y) {
+        std::string key = LINEAR_BWD_WEIGHTS_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(diff_w);
+        key += dims_to_string(diff_b);
+        key += dims_to_string(diff_y);
+
+        return this->get_op(key);
+    }
+
+    void set_linear_bwd_weights(mkldnn::memory::dims x,
+                                mkldnn::memory::dims diff_w,
+                                mkldnn::memory::dims diff_b,
+                                mkldnn::memory::dims diff_y,
+                                Op<T> *op) {
+        std::string key = LINEAR_BWD_WEIGHTS_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(diff_w);
+        key += dims_to_string(diff_b);
+        key += dims_to_string(diff_y);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _LINEAR_BWD_WEIGHTS_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/linear_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/linear_fwd_factory.h
new file mode 100644
index 00000000..2e8f951f
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/linear_fwd_factory.h
@@ -0,0 +1,95 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _LINEAR_FWD_FACTORY_
+#define _LINEAR_FWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "linear_fwd.h"
+
+template <typename T>
+class LinearFwdFactory : public OpFactory<T>
+{
+private:
+    LinearFwdFactory() {}
+    ~LinearFwdFactory() {}
+
+public:
+    static LinearFwd<T>* get(mkldnn::memory::dims x, mkldnn::memory::dims w,
+                             mkldnn::memory::dims b, mkldnn::memory::dims y) {
+        LinearFwd<T>* linear_forward = NULL;
+        //try to find a suitable one in pool
+        linear_forward = dynamic_cast<LinearFwd<T>*>(
+            LinearFwdFactory<T>::get_instance().get_linear_fwd(x, w, b, y));
+        if (linear_forward == NULL) {
+            //LOG(INFO) << "create a new one for linear fwd";
+            linear_forward = new LinearFwd<T>(x, w, b, y);
+            LinearFwdFactory<T>::get_instance().set_linear_fwd(x, w, b, y, linear_forward);
+        } else {
+            //LOG(INFO) << "reuse existing one for linear fwd";
+        }
+        return linear_forward;
+    }
+    static LinearFwdFactory& get_instance() {
+        static LinearFwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define LINEAR_FWD_PREFIX "linear_fwd_"
+    Op<T>* get_linear_fwd(mkldnn::memory::dims x,
+                          mkldnn::memory::dims w,
+                          mkldnn::memory::dims b,
+                          mkldnn::memory::dims y) {
+        std::string key = LINEAR_FWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(w);
+        key += dims_to_string(b);
+        key += dims_to_string(y);
+
+        return this->get_op(key);
+    }
+
+    void set_linear_fwd(mkldnn::memory::dims x,
+                        mkldnn::memory::dims w,
+                        mkldnn::memory::dims b,
+                        mkldnn::memory::dims y,
+                        Op<T>* op) {
+        std::string key = LINEAR_FWD_PREFIX;
+
+        key += dims_to_string(x);
+        key += dims_to_string(w);
+        key += dims_to_string(b);
+        key += dims_to_string(y);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _LINEAR_FWD_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/lrn_bwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/lrn_bwd_factory.h
new file mode 100644
index 00000000..d3110aa7
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/lrn_bwd_factory.h
@@ -0,0 +1,117 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _LRN_BWD_FACTORY_
+#define _LRN_BWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "lrn_bwd.h"
+
+template <typename T>
+class LocalResponseNormalizationBwdFactory : public OpFactory<T>
+{
+private:
+    LocalResponseNormalizationBwdFactory() {}
+    ~LocalResponseNormalizationBwdFactory() {}
+
+public:
+    static LocalResponseNormalizationBwd<T>* get(mkldnn::memory::dims src_d,
+                                                 mkldnn::memory::dims dst_d,
+                                                 mkldnn::memory::dims ws_d,
+                                                 mkldnn::memory::data_type ws_dt,
+                                                 int n, double k, double alpha, double beta,
+                                                 mkldnn::algorithm alg_kind) {
+
+        LocalResponseNormalizationBwd<T>* lrn_backward = NULL;
+
+        //try to find a suitable one in pool
+        lrn_backward = dynamic_cast<LocalResponseNormalizationBwd<T>*>(
+            LocalResponseNormalizationBwdFactory<T>::get_instance().get_lrn_bwd(
+                src_d, dst_d, ws_d, ws_dt, n, k, alpha, beta, alg_kind));
+
+        if (lrn_backward == NULL) {
+            //LOG(INFO) << "create a new one for lrn bwd: " << alg_kind;
+            lrn_backward = new LocalResponseNormalizationBwd<T>(
+                src_d, dst_d, ws_d, ws_dt, n, k, alpha, beta, alg_kind);
+            LocalResponseNormalizationBwdFactory<T>::get_instance().set_lrn_bwd(
+                src_d, dst_d, ws_d, ws_dt, n, k, alpha, beta, alg_kind, lrn_backward);
+        } else {
+            //LOG(INFO) << "reuse existing one for lrn bwd: " << alg_kind;
+        }
+        return lrn_backward;
+    }
+
+    static LocalResponseNormalizationBwdFactory& get_instance() {
+        static LocalResponseNormalizationBwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define LRN_BWD_PREFIX "lrn_bwd_"
+    Op<T>* get_lrn_bwd(mkldnn::memory::dims src_d,
+                       mkldnn::memory::dims dst_d,
+                       mkldnn::memory::dims ws_d,
+                       mkldnn::memory::data_type ws_dt,
+                       int n, double k, double alpha, double beta,
+                       mkldnn::algorithm alg_kind) {
+        std::string key = LRN_BWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += dims_to_string(dst_d);
+        key += dims_to_string(ws_d);
+        key += int_to_string(ws_dt);
+        key += int_to_string(n);
+        key += double_to_string(k);
+        key += double_to_string(alpha);
+        key += double_to_string(beta);
+        key += int_to_string(alg_kind);
+
+        return this->get_op(key);
+    }
+
+    void set_lrn_bwd(mkldnn::memory::dims src_d,
+                     mkldnn::memory::dims dst_d,
+                     mkldnn::memory::dims ws_d,
+                     mkldnn::memory::data_type ws_dt,
+                     int n, double k, double alpha, double beta,
+                     mkldnn::algorithm alg_kind,
+                     Op<T> *op) {
+        std::string key = LRN_BWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += dims_to_string(dst_d);
+        key += dims_to_string(ws_d);
+        key += int_to_string(ws_dt);
+        key += int_to_string(n);
+        key += double_to_string(k);
+        key += double_to_string(alpha);
+        key += double_to_string(beta);
+        key += int_to_string(alg_kind);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _LRN_BWD_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/lrn_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/lrn_fwd_factory.h
new file mode 100755
index 00000000..4cb36cb9
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/lrn_fwd_factory.h
@@ -0,0 +1,106 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _LRN_FWD_FACTORY_
+#define _LRN_FWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "lrn_fwd.h"
+
+template <typename T>
+class LocalResponseNormalizationFwdFactory : public OpFactory<T>
+{
+private:
+    LocalResponseNormalizationFwdFactory() {}
+    ~LocalResponseNormalizationFwdFactory() {}
+
+public:
+    static LocalResponseNormalizationFwd<T>* get(
+        mkldnn::memory::dims src_d, mkldnn::memory::format src_fmt,
+        int n, double k, double alpha, double beta,
+        mkldnn::algorithm alg_kind)
+    {
+        LocalResponseNormalizationFwd<T>* lrn_forward = NULL;
+
+        //try to find a suitable one in pool
+        lrn_forward = dynamic_cast<LocalResponseNormalizationFwd<T>*>(
+            LocalResponseNormalizationFwdFactory<T>::get_instance().get_lrn_fwd(
+                src_d, src_fmt, n, k, alpha, beta, alg_kind));
+
+        if (lrn_forward == NULL) {
+            //LOG(INFO) << "create a new one for lrn fwd: " << alg_kind;
+            lrn_forward = new LocalResponseNormalizationFwd<T>(
+                src_d, src_fmt, n, k, alpha, beta, alg_kind);
+            LocalResponseNormalizationFwdFactory<T>::get_instance().set_lrn_fwd(
+                src_d, src_fmt, n, k, alpha, beta, alg_kind, lrn_forward);
+        } else {
+            //LOG(INFO) << "reuse existing one for lrn fwd: " << alg_kind;
+        }
+        return lrn_forward;
+    }
+
+    static LocalResponseNormalizationFwdFactory& get_instance() {
+        static LocalResponseNormalizationFwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define LRN_FWD_PREFIX "lrn_fwd_"
+    Op<T>* get_lrn_fwd(mkldnn::memory::dims src_d,
+                       mkldnn::memory::format src_fmt,
+                       int n, double k, double alpha, double beta,
+                       mkldnn::algorithm alg_kind) {
+        std::string key = LRN_FWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += int_to_string(src_fmt);
+        key += int_to_string(n);
+        key += double_to_string(k);
+        key += double_to_string(alpha);
+        key += double_to_string(beta);
+        key += int_to_string(alg_kind);
+
+        return this->get_op(key);
+    }
+
+    void set_lrn_fwd(mkldnn::memory::dims src_d,
+                     mkldnn::memory::format src_fmt,
+                     int n, double k, double alpha, double beta,
+                     mkldnn::algorithm alg_kind, Op<T> *op) {
+        std::string key = LRN_FWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += int_to_string(src_fmt);
+        key += int_to_string(n);
+        key += double_to_string(k);
+        key += double_to_string(alpha);
+        key += double_to_string(beta);
+        key += int_to_string(alg_kind);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _LRN_FWD_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/op_factory.h b/python/ideep4py/include/primitives/prim_mgr/op_factory.h
new file mode 100644
index 00000000..44c36d61
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/op_factory.h
@@ -0,0 +1,78 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _OP_FACTORY_
+#define _OP_FACTORY_
+
+#include <unordered_map>
+#include <string>
+#include <stdexcept>
+#include "op.h"
+#include "config.h"
+
+extern bool enable_prim_reuse;
+
+template <typename T>
+class OpFactory {
+public:
+    OpFactory() {};
+    ~OpFactory() {};
+    // virtual Op<T>* get() {return NULL;}
+
+    Op<T>* get_op(std::string key) {
+        // if primitive reuse is not enabled,
+        // just return NULL
+        if (!enable_prim_reuse)
+            return NULL;
+
+        auto stream_iter = map_.find(key);
+        if (stream_iter == map_.end()) {
+            return NULL;
+        } else {
+            return stream_iter->second;
+        }
+    };
+
+    void set_op(std::string key, Op<T>* op) {
+        // if primitive reuse is not enabled,
+        // just return
+        if (!enable_prim_reuse)
+            return;
+
+        auto stream_iter = map_.find(key);
+        if (stream_iter == map_.end()) {
+            map_[key] = op;
+        } else {
+            throw std::invalid_argument("cannot set same key to a new stream");
+        }
+    };
+
+public:
+    std::unordered_map<std::string, Op<T>*> map_;
+};
+
+#endif // _OP_FACTORY_
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
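OpFactory<T> is the base every factory above inherits from: a plain string-keyed map of cached Op<T> pointers, bypassed entirely when the global enable_prim_reuse flag is off. The contract, sketched with an elided key value:

    OpFactory<float> cache;
    Op<float> *op = cache.get_op(key);   // NULL on a miss (or when reuse is off)
    if (op == NULL)
        cache.set_op(key, fresh_op);     // a second set_op with the same
                                         // key throws std::invalid_argument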
diff --git a/python/ideep4py/include/primitives/prim_mgr/pooling_bwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/pooling_bwd_factory.h
new file mode 100644
index 00000000..a0b8a9de
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/pooling_bwd_factory.h
@@ -0,0 +1,130 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _POOLING_BWD_FACTORY_
+#define _POOLING_BWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "pooling_bwd.h"
+
+template <typename T>
+class Pooling2DBwdFactory : public OpFactory<T>
+{
+private:
+    Pooling2DBwdFactory() {}
+    ~Pooling2DBwdFactory() {}
+
+public:
+    static Pooling2DBwd<T>* get(mkldnn::memory::dims src_d,
+                                mkldnn::memory::dims dst_d,
+                                mkldnn::memory::dims ws_d,
+                                mkldnn::memory::data_type ws_dt,
+                                int ker_h, int ker_w,
+                                int sy, int sx,
+                                int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                                mkldnn::algorithm alg_kind) {
+        Pooling2DBwd<T>* pooling2d_backward = NULL;
+
+        //try to find a suitable one in pool
+        pooling2d_backward = dynamic_cast<Pooling2DBwd<T>*>(
+            Pooling2DBwdFactory<T>::get_instance().get_pooling2d_bwd(
+                src_d, dst_d, ws_d, ws_dt, ker_h, ker_w, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw, alg_kind));
+
+        if (pooling2d_backward == NULL) {
+            //LOG(INFO) << "create a new one for pooling bwd: " << alg_kind;
+            pooling2d_backward = new Pooling2DBwd<T>(
+                src_d, dst_d, ws_d, ws_dt, ker_h, ker_w, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw, alg_kind);
+            Pooling2DBwdFactory<T>::get_instance().set_pooling2d_bwd(
+                src_d, dst_d, ws_d, ws_dt, ker_h, ker_w, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw, alg_kind,
+                pooling2d_backward);
+        } else {
+            //LOG(INFO) << "reuse existing one for pooling bwd: " << alg_kind;
+        }
+        return pooling2d_backward;
+    }
+
+    static Pooling2DBwdFactory& get_instance() {
+        static Pooling2DBwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define POOLING2D_BWD_PREFIX "pooling2d_bwd_"
+    Op<T>* get_pooling2d_bwd(mkldnn::memory::dims src_d,
+                             mkldnn::memory::dims dst_d,
+                             mkldnn::memory::dims ws_d,
+                             mkldnn::memory::data_type ws_dt,
+                             int ker_h, int ker_w,
+                             int sy, int sx,
+                             int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                             mkldnn::algorithm alg_kind) {
+        std::string key = POOLING2D_BWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += dims_to_string(dst_d);
+        key += dims_to_string(ws_d);
+        key += int_to_string(ws_dt);
+        key += int_to_string(ker_h);
+        key += int_to_string(ker_w);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+        key += int_to_string(alg_kind);
+
+        return this->get_op(key);
+    }
+
+    void set_pooling2d_bwd(mkldnn::memory::dims src_d,
+                           mkldnn::memory::dims dst_d,
+                           mkldnn::memory::dims ws_d,
+                           mkldnn::memory::data_type ws_dt,
+                           int ker_h, int ker_w,
+                           int sy, int sx,
+                           int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                           mkldnn::algorithm alg_kind,
+                           Op<T> *op) {
+        std::string key = POOLING2D_BWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += dims_to_string(dst_d);
+        key += dims_to_string(ws_d);
+        key += int_to_string(ws_dt);
+        key += int_to_string(ker_h);
+        key += int_to_string(ker_w);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+        key += int_to_string(alg_kind);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _POOLING_BWD_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/pooling_fwd_factory.h b/python/ideep4py/include/primitives/prim_mgr/pooling_fwd_factory.h
new file mode 100644
index 00000000..e134dffd
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/pooling_fwd_factory.h
@@ -0,0 +1,120 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _POOLING_FWD_FACTORY_
+#define _POOLING_FWD_FACTORY_
+
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "pooling_fwd.h"
+
+template <typename T>
+class Pooling2DFwdFactory : public OpFactory<T>
+{
+private:
+    Pooling2DFwdFactory() {}
+    ~Pooling2DFwdFactory() {}
+
+public:
+    static Pooling2DFwd<T>* get(mkldnn::memory::dims src_d,
+                                mkldnn::memory::dims dst_d,
+                                int ker_h, int ker_w,
+                                int sy, int sx,
+                                int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                                mkldnn::algorithm alg_kind) {
+        Pooling2DFwd<T>* pooling2d_forward = NULL;
+
+        //try to find a suitable one in pool
+        pooling2d_forward = dynamic_cast<Pooling2DFwd<T>*>(
+            Pooling2DFwdFactory<T>::get_instance().get_pooling2d_fwd(
+                src_d, dst_d, ker_h, ker_w, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw, alg_kind));
+
+        if (pooling2d_forward == NULL) {
+            //LOG(INFO) << "create a new one for pooling fwd: " << alg_kind;
+            pooling2d_forward = new Pooling2DFwd<T>(
+                src_d, dst_d, ker_h, ker_w, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw, alg_kind);
+            Pooling2DFwdFactory<T>::get_instance().set_pooling2d_fwd(
+                src_d, dst_d, ker_h, ker_w, sy, sx, pad_lh, pad_lw, pad_rh, pad_rw, alg_kind,
+                pooling2d_forward);
+        } else {
+            //LOG(INFO) << "reuse existing one for pooling fwd: " << alg_kind;
+        }
+        return pooling2d_forward;
+    }
+
+    static Pooling2DFwdFactory& get_instance() {
+        static Pooling2DFwdFactory instance_;
+        return instance_;
+    }
+
+private:
+#define POOLING2D_FWD_PREFIX "pooling2d_fwd_"
+    Op<T>* get_pooling2d_fwd(mkldnn::memory::dims src_d,
+                             mkldnn::memory::dims dst_d,
+                             int ker_h, int ker_w,
+                             int sy, int sx,
+                             int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                             mkldnn::algorithm alg_kind) {
+        std::string key = POOLING2D_FWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += dims_to_string(dst_d);
+        key += int_to_string(ker_h);
+        key += int_to_string(ker_w);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+        key += int_to_string(alg_kind);
+
+        return this->get_op(key);
+    }
+
+    void set_pooling2d_fwd(mkldnn::memory::dims src_d,
+                           mkldnn::memory::dims dst_d,
+                           int ker_h, int ker_w,
+                           int sy, int sx,
+                           int pad_lh, int pad_lw, int pad_rh, int pad_rw,
+                           mkldnn::algorithm alg_kind,
+                           Op<T> *op) {
+        std::string key = POOLING2D_FWD_PREFIX;
+
+        key += dims_to_string(src_d);
+        key += dims_to_string(dst_d);
+        key += int_to_string(ker_h);
+        key += int_to_string(ker_w);
+        key += int_to_string(sy);
+        key += int_to_string(sx);
+        key += int_to_string(pad_lh);
+        key += int_to_string(pad_lw);
+        key += int_to_string(pad_rh);
+        key += int_to_string(pad_rw);
+        key += int_to_string(alg_kind);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _POOLING_FWD_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/prim_factory.h b/python/ideep4py/include/primitives/prim_mgr/prim_factory.h
new file mode 100644
index 00000000..02a3c827
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/prim_factory.h
@@ -0,0 +1,43 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _PRIM_FACTORY_
+#define _PRIM_FACTORY_
+
+#include "reorder_factory.h"
+#include "conv_fwd_factory.h"
+#include "conv_bwd_data_factory.h"
+#include "conv_bwd_weights_factory.h"
+#include "pooling_fwd_factory.h"
+#include "pooling_bwd_factory.h"
+#include "eltwise_fwd_factory.h"
+#include "eltwise_bwd_factory.h"
+#include "bn_fwd_factory.h"
+#include "bn_bwd_factory.h"
+#include "concat_fwd_factory.h"
+#include "concat_bwd_factory.h"
+#include "lrn_fwd_factory.h"
+#include "lrn_bwd_factory.h"
+
+#endif // _PRIM_FACTORY_
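prim_factory.h is simply the umbrella header that pulls in every per-primitive factory above, and reorder_factory.h (next) caches layout-conversion primitives keyed on dims plus the source and destination memory formats. A hypothetical lookup converting plain nchw to the blocked nChw8c layout (format names from the MKL-DNN 0.x C++ API):

    mkldnn::memory::dims dims = {32, 64, 56, 56};
    auto *rop = ReorderFactory<float>::get(
        dims, mkldnn::memory::format::nchw, mkldnn::memory::format::nChw8c);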
diff --git a/python/ideep4py/include/primitives/prim_mgr/prim_factory.h b/python/ideep4py/include/primitives/prim_mgr/prim_factory.h
new file mode 100644
index 00000000..02a3c827
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/prim_factory.h
@@ -0,0 +1,43 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _PRIM_FACTORY_
+#define _PRIM_FACTORY_
+
+#include "reorder_factory.h"
+#include "conv_fwd_factory.h"
+#include "conv_bwd_data_factory.h"
+#include "conv_bwd_weights_factory.h"
+#include "pooling_fwd_factory.h"
+#include "pooling_bwd_factory.h"
+#include "eltwise_fwd_factory.h"
+#include "eltwise_bwd_factory.h"
+#include "bn_fwd_factory.h"
+#include "bn_bwd_factory.h"
+#include "concat_fwd_factory.h"
+#include "concat_bwd_factory.h"
+#include "lrn_fwd_factory.h"
+#include "lrn_bwd_factory.h"
+
+#endif // _PRIM_FACTORY_
diff --git a/python/ideep4py/include/primitives/prim_mgr/reorder_factory.h b/python/ideep4py/include/primitives/prim_mgr/reorder_factory.h
new file mode 100644
index 00000000..41293fbb
--- /dev/null
+++ b/python/ideep4py/include/primitives/prim_mgr/reorder_factory.h
@@ -0,0 +1,93 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#ifndef _REORDER_FACTORY_
+#define _REORDER_FACTORY_
+#include <mkldnn.hpp>
+#include <string>
+#include "op.h"
+#include "op_factory.h"
+#include <unordered_map>
+#include "utils.h"
+#include "reorder_op.h"
+
+template <typename T>
+class ReorderFactory : public OpFactory<T>
+{
+private:
+    ReorderFactory() {}
+    ~ReorderFactory() {}
+
+public:
+    static ReorderOp<T>* get(mkldnn::memory::dims dims, mkldnn::memory::format src_fmt, mkldnn::memory::format dst_fmt) {
+        ReorderOp<T>* reorder_op = NULL;
+
+        //try to find a suitable one in pool
+        reorder_op = dynamic_cast<ReorderOp<T>*> (
+            ReorderFactory<T>::get_instance().get_reorder(dims, src_fmt, dst_fmt));
+
+        if (reorder_op == NULL) {
+            //LOG(INFO) << "create a new one for reorder";
+            reorder_op = new ReorderOp<T>( dims, src_fmt, dst_fmt);
+            ReorderFactory<T>::get_instance().set_reorder( dims, src_fmt, dst_fmt, reorder_op);
+        } else {
+            //LOG(INFO) << "reuse exist one for reorder";
+        }
+        return reorder_op;
+    }
+
+    static ReorderFactory& get_instance() {
+        static ReorderFactory instance_;
+        return instance_;
+    }
+
+private:
+#define REORDER_PREFIX "reorder_"
+    Op<T>* get_reorder(mkldnn::memory::dims dims,
+                       mkldnn::memory::format src_fmt,
+                       mkldnn::memory::format dst_fmt) {
+        std::string key = REORDER_PREFIX;
+
+        key += dims_to_string(dims);
+        key += int_to_string((int)src_fmt);
+        key += int_to_string((int)dst_fmt);
+
+        return this->get_op(key);
+    }
+
+    void set_reorder(mkldnn::memory::dims dims,
+                     mkldnn::memory::format src_fmt,
+                     mkldnn::memory::format dst_fmt,
+                     Op<T> *op) {
+        std::string key = REORDER_PREFIX;
+
+        key += dims_to_string(dims);
+        key += int_to_string((int)src_fmt);
+        key += int_to_string((int)dst_fmt);
+
+        this->set_op(key, op);
+    }
+};
+
+#endif // _REORDER_FACTORY_
diff --git a/python/ideep4py/include/swigpyrun.h b/python/ideep4py/include/swigpyrun.h
new file mode 100644
index 00000000..f9bcd6c4
--- /dev/null
+++ b/python/ideep4py/include/swigpyrun.h
@@ -0,0 +1,2988 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+#if !defined(SWIGPYTHON)
+#define SWIGPYTHON
+
+#define SWIGPYTHON_BUILTIN
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */ + +/* template workaround for compilers that cannot correctly implement the C++ standard */ +#ifndef SWIGTEMPLATEDISAMBIGUATOR +# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560) +# define SWIGTEMPLATEDISAMBIGUATOR template +# elif defined(__HP_aCC) +/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */ +/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */ +# define SWIGTEMPLATEDISAMBIGUATOR template +# else +# define SWIGTEMPLATEDISAMBIGUATOR +# endif +#endif + +/* inline attribute */ +#ifndef SWIGINLINE +# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__)) +# define SWIGINLINE inline +# else +# define SWIGINLINE +# endif +#endif + +/* attribute recognised by some compilers to avoid 'unused' warnings */ +#ifndef SWIGUNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +# elif defined(__ICC) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +#endif + +#ifndef SWIG_MSC_UNSUPPRESS_4505 +# if defined(_MSC_VER) +# pragma warning(disable : 4505) /* unreferenced local function has been removed */ +# endif +#endif + +#ifndef SWIGUNUSEDPARM +# ifdef __cplusplus +# define SWIGUNUSEDPARM(p) +# else +# define SWIGUNUSEDPARM(p) p SWIGUNUSED +# endif +#endif + +/* internal SWIG method */ +#ifndef SWIGINTERN +# define SWIGINTERN static SWIGUNUSED +#endif + +/* internal inline SWIG method */ +#ifndef SWIGINTERNINLINE +# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE +#endif + +/* exporting methods */ +#if defined(__GNUC__) +# if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# ifndef GCC_HASCLASSVISIBILITY +# define GCC_HASCLASSVISIBILITY +# endif +# endif +#endif + +#ifndef SWIGEXPORT +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# if defined(STATIC_LINKED) +# define SWIGEXPORT +# else +# define SWIGEXPORT __declspec(dllexport) +# endif +# else +# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY) +# define SWIGEXPORT __attribute__ ((visibility("default"))) +# else +# define SWIGEXPORT +# endif +# endif +#endif + +/* calling conventions for Windows */ +#ifndef SWIGSTDCALL +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# define SWIGSTDCALL __stdcall +# else +# define SWIGSTDCALL +# endif +#endif + +/* Deal with Microsoft's attempt at deprecating C standard runtime functions */ +#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE) +# define _CRT_SECURE_NO_DEPRECATE +#endif + +/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */ +#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE) +# define _SCL_SECURE_NO_DEPRECATE +#endif + +/* Deal with Apple's deprecated 'AssertMacros.h' from Carbon-framework */ +#if defined(__APPLE__) && !defined(__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES) +# define __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES 0 +#endif + +/* Intel's compiler complains if a variable which was never initialised is + * cast to void, which is a common idiom which we use to indicate that we + * are aware a variable isn't used. So we just silence that warning. 
+ * See: https://github.com/swig/swig/issues/192 for more discussion. + */ +#ifdef __INTEL_COMPILER +# pragma warning disable 592 +#endif +/* Errors in SWIG */ +#define SWIG_UnknownError -1 +#define SWIG_IOError -2 +#define SWIG_RuntimeError -3 +#define SWIG_IndexError -4 +#define SWIG_TypeError -5 +#define SWIG_DivisionByZero -6 +#define SWIG_OverflowError -7 +#define SWIG_SyntaxError -8 +#define SWIG_ValueError -9 +#define SWIG_SystemError -10 +#define SWIG_AttributeError -11 +#define SWIG_MemoryError -12 +#define SWIG_NullReferenceError -13 + + +/* ----------------------------------------------------------------------------- + * swigrun.swg + * + * This file contains generic C API SWIG runtime support for pointer + * type checking. + * ----------------------------------------------------------------------------- */ + +/* This should only be incremented when either the layout of swig_type_info changes, + or for whatever reason, the runtime changes incompatibly */ +#define SWIG_RUNTIME_VERSION "4" + +/* define SWIG_TYPE_TABLE_NAME as "SWIG_TYPE_TABLE" */ +#ifdef SWIG_TYPE_TABLE +# define SWIG_QUOTE_STRING(x) #x +# define SWIG_EXPAND_AND_QUOTE_STRING(x) SWIG_QUOTE_STRING(x) +# define SWIG_TYPE_TABLE_NAME SWIG_EXPAND_AND_QUOTE_STRING(SWIG_TYPE_TABLE) +#else +# define SWIG_TYPE_TABLE_NAME +#endif + +/* + You can use the SWIGRUNTIME and SWIGRUNTIMEINLINE macros for + creating a static or dynamic library from the SWIG runtime code. + In 99.9% of the cases, SWIG just needs to declare them as 'static'. + + But only do this if strictly necessary, ie, if you have problems + with your compiler or suchlike. +*/ + +#ifndef SWIGRUNTIME +# define SWIGRUNTIME SWIGINTERN +#endif + +#ifndef SWIGRUNTIMEINLINE +# define SWIGRUNTIMEINLINE SWIGRUNTIME SWIGINLINE +#endif + +/* Generic buffer size */ +#ifndef SWIG_BUFFER_SIZE +# define SWIG_BUFFER_SIZE 1024 +#endif + +/* Flags for pointer conversions */ +#define SWIG_POINTER_DISOWN 0x1 +#define SWIG_CAST_NEW_MEMORY 0x2 + +/* Flags for new pointer objects */ +#define SWIG_POINTER_OWN 0x1 + + +/* + Flags/methods for returning states. + + The SWIG conversion methods, as ConvertPtr, return an integer + that tells if the conversion was successful or not. And if not, + an error code can be returned (see swigerrors.swg for the codes). + + Use the following macros/flags to set or process the returning + states. + + In old versions of SWIG, code such as the following was usually written: + + if (SWIG_ConvertPtr(obj,vptr,ty.flags) != -1) { + // success code + } else { + //fail code + } + + Now you can be more explicit: + + int res = SWIG_ConvertPtr(obj,vptr,ty.flags); + if (SWIG_IsOK(res)) { + // success code + } else { + // fail code + } + + which is the same really, but now you can also do + + Type *ptr; + int res = SWIG_ConvertPtr(obj,(void **)(&ptr),ty.flags); + if (SWIG_IsOK(res)) { + // success code + if (SWIG_IsNewObj(res) { + ... + delete *ptr; + } else { + ... + } + } else { + // fail code + } + + I.e., now SWIG_ConvertPtr can return new objects and you can + identify the case and take care of the deallocation. Of course that + also requires SWIG_ConvertPtr to return new result values, such as + + int SWIG_ConvertPtr(obj, ptr,...) { + if () { + if () { + *ptr = ; + return SWIG_NEWOBJ; + } else { + *ptr = ; + return SWIG_OLDOBJ; + } + } else { + return SWIG_BADOBJ; + } + } + + Of course, returning the plain '0(success)/-1(fail)' still works, but you can be + more explicit by returning SWIG_BADOBJ, SWIG_ERROR or any of the + SWIG errors code. 
+ + Finally, if the SWIG_CASTRANK_MODE is enabled, the result code + allows to return the 'cast rank', for example, if you have this + + int food(double) + int fooi(int); + + and you call + + food(1) // cast rank '1' (1 -> 1.0) + fooi(1) // cast rank '0' + + just use the SWIG_AddCast()/SWIG_CheckState() +*/ + +#define SWIG_OK (0) +#define SWIG_ERROR (-1) +#define SWIG_IsOK(r) (r >= 0) +#define SWIG_ArgError(r) ((r != SWIG_ERROR) ? r : SWIG_TypeError) + +/* The CastRankLimit says how many bits are used for the cast rank */ +#define SWIG_CASTRANKLIMIT (1 << 8) +/* The NewMask denotes the object was created (using new/malloc) */ +#define SWIG_NEWOBJMASK (SWIG_CASTRANKLIMIT << 1) +/* The TmpMask is for in/out typemaps that use temporal objects */ +#define SWIG_TMPOBJMASK (SWIG_NEWOBJMASK << 1) +/* Simple returning values */ +#define SWIG_BADOBJ (SWIG_ERROR) +#define SWIG_OLDOBJ (SWIG_OK) +#define SWIG_NEWOBJ (SWIG_OK | SWIG_NEWOBJMASK) +#define SWIG_TMPOBJ (SWIG_OK | SWIG_TMPOBJMASK) +/* Check, add and del mask methods */ +#define SWIG_AddNewMask(r) (SWIG_IsOK(r) ? (r | SWIG_NEWOBJMASK) : r) +#define SWIG_DelNewMask(r) (SWIG_IsOK(r) ? (r & ~SWIG_NEWOBJMASK) : r) +#define SWIG_IsNewObj(r) (SWIG_IsOK(r) && (r & SWIG_NEWOBJMASK)) +#define SWIG_AddTmpMask(r) (SWIG_IsOK(r) ? (r | SWIG_TMPOBJMASK) : r) +#define SWIG_DelTmpMask(r) (SWIG_IsOK(r) ? (r & ~SWIG_TMPOBJMASK) : r) +#define SWIG_IsTmpObj(r) (SWIG_IsOK(r) && (r & SWIG_TMPOBJMASK)) + +/* Cast-Rank Mode */ +#if defined(SWIG_CASTRANK_MODE) +# ifndef SWIG_TypeRank +# define SWIG_TypeRank unsigned long +# endif +# ifndef SWIG_MAXCASTRANK /* Default cast allowed */ +# define SWIG_MAXCASTRANK (2) +# endif +# define SWIG_CASTRANKMASK ((SWIG_CASTRANKLIMIT) -1) +# define SWIG_CastRank(r) (r & SWIG_CASTRANKMASK) +SWIGINTERNINLINE int SWIG_AddCast(int r) { + return SWIG_IsOK(r) ? ((SWIG_CastRank(r) < SWIG_MAXCASTRANK) ? (r + 1) : SWIG_ERROR) : r; +} +SWIGINTERNINLINE int SWIG_CheckState(int r) { + return SWIG_IsOK(r) ? SWIG_CastRank(r) + 1 : 0; +} +#else /* no cast-rank mode */ +# define SWIG_AddCast(r) (r) +# define SWIG_CheckState(r) (SWIG_IsOK(r) ? 
1 : 0) +#endif + + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void *(*swig_converter_func)(void *, int *); +typedef struct swig_type_info *(*swig_dycast_func)(void **); + +/* Structure to store information on one type */ +typedef struct swig_type_info { + const char *name; /* mangled name of this type */ + const char *str; /* human readable name of this type */ + swig_dycast_func dcast; /* dynamic cast function down a hierarchy */ + struct swig_cast_info *cast; /* linked list of types that can cast into this type */ + void *clientdata; /* language specific type data */ + int owndata; /* flag if the structure owns the clientdata */ +} swig_type_info; + +/* Structure to store a type and conversion function used for casting */ +typedef struct swig_cast_info { + swig_type_info *type; /* pointer to type that is equivalent to this type */ + swig_converter_func converter; /* function to cast the void pointers */ + struct swig_cast_info *next; /* pointer to next cast in linked list */ + struct swig_cast_info *prev; /* pointer to the previous cast */ +} swig_cast_info; + +/* Structure used to store module information + * Each module generates one structure like this, and the runtime collects + * all of these structures and stores them in a circularly linked list.*/ +typedef struct swig_module_info { + swig_type_info **types; /* Array of pointers to swig_type_info structures that are in this module */ + size_t size; /* Number of types in this module */ + struct swig_module_info *next; /* Pointer to next element in circularly linked list */ + swig_type_info **type_initial; /* Array of initially generated type structures */ + swig_cast_info **cast_initial; /* Array of initially generated casting structures */ + void *clientdata; /* Language specific module data */ +} swig_module_info; + +/* + Compare two type names skipping the space characters, therefore + "char*" == "char *" and "Class" == "Class", etc. + + Return 0 when the two name types are equivalent, as in + strncmp, but skipping ' '. +*/ +SWIGRUNTIME int +SWIG_TypeNameComp(const char *f1, const char *l1, + const char *f2, const char *l2) { + for (;(f1 != l1) && (f2 != l2); ++f1, ++f2) { + while ((*f1 == ' ') && (f1 != l1)) ++f1; + while ((*f2 == ' ') && (f2 != l2)) ++f2; + if (*f1 != *f2) return (*f1 > *f2) ? 1 : -1; + } + return (int)((l1 - f1) - (l2 - f2)); +} + +/* + Check type equivalence in a name list like ||... + Return 0 if equal, -1 if nb < tb, 1 if nb > tb +*/ +SWIGRUNTIME int +SWIG_TypeCmp(const char *nb, const char *tb) { + int equiv = 1; + const char* te = tb + strlen(tb); + const char* ne = nb; + while (equiv != 0 && *ne) { + for (nb = ne; *ne; ++ne) { + if (*ne == '|') break; + } + equiv = SWIG_TypeNameComp(nb, ne, tb, te); + if (*ne) ++ne; + } + return equiv; +} + +/* + Check type equivalence in a name list like ||... + Return 0 if not equal, 1 if equal +*/ +SWIGRUNTIME int +SWIG_TypeEquiv(const char *nb, const char *tb) { + return SWIG_TypeCmp(nb, tb) == 0 ? 
1 : 0; +} + +/* + Check the typename +*/ +SWIGRUNTIME swig_cast_info * +SWIG_TypeCheck(const char *c, swig_type_info *ty) { + if (ty) { + swig_cast_info *iter = ty->cast; + while (iter) { + if (strcmp(iter->type->name, c) == 0) { + if (iter == ty->cast) + return iter; + /* Move iter to the top of the linked list */ + iter->prev->next = iter->next; + if (iter->next) + iter->next->prev = iter->prev; + iter->next = ty->cast; + iter->prev = 0; + if (ty->cast) ty->cast->prev = iter; + ty->cast = iter; + return iter; + } + iter = iter->next; + } + } + return 0; +} + +/* + Identical to SWIG_TypeCheck, except strcmp is replaced with a pointer comparison +*/ +SWIGRUNTIME swig_cast_info * +SWIG_TypeCheckStruct(swig_type_info *from, swig_type_info *ty) { + if (ty) { + swig_cast_info *iter = ty->cast; + while (iter) { + if (iter->type == from) { + if (iter == ty->cast) + return iter; + /* Move iter to the top of the linked list */ + iter->prev->next = iter->next; + if (iter->next) + iter->next->prev = iter->prev; + iter->next = ty->cast; + iter->prev = 0; + if (ty->cast) ty->cast->prev = iter; + ty->cast = iter; + return iter; + } + iter = iter->next; + } + } + return 0; +} + +/* + Cast a pointer up an inheritance hierarchy +*/ +SWIGRUNTIMEINLINE void * +SWIG_TypeCast(swig_cast_info *ty, void *ptr, int *newmemory) { + return ((!ty) || (!ty->converter)) ? ptr : (*ty->converter)(ptr, newmemory); +} + +/* + Dynamic pointer casting. Down an inheritance hierarchy +*/ +SWIGRUNTIME swig_type_info * +SWIG_TypeDynamicCast(swig_type_info *ty, void **ptr) { + swig_type_info *lastty = ty; + if (!ty || !ty->dcast) return ty; + while (ty && (ty->dcast)) { + ty = (*ty->dcast)(ptr); + if (ty) lastty = ty; + } + return lastty; +} + +/* + Return the name associated with this type +*/ +SWIGRUNTIMEINLINE const char * +SWIG_TypeName(const swig_type_info *ty) { + return ty->name; +} + +/* + Return the pretty name associated with this type, + that is an unmangled type name in a form presentable to the user. +*/ +SWIGRUNTIME const char * +SWIG_TypePrettyName(const swig_type_info *type) { + /* The "str" field contains the equivalent pretty names of the + type, separated by vertical-bar characters. We choose + to print the last name, as it is often (?) the most + specific. */ + if (!type) return NULL; + if (type->str != NULL) { + const char *last_name = type->str; + const char *s; + for (s = type->str; *s; s++) + if (*s == '|') last_name = s+1; + return last_name; + } + else + return type->name; +} + +/* + Set the clientdata field for a type +*/ +SWIGRUNTIME void +SWIG_TypeClientData(swig_type_info *ti, void *clientdata) { + swig_cast_info *cast = ti->cast; + /* if (ti->clientdata == clientdata) return; */ + ti->clientdata = clientdata; + + while (cast) { + if (!cast->converter) { + swig_type_info *tc = cast->type; + if (!tc->clientdata) { + SWIG_TypeClientData(tc, clientdata); + } + } + cast = cast->next; + } +} +SWIGRUNTIME void +SWIG_TypeNewClientData(swig_type_info *ti, void *clientdata) { + SWIG_TypeClientData(ti, clientdata); + ti->owndata = 1; +} + +/* + Search for a swig_type_info structure only by mangled name + Search is a O(log #types) + + We start searching at module start, and finish searching when start == end. + Note: if start == end at the beginning of the function, we go all the way around + the circular list. 
+*/ +SWIGRUNTIME swig_type_info * +SWIG_MangledTypeQueryModule(swig_module_info *start, + swig_module_info *end, + const char *name) { + swig_module_info *iter = start; + do { + if (iter->size) { + size_t l = 0; + size_t r = iter->size - 1; + do { + /* since l+r >= 0, we can (>> 1) instead (/ 2) */ + size_t i = (l + r) >> 1; + const char *iname = iter->types[i]->name; + if (iname) { + int compare = strcmp(name, iname); + if (compare == 0) { + return iter->types[i]; + } else if (compare < 0) { + if (i) { + r = i - 1; + } else { + break; + } + } else if (compare > 0) { + l = i + 1; + } + } else { + break; /* should never happen */ + } + } while (l <= r); + } + iter = iter->next; + } while (iter != end); + return 0; +} + +/* + Search for a swig_type_info structure for either a mangled name or a human readable name. + It first searches the mangled names of the types, which is a O(log #types) + If a type is not found it then searches the human readable names, which is O(#types). + + We start searching at module start, and finish searching when start == end. + Note: if start == end at the beginning of the function, we go all the way around + the circular list. +*/ +SWIGRUNTIME swig_type_info * +SWIG_TypeQueryModule(swig_module_info *start, + swig_module_info *end, + const char *name) { + /* STEP 1: Search the name field using binary search */ + swig_type_info *ret = SWIG_MangledTypeQueryModule(start, end, name); + if (ret) { + return ret; + } else { + /* STEP 2: If the type hasn't been found, do a complete search + of the str field (the human readable name) */ + swig_module_info *iter = start; + do { + size_t i = 0; + for (; i < iter->size; ++i) { + if (iter->types[i]->str && (SWIG_TypeEquiv(iter->types[i]->str, name))) + return iter->types[i]; + } + iter = iter->next; + } while (iter != end); + } + + /* neither found a match */ + return 0; +} + +/* + Pack binary data into a string +*/ +SWIGRUNTIME char * +SWIG_PackData(char *c, void *ptr, size_t sz) { + static const char hex[17] = "0123456789abcdef"; + const unsigned char *u = (unsigned char *) ptr; + const unsigned char *eu = u + sz; + for (; u != eu; ++u) { + unsigned char uu = *u; + *(c++) = hex[(uu & 0xf0) >> 4]; + *(c++) = hex[uu & 0xf]; + } + return c; +} + +/* + Unpack binary data from a string +*/ +SWIGRUNTIME const char * +SWIG_UnpackData(const char *c, void *ptr, size_t sz) { + unsigned char *u = (unsigned char *) ptr; + const unsigned char *eu = u + sz; + for (; u != eu; ++u) { + char d = *(c++); + unsigned char uu; + if ((d >= '0') && (d <= '9')) + uu = (unsigned char)((d - '0') << 4); + else if ((d >= 'a') && (d <= 'f')) + uu = (unsigned char)((d - ('a'-10)) << 4); + else + return (char *) 0; + d = *(c++); + if ((d >= '0') && (d <= '9')) + uu |= (unsigned char)(d - '0'); + else if ((d >= 'a') && (d <= 'f')) + uu |= (unsigned char)(d - ('a'-10)); + else + return (char *) 0; + *u = uu; + } + return c; +} + +/* + Pack 'void *' into a string buffer. 
+*/ +SWIGRUNTIME char * +SWIG_PackVoidPtr(char *buff, void *ptr, const char *name, size_t bsz) { + char *r = buff; + if ((2*sizeof(void *) + 2) > bsz) return 0; + *(r++) = '_'; + r = SWIG_PackData(r,&ptr,sizeof(void *)); + if (strlen(name) + 1 > (bsz - (r - buff))) return 0; + strcpy(r,name); + return buff; +} + +SWIGRUNTIME const char * +SWIG_UnpackVoidPtr(const char *c, void **ptr, const char *name) { + if (*c != '_') { + if (strcmp(c,"NULL") == 0) { + *ptr = (void *) 0; + return name; + } else { + return 0; + } + } + return SWIG_UnpackData(++c,ptr,sizeof(void *)); +} + +SWIGRUNTIME char * +SWIG_PackDataName(char *buff, void *ptr, size_t sz, const char *name, size_t bsz) { + char *r = buff; + size_t lname = (name ? strlen(name) : 0); + if ((2*sz + 2 + lname) > bsz) return 0; + *(r++) = '_'; + r = SWIG_PackData(r,ptr,sz); + if (lname) { + strncpy(r,name,lname+1); + } else { + *r = 0; + } + return buff; +} + +SWIGRUNTIME const char * +SWIG_UnpackDataName(const char *c, void *ptr, size_t sz, const char *name) { + if (*c != '_') { + if (strcmp(c,"NULL") == 0) { + memset(ptr,0,sz); + return name; + } else { + return 0; + } + } + return SWIG_UnpackData(++c,ptr,sz); +} + +#ifdef __cplusplus +} +#endif +/* Compatibility macros for Python 3 */ +#if PY_VERSION_HEX >= 0x03000000 + +#define PyClass_Check(obj) PyObject_IsInstance(obj, (PyObject *)&PyType_Type) +#define PyInt_Check(x) PyLong_Check(x) +#define PyInt_AsLong(x) PyLong_AsLong(x) +#define PyInt_FromLong(x) PyLong_FromLong(x) +#define PyInt_FromSize_t(x) PyLong_FromSize_t(x) +#define PyString_Check(name) PyBytes_Check(name) +#define PyString_FromString(x) PyUnicode_FromString(x) +#define PyString_Format(fmt, args) PyUnicode_Format(fmt, args) +#define PyString_AsString(str) PyBytes_AsString(str) +#define PyString_Size(str) PyBytes_Size(str) +#define PyString_InternFromString(key) PyUnicode_InternFromString(key) +#define Py_TPFLAGS_HAVE_CLASS Py_TPFLAGS_BASETYPE +#define PyString_AS_STRING(x) PyUnicode_AS_STRING(x) +#define _PyLong_FromSsize_t(x) PyLong_FromSsize_t(x) + +#endif + +#ifndef Py_TYPE +# define Py_TYPE(op) ((op)->ob_type) +#endif + +/* SWIG APIs for compatibility of both Python 2 & 3 */ + +#if PY_VERSION_HEX >= 0x03000000 +# define SWIG_Python_str_FromFormat PyUnicode_FromFormat +#else +# define SWIG_Python_str_FromFormat PyString_FromFormat +#endif + + +/* Warning: This function will allocate a new string in Python 3, + * so please call SWIG_Python_str_DelForPy3(x) to free the space. 
+ */ +SWIGINTERN char* +SWIG_Python_str_AsChar(PyObject *str) +{ +#if PY_VERSION_HEX >= 0x03000000 + char *cstr; + char *newstr; + Py_ssize_t len; + str = PyUnicode_AsUTF8String(str); + PyBytes_AsStringAndSize(str, &cstr, &len); + newstr = (char *) malloc(len+1); + memcpy(newstr, cstr, len+1); + Py_XDECREF(str); + return newstr; +#else + return PyString_AsString(str); +#endif +} + +#if PY_VERSION_HEX >= 0x03000000 +# define SWIG_Python_str_DelForPy3(x) free( (void*) (x) ) +#else +# define SWIG_Python_str_DelForPy3(x) +#endif + + +SWIGINTERN PyObject* +SWIG_Python_str_FromChar(const char *c) +{ +#if PY_VERSION_HEX >= 0x03000000 + return PyUnicode_FromString(c); +#else + return PyString_FromString(c); +#endif +} + +/* Add PyOS_snprintf for old Pythons */ +#if PY_VERSION_HEX < 0x02020000 +# if defined(_MSC_VER) || defined(__BORLANDC__) || defined(_WATCOM) +# define PyOS_snprintf _snprintf +# else +# define PyOS_snprintf snprintf +# endif +#endif + +/* A crude PyString_FromFormat implementation for old Pythons */ +#if PY_VERSION_HEX < 0x02020000 + +#ifndef SWIG_PYBUFFER_SIZE +# define SWIG_PYBUFFER_SIZE 1024 +#endif + +static PyObject * +PyString_FromFormat(const char *fmt, ...) { + va_list ap; + char buf[SWIG_PYBUFFER_SIZE * 2]; + int res; + va_start(ap, fmt); + res = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + return (res < 0 || res >= (int)sizeof(buf)) ? 0 : PyString_FromString(buf); +} +#endif + +#ifndef PyObject_DEL +# define PyObject_DEL PyObject_Del +#endif + +/* A crude PyExc_StopIteration exception for old Pythons */ +#if PY_VERSION_HEX < 0x02020000 +# ifndef PyExc_StopIteration +# define PyExc_StopIteration PyExc_RuntimeError +# endif +# ifndef PyObject_GenericGetAttr +# define PyObject_GenericGetAttr 0 +# endif +#endif + +/* Py_NotImplemented is defined in 2.1 and up. */ +#if PY_VERSION_HEX < 0x02010000 +# ifndef Py_NotImplemented +# define Py_NotImplemented PyExc_RuntimeError +# endif +#endif + +/* A crude PyString_AsStringAndSize implementation for old Pythons */ +#if PY_VERSION_HEX < 0x02010000 +# ifndef PyString_AsStringAndSize +# define PyString_AsStringAndSize(obj, s, len) {*s = PyString_AsString(obj); *len = *s ? strlen(*s) : 0;} +# endif +#endif + +/* PySequence_Size for old Pythons */ +#if PY_VERSION_HEX < 0x02000000 +# ifndef PySequence_Size +# define PySequence_Size PySequence_Length +# endif +#endif + +/* PyBool_FromLong for old Pythons */ +#if PY_VERSION_HEX < 0x02030000 +static +PyObject *PyBool_FromLong(long ok) +{ + PyObject *result = ok ? 
Py_True : Py_False; + Py_INCREF(result); + return result; +} +#endif + +/* Py_ssize_t for old Pythons */ +/* This code is as recommended by: */ +/* http://www.python.org/dev/peps/pep-0353/#conversion-guidelines */ +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +# define PY_SSIZE_T_MAX INT_MAX +# define PY_SSIZE_T_MIN INT_MIN +typedef inquiry lenfunc; +typedef intargfunc ssizeargfunc; +typedef intintargfunc ssizessizeargfunc; +typedef intobjargproc ssizeobjargproc; +typedef intintobjargproc ssizessizeobjargproc; +typedef getreadbufferproc readbufferproc; +typedef getwritebufferproc writebufferproc; +typedef getsegcountproc segcountproc; +typedef getcharbufferproc charbufferproc; +static long PyNumber_AsSsize_t (PyObject *x, void *SWIGUNUSEDPARM(exc)) +{ + long result = 0; + PyObject *i = PyNumber_Int(x); + if (i) { + result = PyInt_AsLong(i); + Py_DECREF(i); + } + return result; +} +#endif + +#if PY_VERSION_HEX < 0x02050000 +#define PyInt_FromSize_t(x) PyInt_FromLong((long)x) +#endif + +#if PY_VERSION_HEX < 0x02040000 +#define Py_VISIT(op) \ + do { \ + if (op) { \ + int vret = visit((op), arg); \ + if (vret) \ + return vret; \ + } \ + } while (0) +#endif + +#if PY_VERSION_HEX < 0x02030000 +typedef struct { + PyTypeObject type; + PyNumberMethods as_number; + PyMappingMethods as_mapping; + PySequenceMethods as_sequence; + PyBufferProcs as_buffer; + PyObject *name, *slots; +} PyHeapTypeObject; +#endif + +#if PY_VERSION_HEX < 0x02030000 +typedef destructor freefunc; +#endif + +#if ((PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION > 6) || \ + (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION > 0) || \ + (PY_MAJOR_VERSION > 3)) +# define SWIGPY_USE_CAPSULE +# define SWIGPY_CAPSULE_NAME ((char*)"swig_runtime_data" SWIG_RUNTIME_VERSION ".type_pointer_capsule" SWIG_TYPE_TABLE_NAME) +#endif + +#if PY_VERSION_HEX < 0x03020000 +#define PyDescr_TYPE(x) (((PyDescrObject *)(x))->d_type) +#define PyDescr_NAME(x) (((PyDescrObject *)(x))->d_name) +#define Py_hash_t long +#endif +/* ----------------------------------------------------------------------------- + * error manipulation + * ----------------------------------------------------------------------------- */ + +SWIGRUNTIME PyObject* +SWIG_Python_ErrorType(int code) { + PyObject* type = 0; + switch(code) { + case SWIG_MemoryError: + type = PyExc_MemoryError; + break; + case SWIG_IOError: + type = PyExc_IOError; + break; + case SWIG_RuntimeError: + type = PyExc_RuntimeError; + break; + case SWIG_IndexError: + type = PyExc_IndexError; + break; + case SWIG_TypeError: + type = PyExc_TypeError; + break; + case SWIG_DivisionByZero: + type = PyExc_ZeroDivisionError; + break; + case SWIG_OverflowError: + type = PyExc_OverflowError; + break; + case SWIG_SyntaxError: + type = PyExc_SyntaxError; + break; + case SWIG_ValueError: + type = PyExc_ValueError; + break; + case SWIG_SystemError: + type = PyExc_SystemError; + break; + case SWIG_AttributeError: + type = PyExc_AttributeError; + break; + default: + type = PyExc_RuntimeError; + } + return type; +} + + +SWIGRUNTIME void +SWIG_Python_AddErrorMsg(const char* mesg) +{ + PyObject *type = 0; + PyObject *value = 0; + PyObject *traceback = 0; + + if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback); + if (value) { + char *tmp; + PyObject *old_str = PyObject_Str(value); + PyErr_Clear(); + Py_XINCREF(type); + + PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg); + SWIG_Python_str_DelForPy3(tmp); + Py_DECREF(old_str); + Py_DECREF(value); + } else { + 
PyErr_SetString(PyExc_RuntimeError, mesg); + } +} +#if defined(SWIG_PYTHON_NO_THREADS) +# if defined(SWIG_PYTHON_THREADS) +# undef SWIG_PYTHON_THREADS +# endif +#endif +#if defined(SWIG_PYTHON_THREADS) /* Threading support is enabled */ +# if !defined(SWIG_PYTHON_USE_GIL) && !defined(SWIG_PYTHON_NO_USE_GIL) +# if (PY_VERSION_HEX >= 0x02030000) /* For 2.3 or later, use the PyGILState calls */ +# define SWIG_PYTHON_USE_GIL +# endif +# endif +# if defined(SWIG_PYTHON_USE_GIL) /* Use PyGILState threads calls */ +# ifndef SWIG_PYTHON_INITIALIZE_THREADS +# define SWIG_PYTHON_INITIALIZE_THREADS PyEval_InitThreads() +# endif +# ifdef __cplusplus /* C++ code */ + class SWIG_Python_Thread_Block { + bool status; + PyGILState_STATE state; + public: + void end() { if (status) { PyGILState_Release(state); status = false;} } + SWIG_Python_Thread_Block() : status(true), state(PyGILState_Ensure()) {} + ~SWIG_Python_Thread_Block() { end(); } + }; + class SWIG_Python_Thread_Allow { + bool status; + PyThreadState *save; + public: + void end() { if (status) { PyEval_RestoreThread(save); status = false; }} + SWIG_Python_Thread_Allow() : status(true), save(PyEval_SaveThread()) {} + ~SWIG_Python_Thread_Allow() { end(); } + }; +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK SWIG_Python_Thread_Block _swig_thread_block +# define SWIG_PYTHON_THREAD_END_BLOCK _swig_thread_block.end() +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW SWIG_Python_Thread_Allow _swig_thread_allow +# define SWIG_PYTHON_THREAD_END_ALLOW _swig_thread_allow.end() +# else /* C code */ +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK PyGILState_STATE _swig_thread_block = PyGILState_Ensure() +# define SWIG_PYTHON_THREAD_END_BLOCK PyGILState_Release(_swig_thread_block) +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW PyThreadState *_swig_thread_allow = PyEval_SaveThread() +# define SWIG_PYTHON_THREAD_END_ALLOW PyEval_RestoreThread(_swig_thread_allow) +# endif +# else /* Old thread way, not implemented, user must provide it */ +# if !defined(SWIG_PYTHON_INITIALIZE_THREADS) +# define SWIG_PYTHON_INITIALIZE_THREADS +# endif +# if !defined(SWIG_PYTHON_THREAD_BEGIN_BLOCK) +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK +# endif +# if !defined(SWIG_PYTHON_THREAD_END_BLOCK) +# define SWIG_PYTHON_THREAD_END_BLOCK +# endif +# if !defined(SWIG_PYTHON_THREAD_BEGIN_ALLOW) +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW +# endif +# if !defined(SWIG_PYTHON_THREAD_END_ALLOW) +# define SWIG_PYTHON_THREAD_END_ALLOW +# endif +# endif +#else /* No thread support */ +# define SWIG_PYTHON_INITIALIZE_THREADS +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK +# define SWIG_PYTHON_THREAD_END_BLOCK +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW +# define SWIG_PYTHON_THREAD_END_ALLOW +#endif +/* ----------------------------------------------------------------------------- + * Python API portion that goes into the runtime + * ----------------------------------------------------------------------------- */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* ----------------------------------------------------------------------------- + * Constant declarations + * ----------------------------------------------------------------------------- */ + +/* Constant Types */ +#define SWIG_PY_POINTER 4 +#define SWIG_PY_BINARY 5 + +/* Constant information structure */ +typedef struct swig_const_info { + int type; + char *name; + long lvalue; + double dvalue; + void *pvalue; + swig_type_info **ptype; +} swig_const_info; + + +/* ----------------------------------------------------------------------------- + * Wrapper of PyInstanceMethod_New() used 
in Python 3 + * It is exported to the generated module, used for -fastproxy + * ----------------------------------------------------------------------------- */ +#if PY_VERSION_HEX >= 0x03000000 +SWIGRUNTIME PyObject* SWIG_PyInstanceMethod_New(PyObject *SWIGUNUSEDPARM(self), PyObject *func) +{ + return PyInstanceMethod_New(func); +} +#else +SWIGRUNTIME PyObject* SWIG_PyInstanceMethod_New(PyObject *SWIGUNUSEDPARM(self), PyObject *SWIGUNUSEDPARM(func)) +{ + return NULL; +} +#endif + +#ifdef __cplusplus +} +#endif + +/* ----------------------------------------------------------------------------- + * pyrun.swg + * + * This file contains the runtime support for Python modules + * and includes code for managing global variables and pointer + * type checking. + * + * ----------------------------------------------------------------------------- */ + +/* Common SWIG API */ + +/* for raw pointers */ +#define SWIG_Python_ConvertPtr(obj, pptr, type, flags) SWIG_Python_ConvertPtrAndOwn(obj, pptr, type, flags, 0) +#define SWIG_ConvertPtr(obj, pptr, type, flags) SWIG_Python_ConvertPtr(obj, pptr, type, flags) +#define SWIG_ConvertPtrAndOwn(obj,pptr,type,flags,own) SWIG_Python_ConvertPtrAndOwn(obj, pptr, type, flags, own) + +#ifdef SWIGPYTHON_BUILTIN +#define SWIG_NewPointerObj(ptr, type, flags) SWIG_Python_NewPointerObj(self, ptr, type, flags) +#else +#define SWIG_NewPointerObj(ptr, type, flags) SWIG_Python_NewPointerObj(NULL, ptr, type, flags) +#endif + +#define SWIG_InternalNewPointerObj(ptr, type, flags) SWIG_Python_NewPointerObj(NULL, ptr, type, flags) + +#define SWIG_CheckImplicit(ty) SWIG_Python_CheckImplicit(ty) +#define SWIG_AcquirePtr(ptr, src) SWIG_Python_AcquirePtr(ptr, src) +#define swig_owntype int + +/* for raw packed data */ +#define SWIG_ConvertPacked(obj, ptr, sz, ty) SWIG_Python_ConvertPacked(obj, ptr, sz, ty) +#define SWIG_NewPackedObj(ptr, sz, type) SWIG_Python_NewPackedObj(ptr, sz, type) + +/* for class or struct pointers */ +#define SWIG_ConvertInstance(obj, pptr, type, flags) SWIG_ConvertPtr(obj, pptr, type, flags) +#define SWIG_NewInstanceObj(ptr, type, flags) SWIG_NewPointerObj(ptr, type, flags) + +/* for C or C++ function pointers */ +#define SWIG_ConvertFunctionPtr(obj, pptr, type) SWIG_Python_ConvertFunctionPtr(obj, pptr, type) +#define SWIG_NewFunctionPtrObj(ptr, type) SWIG_Python_NewPointerObj(NULL, ptr, type, 0) + +/* for C++ member pointers, ie, member methods */ +#define SWIG_ConvertMember(obj, ptr, sz, ty) SWIG_Python_ConvertPacked(obj, ptr, sz, ty) +#define SWIG_NewMemberObj(ptr, sz, type) SWIG_Python_NewPackedObj(ptr, sz, type) + + +/* Runtime API */ + +#define SWIG_GetModule(clientdata) SWIG_Python_GetModule(clientdata) +#define SWIG_SetModule(clientdata, pointer) SWIG_Python_SetModule(pointer) +#define SWIG_NewClientData(obj) SwigPyClientData_New(obj) + +#define SWIG_SetErrorObj SWIG_Python_SetErrorObj +#define SWIG_SetErrorMsg SWIG_Python_SetErrorMsg +#define SWIG_ErrorType(code) SWIG_Python_ErrorType(code) +#define SWIG_Error(code, msg) SWIG_Python_SetErrorMsg(SWIG_ErrorType(code), msg) +#define SWIG_fail goto fail + + +/* Runtime API implementation */ + +/* Error manipulation */ + +SWIGINTERN void +SWIG_Python_SetErrorObj(PyObject *errtype, PyObject *obj) { + SWIG_PYTHON_THREAD_BEGIN_BLOCK; + PyErr_SetObject(errtype, obj); + Py_DECREF(obj); + SWIG_PYTHON_THREAD_END_BLOCK; +} + +SWIGINTERN void +SWIG_Python_SetErrorMsg(PyObject *errtype, const char *msg) { + SWIG_PYTHON_THREAD_BEGIN_BLOCK; + PyErr_SetString(errtype, msg); + SWIG_PYTHON_THREAD_END_BLOCK; +} + 
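The macros just defined (`SWIG_Error`, `SWIG_fail`, `SWIG_ErrorType`) are the runtime's idiom for raising Python exceptions from wrapper code: `SWIG_ErrorType` maps a SWIG error code to the matching `PyExc_*` object, and `SWIG_fail` jumps to the wrapper's cleanup label. A minimal hand-written wrapper in that style is sketched below; the function, its name, and the message are hypothetical, and it assumes a translation unit that includes this runtime:

```cpp
#include <cmath>

// Hypothetical wrapper: SWIG_Error selects PyExc_ValueError via
// SWIG_ErrorType(SWIG_ValueError), and SWIG_fail expands to `goto fail`,
// exactly as SWIG-generated wrappers use these macros.
static PyObject *wrap_checked_sqrt(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
    double x = 0.0;
    if (!PyArg_ParseTuple(args, "d", &x)) SWIG_fail;  // TypeError already set
    if (x < 0.0) {
        SWIG_Error(SWIG_ValueError, "checked_sqrt: argument must be >= 0");
        SWIG_fail;
    }
    return PyFloat_FromDouble(std::sqrt(x));
fail:
    return NULL;  // a Python exception is already set on this path
}
```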
+#define SWIG_Python_Raise(obj, type, desc) SWIG_Python_SetErrorObj(SWIG_Python_ExceptionType(desc), obj) + +/* Set a constant value */ + +#if defined(SWIGPYTHON_BUILTIN) + +SWIGINTERN void +SwigPyBuiltin_AddPublicSymbol(PyObject *seq, const char *key) { + PyObject *s = PyString_InternFromString(key); + PyList_Append(seq, s); + Py_DECREF(s); +} + +SWIGINTERN void +SWIG_Python_SetConstant(PyObject *d, PyObject *public_interface, const char *name, PyObject *obj) { +#if PY_VERSION_HEX < 0x02030000 + PyDict_SetItemString(d, (char *)name, obj); +#else + PyDict_SetItemString(d, name, obj); +#endif + Py_DECREF(obj); + if (public_interface) + SwigPyBuiltin_AddPublicSymbol(public_interface, name); +} + +#else + +SWIGINTERN void +SWIG_Python_SetConstant(PyObject *d, const char *name, PyObject *obj) { +#if PY_VERSION_HEX < 0x02030000 + PyDict_SetItemString(d, (char *)name, obj); +#else + PyDict_SetItemString(d, name, obj); +#endif + Py_DECREF(obj); +} + +#endif + +/* Append a value to the result obj */ + +SWIGINTERN PyObject* +SWIG_Python_AppendOutput(PyObject* result, PyObject* obj) { +#if !defined(SWIG_PYTHON_OUTPUT_TUPLE) + if (!result) { + result = obj; + } else if (result == Py_None) { + Py_DECREF(result); + result = obj; + } else { + if (!PyList_Check(result)) { + PyObject *o2 = result; + result = PyList_New(1); + PyList_SetItem(result, 0, o2); + } + PyList_Append(result,obj); + Py_DECREF(obj); + } + return result; +#else + PyObject* o2; + PyObject* o3; + if (!result) { + result = obj; + } else if (result == Py_None) { + Py_DECREF(result); + result = obj; + } else { + if (!PyTuple_Check(result)) { + o2 = result; + result = PyTuple_New(1); + PyTuple_SET_ITEM(result, 0, o2); + } + o3 = PyTuple_New(1); + PyTuple_SET_ITEM(o3, 0, obj); + o2 = result; + result = PySequence_Concat(o2, o3); + Py_DECREF(o2); + Py_DECREF(o3); + } + return result; +#endif +} + +/* Unpack the argument tuple */ + +SWIGINTERN Py_ssize_t +SWIG_Python_UnpackTuple(PyObject *args, const char *name, Py_ssize_t min, Py_ssize_t max, PyObject **objs) +{ + if (!args) { + if (!min && !max) { + return 1; + } else { + PyErr_Format(PyExc_TypeError, "%s expected %s%d arguments, got none", + name, (min == max ? "" : "at least "), (int)min); + return 0; + } + } + if (!PyTuple_Check(args)) { + if (min <= 1 && max >= 1) { + Py_ssize_t i; + objs[0] = args; + for (i = 1; i < max; ++i) { + objs[i] = 0; + } + return 2; + } + PyErr_SetString(PyExc_SystemError, "UnpackTuple() argument list is not a tuple"); + return 0; + } else { + Py_ssize_t l = PyTuple_GET_SIZE(args); + if (l < min) { + PyErr_Format(PyExc_TypeError, "%s expected %s%d arguments, got %d", + name, (min == max ? "" : "at least "), (int)min, (int)l); + return 0; + } else if (l > max) { + PyErr_Format(PyExc_TypeError, "%s expected %s%d arguments, got %d", + name, (min == max ? 
"" : "at most "), (int)max, (int)l); + return 0; + } else { + Py_ssize_t i; + for (i = 0; i < l; ++i) { + objs[i] = PyTuple_GET_ITEM(args, i); + } + for (; l < max; ++l) { + objs[l] = 0; + } + return i + 1; + } + } +} + +/* A functor is a function object with one single object argument */ +#if PY_VERSION_HEX >= 0x02020000 +#define SWIG_Python_CallFunctor(functor, obj) PyObject_CallFunctionObjArgs(functor, obj, NULL); +#else +#define SWIG_Python_CallFunctor(functor, obj) PyObject_CallFunction(functor, "O", obj); +#endif + +/* + Helper for static pointer initialization for both C and C++ code, for example + static PyObject *SWIG_STATIC_POINTER(MyVar) = NewSomething(...); +*/ +#ifdef __cplusplus +#define SWIG_STATIC_POINTER(var) var +#else +#define SWIG_STATIC_POINTER(var) var = 0; if (!var) var +#endif + +/* ----------------------------------------------------------------------------- + * Pointer declarations + * ----------------------------------------------------------------------------- */ + +/* Flags for new pointer objects */ +#define SWIG_POINTER_NOSHADOW (SWIG_POINTER_OWN << 1) +#define SWIG_POINTER_NEW (SWIG_POINTER_NOSHADOW | SWIG_POINTER_OWN) + +#define SWIG_POINTER_IMPLICIT_CONV (SWIG_POINTER_DISOWN << 1) + +#define SWIG_BUILTIN_TP_INIT (SWIG_POINTER_OWN << 2) +#define SWIG_BUILTIN_INIT (SWIG_BUILTIN_TP_INIT | SWIG_POINTER_OWN) + +#ifdef __cplusplus +extern "C" { +#endif + +/* How to access Py_None */ +#if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# ifndef SWIG_PYTHON_NO_BUILD_NONE +# ifndef SWIG_PYTHON_BUILD_NONE +# define SWIG_PYTHON_BUILD_NONE +# endif +# endif +#endif + +#ifdef SWIG_PYTHON_BUILD_NONE +# ifdef Py_None +# undef Py_None +# define Py_None SWIG_Py_None() +# endif +SWIGRUNTIMEINLINE PyObject * +_SWIG_Py_None(void) +{ + PyObject *none = Py_BuildValue((char*)""); + Py_DECREF(none); + return none; +} +SWIGRUNTIME PyObject * +SWIG_Py_None(void) +{ + static PyObject *SWIG_STATIC_POINTER(none) = _SWIG_Py_None(); + return none; +} +#endif + +/* The python void return value */ + +SWIGRUNTIMEINLINE PyObject * +SWIG_Py_Void(void) +{ + PyObject *none = Py_None; + Py_INCREF(none); + return none; +} + +/* SwigPyClientData */ + +typedef struct { + PyObject *klass; + PyObject *newraw; + PyObject *newargs; + PyObject *destroy; + int delargs; + int implicitconv; + PyTypeObject *pytype; +} SwigPyClientData; + +SWIGRUNTIMEINLINE int +SWIG_Python_CheckImplicit(swig_type_info *ty) +{ + SwigPyClientData *data = (SwigPyClientData *)ty->clientdata; + return data ? data->implicitconv : 0; +} + +SWIGRUNTIMEINLINE PyObject * +SWIG_Python_ExceptionType(swig_type_info *desc) { + SwigPyClientData *data = desc ? (SwigPyClientData *) desc->clientdata : 0; + PyObject *klass = data ? data->klass : 0; + return (klass ? 
klass : PyExc_RuntimeError); +} + + +SWIGRUNTIME SwigPyClientData * +SwigPyClientData_New(PyObject* obj) +{ + if (!obj) { + return 0; + } else { + SwigPyClientData *data = (SwigPyClientData *)malloc(sizeof(SwigPyClientData)); + /* the klass element */ + data->klass = obj; + Py_INCREF(data->klass); + /* the newraw method and newargs arguments used to create a new raw instance */ + if (PyClass_Check(obj)) { + data->newraw = 0; + data->newargs = obj; + Py_INCREF(obj); + } else { +#if (PY_VERSION_HEX < 0x02020000) + data->newraw = 0; +#else + data->newraw = PyObject_GetAttrString(data->klass, (char *)"__new__"); +#endif + if (data->newraw) { + Py_INCREF(data->newraw); + data->newargs = PyTuple_New(1); + PyTuple_SetItem(data->newargs, 0, obj); + } else { + data->newargs = obj; + } + Py_INCREF(data->newargs); + } + /* the destroy method, aka as the C++ delete method */ + data->destroy = PyObject_GetAttrString(data->klass, (char *)"__swig_destroy__"); + if (PyErr_Occurred()) { + PyErr_Clear(); + data->destroy = 0; + } + if (data->destroy) { + int flags; + Py_INCREF(data->destroy); + flags = PyCFunction_GET_FLAGS(data->destroy); +#ifdef METH_O + data->delargs = !(flags & (METH_O)); +#else + data->delargs = 0; +#endif + } else { + data->delargs = 0; + } + data->implicitconv = 0; + data->pytype = 0; + return data; + } +} + +SWIGRUNTIME void +SwigPyClientData_Del(SwigPyClientData *data) { + Py_XDECREF(data->newraw); + Py_XDECREF(data->newargs); + Py_XDECREF(data->destroy); +} + +/* =============== SwigPyObject =====================*/ + +typedef struct { + PyObject_HEAD + void *ptr; + swig_type_info *ty; + int own; + PyObject *next; +#ifdef SWIGPYTHON_BUILTIN + PyObject *dict; +#endif +} SwigPyObject; + + +#ifdef SWIGPYTHON_BUILTIN + +SWIGRUNTIME PyObject * +SwigPyObject_get___dict__(PyObject *v, PyObject *SWIGUNUSEDPARM(args)) +{ + SwigPyObject *sobj = (SwigPyObject *)v; + + if (!sobj->dict) + sobj->dict = PyDict_New(); + + Py_INCREF(sobj->dict); + return sobj->dict; +} + +#endif + +SWIGRUNTIME PyObject * +SwigPyObject_long(SwigPyObject *v) +{ + return PyLong_FromVoidPtr(v->ptr); +} + +SWIGRUNTIME PyObject * +SwigPyObject_format(const char* fmt, SwigPyObject *v) +{ + PyObject *res = NULL; + PyObject *args = PyTuple_New(1); + if (args) { + if (PyTuple_SetItem(args, 0, SwigPyObject_long(v)) == 0) { + PyObject *ofmt = SWIG_Python_str_FromChar(fmt); + if (ofmt) { +#if PY_VERSION_HEX >= 0x03000000 + res = PyUnicode_Format(ofmt,args); +#else + res = PyString_Format(ofmt,args); +#endif + Py_DECREF(ofmt); + } + Py_DECREF(args); + } + } + return res; +} + +SWIGRUNTIME PyObject * +SwigPyObject_oct(SwigPyObject *v) +{ + return SwigPyObject_format("%o",v); +} + +SWIGRUNTIME PyObject * +SwigPyObject_hex(SwigPyObject *v) +{ + return SwigPyObject_format("%x",v); +} + +SWIGRUNTIME PyObject * +#ifdef METH_NOARGS +SwigPyObject_repr(SwigPyObject *v) +#else +SwigPyObject_repr(SwigPyObject *v, PyObject *args) +#endif +{ + const char *name = SWIG_TypePrettyName(v->ty); + PyObject *repr = SWIG_Python_str_FromFormat("", (name ? 
name : "unknown"), (void *)v); + if (v->next) { +# ifdef METH_NOARGS + PyObject *nrep = SwigPyObject_repr((SwigPyObject *)v->next); +# else + PyObject *nrep = SwigPyObject_repr((SwigPyObject *)v->next, args); +# endif +# if PY_VERSION_HEX >= 0x03000000 + PyObject *joined = PyUnicode_Concat(repr, nrep); + Py_DecRef(repr); + Py_DecRef(nrep); + repr = joined; +# else + PyString_ConcatAndDel(&repr,nrep); +# endif + } + return repr; +} + +SWIGRUNTIME int +SwigPyObject_compare(SwigPyObject *v, SwigPyObject *w) +{ + void *i = v->ptr; + void *j = w->ptr; + return (i < j) ? -1 : ((i > j) ? 1 : 0); +} + +/* Added for Python 3.x, would it also be useful for Python 2.x? */ +SWIGRUNTIME PyObject* +SwigPyObject_richcompare(SwigPyObject *v, SwigPyObject *w, int op) +{ + PyObject* res; + if( op != Py_EQ && op != Py_NE ) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + res = PyBool_FromLong( (SwigPyObject_compare(v, w)==0) == (op == Py_EQ) ? 1 : 0); + return res; +} + + +SWIGRUNTIME PyTypeObject* SwigPyObject_TypeOnce(void); + +#ifdef SWIGPYTHON_BUILTIN +static swig_type_info *SwigPyObject_stype = 0; +SWIGRUNTIME PyTypeObject* +SwigPyObject_type(void) { + SwigPyClientData *cd; + assert(SwigPyObject_stype); + cd = (SwigPyClientData*) SwigPyObject_stype->clientdata; + assert(cd); + assert(cd->pytype); + return cd->pytype; +} +#else +SWIGRUNTIME PyTypeObject* +SwigPyObject_type(void) { + static PyTypeObject *SWIG_STATIC_POINTER(type) = SwigPyObject_TypeOnce(); + return type; +} +#endif + +SWIGRUNTIMEINLINE int +SwigPyObject_Check(PyObject *op) { +#ifdef SWIGPYTHON_BUILTIN + PyTypeObject *target_tp = SwigPyObject_type(); + if (PyType_IsSubtype(op->ob_type, target_tp)) + return 1; + return (strcmp(op->ob_type->tp_name, "SwigPyObject") == 0); +#else + return (Py_TYPE(op) == SwigPyObject_type()) + || (strcmp(Py_TYPE(op)->tp_name,"SwigPyObject") == 0); +#endif +} + +SWIGRUNTIME PyObject * +SwigPyObject_New(void *ptr, swig_type_info *ty, int own); + +SWIGRUNTIME void +SwigPyObject_dealloc(PyObject *v) +{ + SwigPyObject *sobj = (SwigPyObject *) v; + PyObject *next = sobj->next; + if (sobj->own == SWIG_POINTER_OWN) { + swig_type_info *ty = sobj->ty; + SwigPyClientData *data = ty ? (SwigPyClientData *) ty->clientdata : 0; + PyObject *destroy = data ? data->destroy : 0; + if (destroy) { + /* destroy is always a VARARGS method */ + PyObject *res; + + /* PyObject_CallFunction() has the potential to silently drop + the active active exception. In cases of unnamed temporary + variable or where we just finished iterating over a generator + StopIteration will be active right now, and this needs to + remain true upon return from SwigPyObject_dealloc. So save + and restore. */ + + PyObject *val = NULL, *type = NULL, *tb = NULL; + PyErr_Fetch(&val, &type, &tb); + + if (data->delargs) { + /* we need to create a temporary object to carry the destroy operation */ + PyObject *tmp = SwigPyObject_New(sobj->ptr, ty, 0); + res = SWIG_Python_CallFunctor(destroy, tmp); + Py_DECREF(tmp); + } else { + PyCFunction meth = PyCFunction_GET_FUNCTION(destroy); + PyObject *mself = PyCFunction_GET_SELF(destroy); + res = ((*meth)(mself, v)); + } + if (!res) + PyErr_WriteUnraisable(destroy); + + PyErr_Restore(val, type, tb); + + Py_XDECREF(res); + } +#if !defined(SWIG_PYTHON_SILENT_MEMLEAK) + else { + const char *name = SWIG_TypePrettyName(ty); + printf("swig/python detected a memory leak of type '%s', no destructor found.\n", (name ? 
name : "unknown")); + } +#endif + } + Py_XDECREF(next); + PyObject_DEL(v); +} + +SWIGRUNTIME PyObject* +SwigPyObject_append(PyObject* v, PyObject* next) +{ + SwigPyObject *sobj = (SwigPyObject *) v; +#ifndef METH_O + PyObject *tmp = 0; + if (!PyArg_ParseTuple(next,(char *)"O:append", &tmp)) return NULL; + next = tmp; +#endif + if (!SwigPyObject_Check(next)) { + PyErr_SetString(PyExc_TypeError, "Attempt to append a non SwigPyObject"); + return NULL; + } + sobj->next = next; + Py_INCREF(next); + return SWIG_Py_Void(); +} + +SWIGRUNTIME PyObject* +#ifdef METH_NOARGS +SwigPyObject_next(PyObject* v) +#else +SwigPyObject_next(PyObject* v, PyObject *SWIGUNUSEDPARM(args)) +#endif +{ + SwigPyObject *sobj = (SwigPyObject *) v; + if (sobj->next) { + Py_INCREF(sobj->next); + return sobj->next; + } else { + return SWIG_Py_Void(); + } +} + +SWIGINTERN PyObject* +#ifdef METH_NOARGS +SwigPyObject_disown(PyObject *v) +#else +SwigPyObject_disown(PyObject* v, PyObject *SWIGUNUSEDPARM(args)) +#endif +{ + SwigPyObject *sobj = (SwigPyObject *)v; + sobj->own = 0; + return SWIG_Py_Void(); +} + +SWIGINTERN PyObject* +#ifdef METH_NOARGS +SwigPyObject_acquire(PyObject *v) +#else +SwigPyObject_acquire(PyObject* v, PyObject *SWIGUNUSEDPARM(args)) +#endif +{ + SwigPyObject *sobj = (SwigPyObject *)v; + sobj->own = SWIG_POINTER_OWN; + return SWIG_Py_Void(); +} + +SWIGINTERN PyObject* +SwigPyObject_own(PyObject *v, PyObject *args) +{ + PyObject *val = 0; +#if (PY_VERSION_HEX < 0x02020000) + if (!PyArg_ParseTuple(args,(char *)"|O:own",&val)) +#elif (PY_VERSION_HEX < 0x02050000) + if (!PyArg_UnpackTuple(args, (char *)"own", 0, 1, &val)) +#else + if (!PyArg_UnpackTuple(args, "own", 0, 1, &val)) +#endif + { + return NULL; + } + else + { + SwigPyObject *sobj = (SwigPyObject *)v; + PyObject *obj = PyBool_FromLong(sobj->own); + if (val) { +#ifdef METH_NOARGS + if (PyObject_IsTrue(val)) { + SwigPyObject_acquire(v); + } else { + SwigPyObject_disown(v); + } +#else + if (PyObject_IsTrue(val)) { + SwigPyObject_acquire(v,args); + } else { + SwigPyObject_disown(v,args); + } +#endif + } + return obj; + } +} + +#ifdef METH_O +static PyMethodDef +swigobject_methods[] = { + {(char *)"disown", (PyCFunction)SwigPyObject_disown, METH_NOARGS, (char *)"releases ownership of the pointer"}, + {(char *)"acquire", (PyCFunction)SwigPyObject_acquire, METH_NOARGS, (char *)"acquires ownership of the pointer"}, + {(char *)"own", (PyCFunction)SwigPyObject_own, METH_VARARGS, (char *)"returns/sets ownership of the pointer"}, + {(char *)"append", (PyCFunction)SwigPyObject_append, METH_O, (char *)"appends another 'this' object"}, + {(char *)"next", (PyCFunction)SwigPyObject_next, METH_NOARGS, (char *)"returns the next 'this' object"}, + {(char *)"__repr__",(PyCFunction)SwigPyObject_repr, METH_NOARGS, (char *)"returns object representation"}, + {0, 0, 0, 0} +}; +#else +static PyMethodDef +swigobject_methods[] = { + {(char *)"disown", (PyCFunction)SwigPyObject_disown, METH_VARARGS, (char *)"releases ownership of the pointer"}, + {(char *)"acquire", (PyCFunction)SwigPyObject_acquire, METH_VARARGS, (char *)"acquires ownership of the pointer"}, + {(char *)"own", (PyCFunction)SwigPyObject_own, METH_VARARGS, (char *)"returns/sets ownership of the pointer"}, + {(char *)"append", (PyCFunction)SwigPyObject_append, METH_VARARGS, (char *)"appends another 'this' object"}, + {(char *)"next", (PyCFunction)SwigPyObject_next, METH_VARARGS, (char *)"returns the next 'this' object"}, + {(char *)"__repr__",(PyCFunction)SwigPyObject_repr, METH_VARARGS, (char *)"returns object 
representation"}, + {0, 0, 0, 0} +}; +#endif + +#if PY_VERSION_HEX < 0x02020000 +SWIGINTERN PyObject * +SwigPyObject_getattr(SwigPyObject *sobj,char *name) +{ + return Py_FindMethod(swigobject_methods, (PyObject *)sobj, name); +} +#endif + +SWIGRUNTIME PyTypeObject* +SwigPyObject_TypeOnce(void) { + static char swigobject_doc[] = "Swig object carries a C/C++ instance pointer"; + + static PyNumberMethods SwigPyObject_as_number = { + (binaryfunc)0, /*nb_add*/ + (binaryfunc)0, /*nb_subtract*/ + (binaryfunc)0, /*nb_multiply*/ + /* nb_divide removed in Python 3 */ +#if PY_VERSION_HEX < 0x03000000 + (binaryfunc)0, /*nb_divide*/ +#endif + (binaryfunc)0, /*nb_remainder*/ + (binaryfunc)0, /*nb_divmod*/ + (ternaryfunc)0,/*nb_power*/ + (unaryfunc)0, /*nb_negative*/ + (unaryfunc)0, /*nb_positive*/ + (unaryfunc)0, /*nb_absolute*/ + (inquiry)0, /*nb_nonzero*/ + 0, /*nb_invert*/ + 0, /*nb_lshift*/ + 0, /*nb_rshift*/ + 0, /*nb_and*/ + 0, /*nb_xor*/ + 0, /*nb_or*/ +#if PY_VERSION_HEX < 0x03000000 + 0, /*nb_coerce*/ +#endif + (unaryfunc)SwigPyObject_long, /*nb_int*/ +#if PY_VERSION_HEX < 0x03000000 + (unaryfunc)SwigPyObject_long, /*nb_long*/ +#else + 0, /*nb_reserved*/ +#endif + (unaryfunc)0, /*nb_float*/ +#if PY_VERSION_HEX < 0x03000000 + (unaryfunc)SwigPyObject_oct, /*nb_oct*/ + (unaryfunc)SwigPyObject_hex, /*nb_hex*/ +#endif +#if PY_VERSION_HEX >= 0x03050000 /* 3.5 */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* nb_inplace_add -> nb_inplace_matrix_multiply */ +#elif PY_VERSION_HEX >= 0x03000000 /* 3.0 */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* nb_inplace_add -> nb_index, nb_inplace_divide removed */ +#elif PY_VERSION_HEX >= 0x02050000 /* 2.5.0 */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* nb_inplace_add -> nb_index */ +#elif PY_VERSION_HEX >= 0x02020000 /* 2.2.0 */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* nb_inplace_add -> nb_inplace_true_divide */ +#elif PY_VERSION_HEX >= 0x02000000 /* 2.0.0 */ + 0,0,0,0,0,0,0,0,0,0,0 /* nb_inplace_add -> nb_inplace_or */ +#endif + }; + + static PyTypeObject swigpyobject_type; + static int type_init = 0; + if (!type_init) { + const PyTypeObject tmp = { +#if PY_VERSION_HEX >= 0x03000000 + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + (char *)"SwigPyObject", /* tp_name */ + sizeof(SwigPyObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)SwigPyObject_dealloc, /* tp_dealloc */ + 0, /* tp_print */ +#if PY_VERSION_HEX < 0x02020000 + (getattrfunc)SwigPyObject_getattr, /* tp_getattr */ +#else + (getattrfunc)0, /* tp_getattr */ +#endif + (setattrfunc)0, /* tp_setattr */ +#if PY_VERSION_HEX >= 0x03000000 + 0, /* tp_reserved in 3.0.1, tp_compare in 3.0.0 but not used */ +#else + (cmpfunc)SwigPyObject_compare, /* tp_compare */ +#endif + (reprfunc)SwigPyObject_repr, /* tp_repr */ + &SwigPyObject_as_number, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + swigobject_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + (richcmpfunc)SwigPyObject_richcompare,/* tp_richcompare */ + 0, /* tp_weaklistoffset */ +#if PY_VERSION_HEX >= 0x02020000 + 0, /* tp_iter */ + 0, /* tp_iternext */ + swigobject_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 
0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ +#endif +#if PY_VERSION_HEX >= 0x02030000 + 0, /* tp_del */ +#endif +#if PY_VERSION_HEX >= 0x02060000 + 0, /* tp_version_tag */ +#endif +#if PY_VERSION_HEX >= 0x03040000 + 0, /* tp_finalize */ +#endif +#ifdef COUNT_ALLOCS + 0, /* tp_allocs */ + 0, /* tp_frees */ + 0, /* tp_maxalloc */ +#if PY_VERSION_HEX >= 0x02050000 + 0, /* tp_prev */ +#endif + 0 /* tp_next */ +#endif + }; + swigpyobject_type = tmp; + type_init = 1; +#if PY_VERSION_HEX < 0x02020000 + swigpyobject_type.ob_type = &PyType_Type; +#else + if (PyType_Ready(&swigpyobject_type) < 0) + return NULL; +#endif + } + return &swigpyobject_type; +} + +SWIGRUNTIME PyObject * +SwigPyObject_New(void *ptr, swig_type_info *ty, int own) +{ + SwigPyObject *sobj = PyObject_NEW(SwigPyObject, SwigPyObject_type()); + if (sobj) { + sobj->ptr = ptr; + sobj->ty = ty; + sobj->own = own; + sobj->next = 0; + } + return (PyObject *)sobj; +} + +/* ----------------------------------------------------------------------------- + * Implements a simple Swig Packed type, and use it instead of string + * ----------------------------------------------------------------------------- */ + +typedef struct { + PyObject_HEAD + void *pack; + swig_type_info *ty; + size_t size; +} SwigPyPacked; + +SWIGRUNTIME int +SwigPyPacked_print(SwigPyPacked *v, FILE *fp, int SWIGUNUSEDPARM(flags)) +{ + char result[SWIG_BUFFER_SIZE]; + fputs("<Swig Packed ", fp); + if (SWIG_PackDataName(result, v->pack, v->size, 0, sizeof(result))) { + fputs("at ", fp); + fputs(result, fp); + } + fputs(v->ty->name,fp); + fputs(">", fp); + return 0; +} + +SWIGRUNTIME PyObject * +SwigPyPacked_repr(SwigPyPacked *v) +{ + char result[SWIG_BUFFER_SIZE]; + if (SWIG_PackDataName(result, v->pack, v->size, 0, sizeof(result))) { + return SWIG_Python_str_FromFormat("<Swig Packed at %s%s>", result, v->ty->name); + } else { + return SWIG_Python_str_FromFormat("<Swig Packed %s>", v->ty->name); + } +} + +SWIGRUNTIME PyObject * +SwigPyPacked_str(SwigPyPacked *v) +{ + char result[SWIG_BUFFER_SIZE]; + if (SWIG_PackDataName(result, v->pack, v->size, 0, sizeof(result))){ + return SWIG_Python_str_FromFormat("%s%s", result, v->ty->name); + } else { + return SWIG_Python_str_FromChar(v->ty->name); + } +} + +SWIGRUNTIME int +SwigPyPacked_compare(SwigPyPacked *v, SwigPyPacked *w) +{ + size_t i = v->size; + size_t j = w->size; + int s = (i < j) ? -1 : ((i > j) ? 1 : 0); + return s ?
s : strncmp((char *)v->pack, (char *)w->pack, 2*v->size); +} + +SWIGRUNTIME PyTypeObject* SwigPyPacked_TypeOnce(void); + +SWIGRUNTIME PyTypeObject* +SwigPyPacked_type(void) { + static PyTypeObject *SWIG_STATIC_POINTER(type) = SwigPyPacked_TypeOnce(); + return type; +} + +SWIGRUNTIMEINLINE int +SwigPyPacked_Check(PyObject *op) { + return ((op)->ob_type == SwigPyPacked_TypeOnce()) + || (strcmp((op)->ob_type->tp_name,"SwigPyPacked") == 0); +} + +SWIGRUNTIME void +SwigPyPacked_dealloc(PyObject *v) +{ + if (SwigPyPacked_Check(v)) { + SwigPyPacked *sobj = (SwigPyPacked *) v; + free(sobj->pack); + } + PyObject_DEL(v); +} + +SWIGRUNTIME PyTypeObject* +SwigPyPacked_TypeOnce(void) { + static char swigpacked_doc[] = "Swig object carries a C/C++ instance pointer"; + static PyTypeObject swigpypacked_type; + static int type_init = 0; + if (!type_init) { + const PyTypeObject tmp = { +#if PY_VERSION_HEX>=0x03000000 + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + (char *)"SwigPyPacked", /* tp_name */ + sizeof(SwigPyPacked), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)SwigPyPacked_dealloc, /* tp_dealloc */ + (printfunc)SwigPyPacked_print, /* tp_print */ + (getattrfunc)0, /* tp_getattr */ + (setattrfunc)0, /* tp_setattr */ +#if PY_VERSION_HEX>=0x03000000 + 0, /* tp_reserved in 3.0.1 */ +#else + (cmpfunc)SwigPyPacked_compare, /* tp_compare */ +#endif + (reprfunc)SwigPyPacked_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + (reprfunc)SwigPyPacked_str, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + swigpacked_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ +#if PY_VERSION_HEX >= 0x02020000 + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ +#endif +#if PY_VERSION_HEX >= 0x02030000 + 0, /* tp_del */ +#endif +#if PY_VERSION_HEX >= 0x02060000 + 0, /* tp_version_tag */ +#endif +#if PY_VERSION_HEX >= 0x03040000 + 0, /* tp_finalize */ +#endif +#ifdef COUNT_ALLOCS + 0, /* tp_allocs */ + 0, /* tp_frees */ + 0, /* tp_maxalloc */ +#if PY_VERSION_HEX >= 0x02050000 + 0, /* tp_prev */ +#endif + 0 /* tp_next */ +#endif + }; + swigpypacked_type = tmp; + type_init = 1; +#if PY_VERSION_HEX < 0x02020000 + swigpypacked_type.ob_type = &PyType_Type; +#else + if (PyType_Ready(&swigpypacked_type) < 0) + return NULL; +#endif + } + return &swigpypacked_type; +} + +SWIGRUNTIME PyObject * +SwigPyPacked_New(void *ptr, size_t size, swig_type_info *ty) +{ + SwigPyPacked *sobj = PyObject_NEW(SwigPyPacked, SwigPyPacked_type()); + if (sobj) { + void *pack = malloc(size); + if (pack) { + memcpy(pack, ptr, size); + sobj->pack = pack; + sobj->ty = ty; + sobj->size = size; + } else { + PyObject_DEL((PyObject *) sobj); + sobj = 0; + } + } + return (PyObject *) sobj; +} + +SWIGRUNTIME swig_type_info * +SwigPyPacked_UnpackData(PyObject *obj, void *ptr, size_t size) +{ + if (SwigPyPacked_Check(obj)) { + SwigPyPacked *sobj = (SwigPyPacked *)obj; + if (sobj->size 
!= size) return 0; + memcpy(ptr, sobj->pack, size); + return sobj->ty; + } else { + return 0; + } +} + +/* ----------------------------------------------------------------------------- + * pointers/data manipulation + * ----------------------------------------------------------------------------- */ + +SWIGRUNTIMEINLINE PyObject * +_SWIG_This(void) +{ + return SWIG_Python_str_FromChar("this"); +} + +static PyObject *swig_this = NULL; + +SWIGRUNTIME PyObject * +SWIG_This(void) +{ + if (swig_this == NULL) + swig_this = _SWIG_This(); + return swig_this; +} + +/* #define SWIG_PYTHON_SLOW_GETSET_THIS */ + +/* TODO: I don't know how to implement the fast getset in Python 3 right now */ +#if PY_VERSION_HEX>=0x03000000 +#define SWIG_PYTHON_SLOW_GETSET_THIS +#endif + +SWIGRUNTIME SwigPyObject * +SWIG_Python_GetSwigThis(PyObject *pyobj) +{ + PyObject *obj; + + if (SwigPyObject_Check(pyobj)) + return (SwigPyObject *) pyobj; + +#ifdef SWIGPYTHON_BUILTIN + (void)obj; +# ifdef PyWeakref_CheckProxy + if (PyWeakref_CheckProxy(pyobj)) { + pyobj = PyWeakref_GET_OBJECT(pyobj); + if (pyobj && SwigPyObject_Check(pyobj)) + return (SwigPyObject*) pyobj; + } +# endif + return NULL; +#else + + obj = 0; + +#if (!defined(SWIG_PYTHON_SLOW_GETSET_THIS) && (PY_VERSION_HEX >= 0x02030000)) + if (PyInstance_Check(pyobj)) { + obj = _PyInstance_Lookup(pyobj, SWIG_This()); + } else { + PyObject **dictptr = _PyObject_GetDictPtr(pyobj); + if (dictptr != NULL) { + PyObject *dict = *dictptr; + obj = dict ? PyDict_GetItem(dict, SWIG_This()) : 0; + } else { +#ifdef PyWeakref_CheckProxy + if (PyWeakref_CheckProxy(pyobj)) { + PyObject *wobj = PyWeakref_GET_OBJECT(pyobj); + return wobj ? SWIG_Python_GetSwigThis(wobj) : 0; + } +#endif + obj = PyObject_GetAttr(pyobj,SWIG_This()); + if (obj) { + Py_DECREF(obj); + } else { + if (PyErr_Occurred()) PyErr_Clear(); + return 0; + } + } + } +#else + obj = PyObject_GetAttr(pyobj,SWIG_This()); + if (obj) { + Py_DECREF(obj); + } else { + if (PyErr_Occurred()) PyErr_Clear(); + return 0; + } +#endif + if (obj && !SwigPyObject_Check(obj)) { + /* a PyObject is called 'this', try to get the 'real this' + SwigPyObject from it */ + return SWIG_Python_GetSwigThis(obj); + } + return (SwigPyObject *)obj; +#endif +} + +/* Acquire a pointer value */ + +SWIGRUNTIME int +SWIG_Python_AcquirePtr(PyObject *obj, int own) { + if (own == SWIG_POINTER_OWN) { + SwigPyObject *sobj = SWIG_Python_GetSwigThis(obj); + if (sobj) { + int oldown = sobj->own; + sobj->own = own; + return oldown; + } + } + return 0; +} + +/* Convert a pointer value */ + +SWIGRUNTIME int +SWIG_Python_ConvertPtrAndOwn(PyObject *obj, void **ptr, swig_type_info *ty, int flags, int *own) { + int res; + SwigPyObject *sobj; + int implicit_conv = (flags & SWIG_POINTER_IMPLICIT_CONV) != 0; + + if (!obj) + return SWIG_ERROR; + if (obj == Py_None && !implicit_conv) { + if (ptr) + *ptr = 0; + return SWIG_OK; + } + + res = SWIG_ERROR; + + sobj = SWIG_Python_GetSwigThis(obj); + if (own) + *own = 0; + while (sobj) { + void *vptr = sobj->ptr; + if (ty) { + swig_type_info *to = sobj->ty; + if (to == ty) { + /* no type cast needed */ + if (ptr) *ptr = vptr; + break; + } else { + swig_cast_info *tc = SWIG_TypeCheck(to->name,ty); + if (!tc) { + sobj = (SwigPyObject *)sobj->next; + } else { + if (ptr) { + int newmemory = 0; + *ptr = SWIG_TypeCast(tc,vptr,&newmemory); + if (newmemory == SWIG_CAST_NEW_MEMORY) { + assert(own); /* badly formed typemap which will lead to a memory leak - it must set and use own to delete *ptr */ + if (own) + *own = *own | 
SWIG_CAST_NEW_MEMORY; + } + } + break; + } + } + } else { + if (ptr) *ptr = vptr; + break; + } + } + if (sobj) { + if (own) + *own = *own | sobj->own; + if (flags & SWIG_POINTER_DISOWN) { + sobj->own = 0; + } + res = SWIG_OK; + } else { + if (implicit_conv) { + SwigPyClientData *data = ty ? (SwigPyClientData *) ty->clientdata : 0; + if (data && !data->implicitconv) { + PyObject *klass = data->klass; + if (klass) { + PyObject *impconv; + data->implicitconv = 1; /* avoid recursion and call 'explicit' constructors*/ + impconv = SWIG_Python_CallFunctor(klass, obj); + data->implicitconv = 0; + if (PyErr_Occurred()) { + PyErr_Clear(); + impconv = 0; + } + if (impconv) { + SwigPyObject *iobj = SWIG_Python_GetSwigThis(impconv); + if (iobj) { + void *vptr; + res = SWIG_Python_ConvertPtrAndOwn((PyObject*)iobj, &vptr, ty, 0, 0); + if (SWIG_IsOK(res)) { + if (ptr) { + *ptr = vptr; + /* transfer the ownership to 'ptr' */ + iobj->own = 0; + res = SWIG_AddCast(res); + res = SWIG_AddNewMask(res); + } else { + res = SWIG_AddCast(res); + } + } + } + Py_DECREF(impconv); + } + } + } + } + if (!SWIG_IsOK(res) && obj == Py_None) { + if (ptr) + *ptr = 0; + if (PyErr_Occurred()) + PyErr_Clear(); + res = SWIG_OK; + } + } + return res; +} + +/* Convert a function ptr value */ + +SWIGRUNTIME int +SWIG_Python_ConvertFunctionPtr(PyObject *obj, void **ptr, swig_type_info *ty) { + if (!PyCFunction_Check(obj)) { + return SWIG_ConvertPtr(obj, ptr, ty, 0); + } else { + void *vptr = 0; + + /* here we get the method pointer for callbacks */ + const char *doc = (((PyCFunctionObject *)obj) -> m_ml -> ml_doc); + const char *desc = doc ? strstr(doc, "swig_ptr: ") : 0; + if (desc) + desc = ty ? SWIG_UnpackVoidPtr(desc + 10, &vptr, ty->name) : 0; + if (!desc) + return SWIG_ERROR; + if (ty) { + swig_cast_info *tc = SWIG_TypeCheck(desc,ty); + if (tc) { + int newmemory = 0; + *ptr = SWIG_TypeCast(tc,vptr,&newmemory); + assert(!newmemory); /* newmemory handling not yet implemented */ + } else { + return SWIG_ERROR; + } + } else { + *ptr = vptr; + } + return SWIG_OK; + } +} + +/* Convert a packed value value */ + +SWIGRUNTIME int +SWIG_Python_ConvertPacked(PyObject *obj, void *ptr, size_t sz, swig_type_info *ty) { + swig_type_info *to = SwigPyPacked_UnpackData(obj, ptr, sz); + if (!to) return SWIG_ERROR; + if (ty) { + if (to != ty) { + /* check type cast? */ + swig_cast_info *tc = SWIG_TypeCheck(to->name,ty); + if (!tc) return SWIG_ERROR; + } + } + return SWIG_OK; +} + +/* ----------------------------------------------------------------------------- + * Create a new pointer object + * ----------------------------------------------------------------------------- */ + +/* + Create a new instance object, without calling __init__, and set the + 'this' attribute. 
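SWIG_Python_ConvertPtrAndOwn above is the workhorse behind the SWIG_ConvertPtr macro: it resolves the 'this' chain, walks the cast table, and reports ownership back to the caller. As a hedged sketch of how a handwritten wrapper typically drives it (SWIGTYPE_p_Tensor and ndims() are illustrative names, not part of this patch):

```cpp
// Sketch only: convert a PyObject* argument to a typed C++ pointer via the
// SWIG runtime. SWIGTYPE_p_Tensor stands in for a generated type descriptor.
static PyObject *tensor_ndims(PyObject *self, PyObject *arg) {
  void *argp = nullptr;
  const int res = SWIG_ConvertPtr(arg, &argp, SWIGTYPE_p_Tensor, 0);
  if (!SWIG_IsOK(res)) {
    SWIG_Python_TypeError("Tensor *", arg);  // raise a descriptive TypeError
    return nullptr;
  }
  Tensor *t = reinterpret_cast<Tensor *>(argp);
  return PyLong_FromLong(t->ndims());
}
```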
+*/ + +SWIGRUNTIME PyObject* +SWIG_Python_NewShadowInstance(SwigPyClientData *data, PyObject *swig_this) +{ +#if (PY_VERSION_HEX >= 0x02020000) + PyObject *inst = 0; + PyObject *newraw = data->newraw; + if (newraw) { + inst = PyObject_Call(newraw, data->newargs, NULL); + if (inst) { +#if !defined(SWIG_PYTHON_SLOW_GETSET_THIS) + PyObject **dictptr = _PyObject_GetDictPtr(inst); + if (dictptr != NULL) { + PyObject *dict = *dictptr; + if (dict == NULL) { + dict = PyDict_New(); + *dictptr = dict; + PyDict_SetItem(dict, SWIG_This(), swig_this); + } + } +#else + PyObject *key = SWIG_This(); + PyObject_SetAttr(inst, key, swig_this); +#endif + } + } else { +#if PY_VERSION_HEX >= 0x03000000 + inst = ((PyTypeObject*) data->newargs)->tp_new((PyTypeObject*) data->newargs, Py_None, Py_None); + if (inst) { + PyObject_SetAttr(inst, SWIG_This(), swig_this); + Py_TYPE(inst)->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; + } +#else + PyObject *dict = PyDict_New(); + if (dict) { + PyDict_SetItem(dict, SWIG_This(), swig_this); + inst = PyInstance_NewRaw(data->newargs, dict); + Py_DECREF(dict); + } +#endif + } + return inst; +#else +#if (PY_VERSION_HEX >= 0x02010000) + PyObject *inst = 0; + PyObject *dict = PyDict_New(); + if (dict) { + PyDict_SetItem(dict, SWIG_This(), swig_this); + inst = PyInstance_NewRaw(data->newargs, dict); + Py_DECREF(dict); + } + return (PyObject *) inst; +#else + PyInstanceObject *inst = PyObject_NEW(PyInstanceObject, &PyInstance_Type); + if (inst == NULL) { + return NULL; + } + inst->in_class = (PyClassObject *)data->newargs; + Py_INCREF(inst->in_class); + inst->in_dict = PyDict_New(); + if (inst->in_dict == NULL) { + Py_DECREF(inst); + return NULL; + } +#ifdef Py_TPFLAGS_HAVE_WEAKREFS + inst->in_weakreflist = NULL; +#endif +#ifdef Py_TPFLAGS_GC + PyObject_GC_Init(inst); +#endif + PyDict_SetItem(inst->in_dict, SWIG_This(), swig_this); + return (PyObject *) inst; +#endif +#endif +} + +SWIGRUNTIME void +SWIG_Python_SetSwigThis(PyObject *inst, PyObject *swig_this) +{ + PyObject *dict; +#if (PY_VERSION_HEX >= 0x02020000) && !defined(SWIG_PYTHON_SLOW_GETSET_THIS) + PyObject **dictptr = _PyObject_GetDictPtr(inst); + if (dictptr != NULL) { + dict = *dictptr; + if (dict == NULL) { + dict = PyDict_New(); + *dictptr = dict; + } + PyDict_SetItem(dict, SWIG_This(), swig_this); + return; + } +#endif + dict = PyObject_GetAttrString(inst, (char*)"__dict__"); + PyDict_SetItem(dict, SWIG_This(), swig_this); + Py_DECREF(dict); +} + + +SWIGINTERN PyObject * +SWIG_Python_InitShadowInstance(PyObject *args) { + PyObject *obj[2]; + if (!SWIG_Python_UnpackTuple(args, "swiginit", 2, 2, obj)) { + return NULL; + } else { + SwigPyObject *sthis = SWIG_Python_GetSwigThis(obj[0]); + if (sthis) { + SwigPyObject_append((PyObject*) sthis, obj[1]); + } else { + SWIG_Python_SetSwigThis(obj[0], obj[1]); + } + return SWIG_Py_Void(); + } +} + +/* Create a new pointer object */ + +SWIGRUNTIME PyObject * +SWIG_Python_NewPointerObj(PyObject *self, void *ptr, swig_type_info *type, int flags) { + SwigPyClientData *clientdata; + PyObject * robj; + int own; + + if (!ptr) + return SWIG_Py_Void(); + + clientdata = type ? (SwigPyClientData *)(type->clientdata) : 0; + own = (flags & SWIG_POINTER_OWN) ? 
SWIG_POINTER_OWN : 0; + if (clientdata && clientdata->pytype) { + SwigPyObject *newobj; + if (flags & SWIG_BUILTIN_TP_INIT) { + newobj = (SwigPyObject*) self; + if (newobj->ptr) { + PyObject *next_self = clientdata->pytype->tp_alloc(clientdata->pytype, 0); + while (newobj->next) + newobj = (SwigPyObject *) newobj->next; + newobj->next = next_self; + newobj = (SwigPyObject *)next_self; +#ifdef SWIGPYTHON_BUILTIN + newobj->dict = 0; +#endif + } + } else { + newobj = PyObject_New(SwigPyObject, clientdata->pytype); +#ifdef SWIGPYTHON_BUILTIN + newobj->dict = 0; +#endif + } + if (newobj) { + newobj->ptr = ptr; + newobj->ty = type; + newobj->own = own; + newobj->next = 0; + return (PyObject*) newobj; + } + return SWIG_Py_Void(); + } + + assert(!(flags & SWIG_BUILTIN_TP_INIT)); + + robj = SwigPyObject_New(ptr, type, own); + if (robj && clientdata && !(flags & SWIG_POINTER_NOSHADOW)) { + PyObject *inst = SWIG_Python_NewShadowInstance(clientdata, robj); + Py_DECREF(robj); + robj = inst; + } + return robj; +} + +/* Create a new packed object */ + +SWIGRUNTIMEINLINE PyObject * +SWIG_Python_NewPackedObj(void *ptr, size_t sz, swig_type_info *type) { + return ptr ? SwigPyPacked_New((void *) ptr, sz, type) : SWIG_Py_Void(); +} + +/* -----------------------------------------------------------------------------* + * Get type list + * -----------------------------------------------------------------------------*/ + +#ifdef SWIG_LINK_RUNTIME +void *SWIG_ReturnGlobalTypeList(void *); +#endif + +SWIGRUNTIME swig_module_info * +SWIG_Python_GetModule(void *SWIGUNUSEDPARM(clientdata)) { + static void *type_pointer = (void *)0; + /* first check if module already created */ + if (!type_pointer) { +#ifdef SWIG_LINK_RUNTIME + type_pointer = SWIG_ReturnGlobalTypeList((void *)0); +#else +# ifdef SWIGPY_USE_CAPSULE + type_pointer = PyCapsule_Import(SWIGPY_CAPSULE_NAME, 0); +# else + type_pointer = PyCObject_Import((char*)"swig_runtime_data" SWIG_RUNTIME_VERSION, + (char*)"type_pointer" SWIG_TYPE_TABLE_NAME); +# endif + if (PyErr_Occurred()) { + PyErr_Clear(); + type_pointer = (void *)0; + } +#endif + } + return (swig_module_info *) type_pointer; +} + +#if PY_MAJOR_VERSION < 2 +/* PyModule_AddObject function was introduced in Python 2.0. The following function + is copied out of Python/modsupport.c in python version 2.3.4 */ +SWIGINTERN int +PyModule_AddObject(PyObject *m, char *name, PyObject *o) +{ + PyObject *dict; + if (!PyModule_Check(m)) { + PyErr_SetString(PyExc_TypeError, "PyModule_AddObject() needs module as first arg"); + return SWIG_ERROR; + } + if (!o) { + PyErr_SetString(PyExc_TypeError, "PyModule_AddObject() needs non-NULL value"); + return SWIG_ERROR; + } + + dict = PyModule_GetDict(m); + if (dict == NULL) { + /* Internal error -- modules must have a dict! 
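SWIG_Python_GetModule above (and SWIG_Python_SetModule below) lets every SWIG extension in a process share one swig_module_info list by parking a raw pointer in a named capsule on a dummy module. A self-contained sketch of that capsule round-trip; the module and attribute names here are illustrative, not the version-mangled ones the runtime actually builds:

```cpp
#include <Python.h>

static int shared_table = 42;  // stands in for the runtime's type-table pointer

// Publisher: park a pointer in a capsule exposed as module.attribute.
static int publish_table(PyObject *module) {
  PyObject *cap = PyCapsule_New(&shared_table, "swig_runtime_data.type_pointer", NULL);
  if (!cap)
    return -1;
  if (PyModule_AddObject(module, "type_pointer", cap) < 0) {
    Py_DECREF(cap);  // PyModule_AddObject steals the reference only on success
    return -1;
  }
  return 0;
}

// Consumer (a different extension module): fetch it back by dotted name.
static int *import_table(void) {
  return (int *)PyCapsule_Import("swig_runtime_data.type_pointer", 0);
}
```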
*/ + PyErr_Format(PyExc_SystemError, "module '%s' has no __dict__", + PyModule_GetName(m)); + return SWIG_ERROR; + } + if (PyDict_SetItemString(dict, name, o)) + return SWIG_ERROR; + Py_DECREF(o); + return SWIG_OK; +} +#endif + +SWIGRUNTIME void +#ifdef SWIGPY_USE_CAPSULE +SWIG_Python_DestroyModule(PyObject *obj) +#else +SWIG_Python_DestroyModule(void *vptr) +#endif +{ +#ifdef SWIGPY_USE_CAPSULE + swig_module_info *swig_module = (swig_module_info *) PyCapsule_GetPointer(obj, SWIGPY_CAPSULE_NAME); +#else + swig_module_info *swig_module = (swig_module_info *) vptr; +#endif + swig_type_info **types = swig_module->types; + size_t i; + for (i =0; i < swig_module->size; ++i) { + swig_type_info *ty = types[i]; + if (ty->owndata) { + SwigPyClientData *data = (SwigPyClientData *) ty->clientdata; + if (data) SwigPyClientData_Del(data); + } + } + Py_DECREF(SWIG_This()); + swig_this = NULL; +} + +SWIGRUNTIME void +SWIG_Python_SetModule(swig_module_info *swig_module) { +#if PY_VERSION_HEX >= 0x03000000 + /* Add a dummy module object into sys.modules */ + PyObject *module = PyImport_AddModule((char*)"swig_runtime_data" SWIG_RUNTIME_VERSION); +#else + static PyMethodDef swig_empty_runtime_method_table[] = { {NULL, NULL, 0, NULL} }; /* Sentinel */ + PyObject *module = Py_InitModule((char*)"swig_runtime_data" SWIG_RUNTIME_VERSION, swig_empty_runtime_method_table); +#endif +#ifdef SWIGPY_USE_CAPSULE + PyObject *pointer = PyCapsule_New((void *) swig_module, SWIGPY_CAPSULE_NAME, SWIG_Python_DestroyModule); + if (pointer && module) { + PyModule_AddObject(module, (char*)"type_pointer_capsule" SWIG_TYPE_TABLE_NAME, pointer); + } else { + Py_XDECREF(pointer); + } +#else + PyObject *pointer = PyCObject_FromVoidPtr((void *) swig_module, SWIG_Python_DestroyModule); + if (pointer && module) { + PyModule_AddObject(module, (char*)"type_pointer" SWIG_TYPE_TABLE_NAME, pointer); + } else { + Py_XDECREF(pointer); + } +#endif +} + +/* The python cached type query */ +SWIGRUNTIME PyObject * +SWIG_Python_TypeCache(void) { + static PyObject *SWIG_STATIC_POINTER(cache) = PyDict_New(); + return cache; +} + +SWIGRUNTIME swig_type_info * +SWIG_Python_TypeQuery(const char *type) +{ + PyObject *cache = SWIG_Python_TypeCache(); + PyObject *key = SWIG_Python_str_FromChar(type); + PyObject *obj = PyDict_GetItem(cache, key); + swig_type_info *descriptor; + if (obj) { +#ifdef SWIGPY_USE_CAPSULE + descriptor = (swig_type_info *) PyCapsule_GetPointer(obj, NULL); +#else + descriptor = (swig_type_info *) PyCObject_AsVoidPtr(obj); +#endif + } else { + swig_module_info *swig_module = SWIG_GetModule(0); + descriptor = SWIG_TypeQueryModule(swig_module, swig_module, type); + if (descriptor) { +#ifdef SWIGPY_USE_CAPSULE + obj = PyCapsule_New((void*) descriptor, NULL, NULL); +#else + obj = PyCObject_FromVoidPtr(descriptor, NULL); +#endif + PyDict_SetItem(cache, key, obj); + Py_DECREF(obj); + } + } + Py_DECREF(key); + return descriptor; +} + +/* + For backward compatibility only +*/ +#define SWIG_POINTER_EXCEPTION 0 +#define SWIG_arg_fail(arg) SWIG_Python_ArgFail(arg) +#define SWIG_MustGetPtr(p, type, argnum, flags) SWIG_Python_MustGetPtr(p, type, argnum, flags) + +SWIGRUNTIME int +SWIG_Python_AddErrMesg(const char* mesg, int infront) +{ + if (PyErr_Occurred()) { + PyObject *type = 0; + PyObject *value = 0; + PyObject *traceback = 0; + PyErr_Fetch(&type, &value, &traceback); + if (value) { + char *tmp; + PyObject *old_str = PyObject_Str(value); + Py_XINCREF(type); + PyErr_Clear(); + if (infront) { + PyErr_Format(type, "%s %s", mesg, tmp = 
SWIG_Python_str_AsChar(old_str)); + } else { + PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg); + } + SWIG_Python_str_DelForPy3(tmp); + Py_DECREF(old_str); + } + return 1; + } else { + return 0; + } +} + +SWIGRUNTIME int +SWIG_Python_ArgFail(int argnum) +{ + if (PyErr_Occurred()) { + /* add information about failing argument */ + char mesg[256]; + PyOS_snprintf(mesg, sizeof(mesg), "argument number %d:", argnum); + return SWIG_Python_AddErrMesg(mesg, 1); + } else { + return 0; + } +} + +SWIGRUNTIMEINLINE const char * +SwigPyObject_GetDesc(PyObject *self) +{ + SwigPyObject *v = (SwigPyObject *)self; + swig_type_info *ty = v ? v->ty : 0; + return ty ? ty->str : ""; +} + +SWIGRUNTIME void +SWIG_Python_TypeError(const char *type, PyObject *obj) +{ + if (type) { +#if defined(SWIG_COBJECT_TYPES) + if (obj && SwigPyObject_Check(obj)) { + const char *otype = (const char *) SwigPyObject_GetDesc(obj); + if (otype) { + PyErr_Format(PyExc_TypeError, "a '%s' is expected, 'SwigPyObject(%s)' is received", + type, otype); + return; + } + } else +#endif + { + const char *otype = (obj ? obj->ob_type->tp_name : 0); + if (otype) { + PyObject *str = PyObject_Str(obj); + const char *cstr = str ? SWIG_Python_str_AsChar(str) : 0; + if (cstr) { + PyErr_Format(PyExc_TypeError, "a '%s' is expected, '%s(%s)' is received", + type, otype, cstr); + SWIG_Python_str_DelForPy3(cstr); + } else { + PyErr_Format(PyExc_TypeError, "a '%s' is expected, '%s' is received", + type, otype); + } + Py_XDECREF(str); + return; + } + } + PyErr_Format(PyExc_TypeError, "a '%s' is expected", type); + } else { + PyErr_Format(PyExc_TypeError, "unexpected type is received"); + } +} + + +/* Convert a pointer value, signal an exception on a type mismatch */ +SWIGRUNTIME void * +SWIG_Python_MustGetPtr(PyObject *obj, swig_type_info *ty, int SWIGUNUSEDPARM(argnum), int flags) { + void *result; + if (SWIG_Python_ConvertPtr(obj, &result, ty, flags) == -1) { + PyErr_Clear(); +#if SWIG_POINTER_EXCEPTION + if (flags) { + SWIG_Python_TypeError(SWIG_TypePrettyName(ty), obj); + SWIG_Python_ArgFail(argnum); + } +#endif + } + return result; +} + +#ifdef SWIGPYTHON_BUILTIN +SWIGRUNTIME int +SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) { + PyTypeObject *tp = obj->ob_type; + PyObject *descr; + PyObject *encoded_name; + descrsetfunc f; + int res = -1; + +# ifdef Py_USING_UNICODE + if (PyString_Check(name)) { + name = PyUnicode_Decode(PyString_AsString(name), PyString_Size(name), NULL, NULL); + if (!name) + return -1; + } else if (!PyUnicode_Check(name)) +# else + if (!PyString_Check(name)) +# endif + { + PyErr_Format(PyExc_TypeError, "attribute name must be string, not '%.200s'", name->ob_type->tp_name); + return -1; + } else { + Py_INCREF(name); + } + + if (!tp->tp_dict) { + if (PyType_Ready(tp) < 0) + goto done; + } + + descr = _PyType_Lookup(tp, name); + f = NULL; + if (descr != NULL) + f = descr->ob_type->tp_descr_set; + if (!f) { + if (PyString_Check(name)) { + encoded_name = name; + Py_INCREF(name); + } else { + encoded_name = PyUnicode_AsUTF8String(name); + } + PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name)); + Py_DECREF(encoded_name); + } else { + res = f(descr, obj, value); + } + + done: + Py_DECREF(name); + return res; +} +#endif + + +#ifdef __cplusplus +} +#endif +/* -----------------------------------------------------------------------------* + Standard SWIG API for use inside user code. 
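Everything from here on is the small, stable surface meant for user code. A hedged sketch of the usual call pair, looking a type descriptor up by name at runtime and boxing a C++ pointer with it (the "Tensor *" query string and the ownership choice are illustrative):

```cpp
// Sketch only: hand a heap-allocated C++ object to Python. With
// SWIG_POINTER_OWN the proxy deletes the object when it is collected.
static PyObject *box_tensor(Tensor *t) {
  swig_type_info *ty = SWIG_TypeQuery("Tensor *");
  if (!ty)
    Py_RETURN_NONE;  // no loaded SWIG module registered this type
  return SWIG_NewPointerObj(t, ty, SWIG_POINTER_OWN);
}
```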
+ + Don't include this file directly, run the command + swig -python -external-runtime + Also, read the Modules chapter of the SWIG Manual. + + * -----------------------------------------------------------------------------*/ + +#ifdef SWIG_MODULE_CLIENTDATA_TYPE + +SWIGRUNTIMEINLINE swig_type_info * +SWIG_TypeQuery(SWIG_MODULE_CLIENTDATA_TYPE clientdata, const char *name) { + swig_module_info *module = SWIG_GetModule(clientdata); + return SWIG_TypeQueryModule(module, module, name); +} + +SWIGRUNTIMEINLINE swig_type_info * +SWIG_MangledTypeQuery(SWIG_MODULE_CLIENTDATA_TYPE clientdata, const char *name) { + swig_module_info *module = SWIG_GetModule(clientdata); + return SWIG_MangledTypeQueryModule(module, module, name); +} + +#else + +SWIGRUNTIMEINLINE swig_type_info * +SWIG_TypeQuery(const char *name) { + swig_module_info *module = SWIG_GetModule(NULL); + return SWIG_TypeQueryModule(module, module, name); +} + +SWIGRUNTIMEINLINE swig_type_info * +SWIG_MangledTypeQuery(const char *name) { + swig_module_info *module = SWIG_GetModule(NULL); + return SWIG_MangledTypeQueryModule(module, module, name); +} + +#endif +#endif diff --git a/python/ideep4py/mm/mem.cc b/python/ideep4py/mm/mem.cc new file mode 100644 index 00000000..878c75f7 --- /dev/null +++ b/python/ideep4py/mm/mem.cc @@ -0,0 +1,130 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
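mem.cc, which follows, stamps out one static memory pool per primitive kind with a token-pasting macro. As a worked example of what the preprocessor produces, MALLOC_FREE_IMPL(reorder) expands to roughly this (prefix## pastes the pool name together, #prefix stringizes it for the Memory constructor):

```cpp
// Approximate expansion of MALLOC_FREE_IMPL(reorder) from mem.cc.
static Memory reorder_pool("reorder");
static avx::byte *reorder_malloc(size_t size) {
    return (avx::byte *)reorder_pool.malloc(size);
}
static void reorder_free(avx::byte *p) {
    return reorder_pool.free((void *)p);
}
```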
+ * + */ + + +#include <memory> +#include <numeric> +#include "mem.h" + +using namespace std; + +#define MALLOC_FREE_IMPL(prefix) \ + static Memory prefix##_pool(#prefix); \ + static avx::byte* prefix##_malloc(size_t size) { \ + return (avx::byte *)prefix##_pool.malloc(size); \ + } \ + static void prefix##_free(avx::byte *p) { \ + return prefix##_pool.free((void *)p); \ + } + +MALLOC_FREE_IMPL(anon) +MALLOC_FREE_IMPL(reorder) +MALLOC_FREE_IMPL(relu_fwd) +MALLOC_FREE_IMPL(relu_bwd) +MALLOC_FREE_IMPL(bn_fwd) +MALLOC_FREE_IMPL(bn_bwd) +MALLOC_FREE_IMPL(lrn_fwd) +MALLOC_FREE_IMPL(lrn_bwd) +MALLOC_FREE_IMPL(conv_fwd) +MALLOC_FREE_IMPL(conv_bwd) +MALLOC_FREE_IMPL(pooling_fwd) +MALLOC_FREE_IMPL(pooling_bwd) +MALLOC_FREE_IMPL(ip_fwd) +MALLOC_FREE_IMPL(ip_bwd) +MALLOC_FREE_IMPL(concat_fwd) +MALLOC_FREE_IMPL(concat_bwd) + +std::shared_ptr<avx::byte> Allocator::malloc(size_t len, mem_pool_t mpool) +{ + std::shared_ptr<avx::byte> data; + switch(mpool) { + case MPOOL_REORDER: + data = std::shared_ptr<avx::byte>(reorder_malloc(len), reorder_free); + break; + case MPOOL_ELTWISE_FWD: + data = std::shared_ptr<avx::byte>(relu_fwd_malloc(len), relu_fwd_free); + break; + case MPOOL_ELTWISE_BWD: + data = std::shared_ptr<avx::byte>(relu_bwd_malloc(len), relu_bwd_free); + break; + case MPOOL_BN_FWD: + data = std::shared_ptr<avx::byte>(bn_fwd_malloc(len), bn_fwd_free); + break; + case MPOOL_BN_BWD: + data = std::shared_ptr<avx::byte>(bn_bwd_malloc(len), bn_bwd_free); + break; + case MPOOL_LRN_FWD: + data = std::shared_ptr<avx::byte>(lrn_fwd_malloc(len), lrn_fwd_free); + break; + case MPOOL_LRN_BWD: + data = std::shared_ptr<avx::byte>(lrn_bwd_malloc(len), lrn_bwd_free); + break; + case MPOOL_CONV_FWD: + data = std::shared_ptr<avx::byte>(conv_fwd_malloc(len), conv_fwd_free); + break; + case MPOOL_CONV_BWD: + data = std::shared_ptr<avx::byte>(conv_bwd_malloc(len), conv_bwd_free); + break; + case MPOOL_POOLING_FWD: + data = std::shared_ptr<avx::byte>(pooling_fwd_malloc(len), pooling_fwd_free); + break; + case MPOOL_POOLING_BWD: + data = std::shared_ptr<avx::byte>(pooling_bwd_malloc(len), pooling_bwd_free); + break; + case MPOOL_IP_FWD: + data = std::shared_ptr<avx::byte>(ip_fwd_malloc(len), ip_fwd_free); + break; + case MPOOL_IP_BWD: + data = std::shared_ptr<avx::byte>(ip_bwd_malloc(len), ip_bwd_free); + break; + case MPOOL_CONCAT_FWD: + data = std::shared_ptr<avx::byte>(concat_fwd_malloc(len), concat_fwd_free); + break; + case MPOOL_CONCAT_BWD: + data = std::shared_ptr<avx::byte>(concat_bwd_malloc(len), concat_bwd_free); + break; + default: + data = std::shared_ptr<avx::byte>(anon_malloc(len), anon_free); + break; + } + + return data; +} + +std::shared_ptr<avx::byte> Allocator::malloc(vector<int> dims, int element_sz, mem_pool_t mpool) +{ + auto len = std::accumulate(dims.begin(), dims.end(), 1 + , std::multiplies<int>()) * element_sz; + + return Allocator::malloc(len, mpool); +} + +void* dnn_malloc(size_t size, mem_pool_t mpool) +{ + return anon_pool.malloc(size); +} + +void dnn_free(void *p, mem_pool_t mpool) +{ + return anon_pool.free(p); +} diff --git a/python/ideep4py/mm/tensor.cc b/python/ideep4py/mm/tensor.cc new file mode 100644 index 00000000..d608f47c --- /dev/null +++ b/python/ideep4py/mm/tensor.cc @@ -0,0 +1,32 @@ +/* + *Copyright (c) 2018 Intel Corporation.
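Allocator::malloc binds each buffer to its pool's free function through the std::shared_ptr deleter slot, so memory flows back to the pool it came from with no explicit bookkeeping at the call sites. A minimal self-contained sketch of the same idiom, with a trivial stand-in for the Memory pool class:

```cpp
#include <cstdlib>
#include <memory>

// Trivial stand-in for the per-primitive Memory pool; the real one recycles
// previously freed blocks instead of hitting the system allocator each time.
struct Pool {
  void *malloc(std::size_t n) { return std::malloc(n); }
  void free(void *p) { std::free(p); }
};

static Pool conv_fwd_pool;

// The deleter lambda routes the buffer back to conv_fwd_pool, mirroring
// std::shared_ptr<avx::byte>(conv_fwd_malloc(len), conv_fwd_free) above.
std::shared_ptr<unsigned char> pool_malloc(std::size_t n) {
  return std::shared_ptr<unsigned char>(
      static_cast<unsigned char *>(conv_fwd_pool.malloc(n)),
      [](unsigned char *p) { conv_fwd_pool.free(p); });
}
```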
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "tensor.h" +#include "blas.h" + +Tensor *Tensor::sum(vector axis) { + return blas_sum(this, axis); +} diff --git a/python/ideep4py/primitives/bn.cc b/python/ideep4py/primitives/bn.cc new file mode 100644 index 00000000..a464fdc1 --- /dev/null +++ b/python/ideep4py/primitives/bn.cc @@ -0,0 +1,222 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include "mkl_vml_functions.h" +#include "layer.h" +#include "tensor.h" +#include "bn.h" +#include "bn_fwd.h" +#include "bn_bwd.h" +#include "prim_factory.h" +#include "reorder_op.h" + +template +void batch_normalization_inv(T *var, float eps, int size, T *inv) { + int blk_nthr = omp_get_max_threads(), + blk_num = blk_nthr, + blk_len = size / blk_num, + blk_len_ex = size % blk_num; + + if (!blk_len) + blk_nthr = size; + + T *var_eps = reinterpret_cast(new avx::byte[size * sizeof(T)]); + + # pragma omp parallel num_threads(blk_nthr) + { + int ithr = omp_get_thread_num(); + int blen = ithr < blk_len_ex ? blk_len + 1 : blk_len; + int bstart = ithr <= blk_len_ex ? 
(blk_len + 1) * ithr : + blk_len_ex * (blk_len + 1) + (ithr - blk_len_ex) * blk_len; + int bend = bstart + blen; + + for (int b = bstart; b < bend; b++) + var_eps[b] = var[b] + eps; + } + + vsPowx(size, var_eps, -0.5, inv); + delete(reinterpret_cast(var_eps)); + return; +} + +template +std::vector batch_normalization::Forward( + Tensor *src, Tensor *w, Tensor *mean, Tensor *var, float eps) { + + assert(memory_data_type() == src.cxx_data_type()); + + bool scale_shift = w ? true : false; + bool global_stats = mean ? true : false; + bool training = mean ? false : true; + + auto bn_fwd = batch_normalization_fwd_factory::get( + (mkldnn::memory::dims)src->dims(), + eps, scale_shift, global_stats, training); + + void *src_data = src->data(); + shared_ptr src_itnl; + if (src->cxx_format() != bn_fwd->get_src_fmt()) { + auto reorder = ReorderFactory::get( + (mkldnn::memory::dims)src->dims(), + (mkldnn::memory::format)src->cxx_format(), + (mkldnn::memory::format)bn_fwd->get_src_fmt()); + src_itnl= Allocator::malloc(src->len(), MPOOL_REORDER); + //src_itnl = new avx::byte[src->len()]; + reorder->execute(src_data, src_itnl.get()); + src_data = src_itnl.get(); + } + +#if 0 + auto dst = new Tensor(src->ndims(), src->dims(), + (mkldnn_memory_format_t)bn_fwd->get_dst_fmt(), + src->type()); + mean = training ? + new Tensor(bn_fwd->get_mean_ndims(), bn_fwd->get_mean_dims(), + (mkldnn_memory_format_t)bn_fwd->get_mean_fmt(), + src->type()) : mean; + var = training ? + new Tensor(bn_fwd->get_var_ndims(), bn_fwd->get_var_dims(), + (mkldnn_memory_format_t)bn_fwd->get_var_fmt(), + src->type()) : var; +#else + auto data = Allocator::malloc(src->dims(), type2size(src->type()), MPOOL_BN_FWD); + auto dst = new Tensor(src->ndims(), src->dims(), data, + (mkldnn_memory_format_t)bn_fwd->get_dst_fmt(), + src->type()); + + Tensor *inv; + if (training) { + auto data_mean = Allocator::malloc(bn_fwd->get_mean_dims(), type2size(src->type()), MPOOL_BN_FWD); + mean = new Tensor(bn_fwd->get_mean_ndims(), bn_fwd->get_mean_dims(), data_mean, + (mkldnn_memory_format_t)bn_fwd->get_mean_fmt(), + src->type()); + auto data_var = Allocator::malloc(bn_fwd->get_var_dims(), type2size(src->type()), MPOOL_BN_FWD); + var = new Tensor(bn_fwd->get_var_ndims(), bn_fwd->get_var_dims(), data_var, + (mkldnn_memory_format_t)bn_fwd->get_var_fmt(), + src->type()); + auto data_inv = Allocator::malloc(bn_fwd->get_var_dims(), type2size(src->type()), MPOOL_BN_FWD); + inv = new Tensor(bn_fwd->get_var_ndims(), bn_fwd->get_var_dims(), data_inv, + (mkldnn_memory_format_t)bn_fwd->get_var_fmt(), + src->type()); + } +#endif + + bn_fwd->execute(src_data, (w ? w->data() : nullptr), + dst->data(), (mean ? mean->data() : nullptr), + (var ? var->data() : nullptr)); + + std::vector outs; + outs.push_back(dst); + if (training) { + outs.push_back(mean); + outs.push_back(var); + + batch_normalization_inv(reinterpret_cast(var->data()), eps, + var->desc().data.dims[0], + reinterpret_cast(inv->data())); + outs.push_back(inv); + } + + return outs; +} + +template +std::vector batch_normalization::Backward( + Tensor *src, Tensor *diff_dst, Tensor *mean, + Tensor *var, Tensor *w, float eps) { + + assert(memory_data_type() == src.cxx_data_type()); + + bool scale_shift = w ? 
true : false; + + auto bn_bwd = batch_normalization_bwd_factory::get( + (mkldnn::memory::dims)src->dims(), + (mkldnn::memory::dims)diff_dst->dims(), + eps, scale_shift); + + void *src_data = src->data(); + shared_ptr src_itnl; + if (src->cxx_format() != bn_bwd->get_src_fmt()) { + auto reorder = ReorderFactory::get( + (mkldnn::memory::dims)src->dims(), + (mkldnn::memory::format)src->cxx_format(), + (mkldnn::memory::format)bn_bwd->get_src_fmt()); + //src_itnl = new avx::byte[src->len()]; + src_itnl= Allocator::malloc(src->len(), MPOOL_REORDER); + reorder->execute(src_data, src_itnl.get()); + src_data = src_itnl.get(); + } + + void *diff_dst_data = diff_dst->data(); + shared_ptr diff_dst_itnl; + if (diff_dst->cxx_format() != bn_bwd->get_diff_dst_fmt()) { + auto reorder = ReorderFactory::get( + (mkldnn::memory::dims)diff_dst->dims(), + (mkldnn::memory::format)diff_dst->cxx_format(), + (mkldnn::memory::format)bn_bwd->get_diff_dst_fmt()); + diff_dst_itnl = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + //diff_dst_itnl = new avx::byte[diff_dst->len()]; + reorder->execute(diff_dst_data, diff_dst_itnl.get()); + diff_dst_data = diff_dst_itnl.get(); + } + +#if 0 + auto diff_src = new Tensor(src->ndims(), src->dims(), + (mkldnn_memory_format_t)bn_bwd->get_diff_src_fmt(), + src->type()); + auto diff_w = scale_shift ? + new Tensor(w->ndims(), w->dims(), + (mkldnn_memory_format_t)bn_bwd->get_diff_w_fmt(), + w->type()) : (Tensor *)(nullptr); +#else + auto data = Allocator::malloc(src->dims(), type2size(src->type()), MPOOL_BN_BWD); + auto diff_src = new Tensor(src->ndims(), src->dims(), data, + (mkldnn_memory_format_t)bn_bwd->get_diff_src_fmt(), + src->type()); + Tensor *diff_w = nullptr; + if (scale_shift) { + auto data_diff_w = Allocator::malloc(w->dims(), type2size(src->type()), MPOOL_BN_BWD); + diff_w = new Tensor(w->ndims(), w->dims(), data_diff_w, + (mkldnn_memory_format_t)bn_bwd->get_diff_w_fmt(), + w->type()); + } +#endif + + bn_bwd->execute(src_data, diff_dst_data, mean->data(), var->data(), + (w ? w->data() : nullptr), diff_src->data(), + (diff_w ? diff_w->data() : nullptr)); + + std::vector outs; + outs.push_back(diff_src); + if (scale_shift) + outs.push_back(diff_w); + + return outs; +} + +template class batch_normalization; diff --git a/python/ideep4py/primitives/concat.cc b/python/ideep4py/primitives/concat.cc new file mode 100644 index 00000000..ee6c42dd --- /dev/null +++ b/python/ideep4py/primitives/concat.cc @@ -0,0 +1,229 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
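batch_normalization_inv earlier in bn.cc exists to hand the framework the inverse standard deviation, inv[i] = (var[i] + eps)^(-1/2), which it computes by adding eps across OpenMP chunks and then calling MKL's vsPowx with exponent -0.5. The same math as a dependency-free reference loop:

```cpp
#include <cmath>
#include <cstddef>

// Reference for bn.cc's batch_normalization_inv: inv[i] = 1 / sqrt(var[i] + eps).
void bn_inv_stddev(const float *var, float eps, std::size_t size, float *inv) {
  for (std::size_t i = 0; i < size; ++i)
    inv[i] = 1.0f / std::sqrt(var[i] + eps);
}
```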
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "tensor.h" +#include "mem.h" +#include "concat.h" +#include "utils.h" +#include "concat_fwd.h" +#include "prim_factory.h" +#include "reorder_op.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Concat::Concat() +{ +} + +template +Concat::~Concat() +{ +} + +template +Tensor *Concat::Forward( + std::vector src, + int axis) +{ + // sanity check + assert (src.size() > 0); + + std::vector src_fmts; + std::vector expected_fmts; + std::vector src_datas; + std::vector src_reorder; + + std::vector src_ds; + mkldnn::memory::dims dst_d; + + //get output channel + int out_channel = 0; + for (int i = 0; i < src.size(); i++) { + //get relate infor from src + src_fmts.push_back(src[i]->cxx_format()); + src_datas.push_back(src[i]->data()); + src_reorder.push_back(src[i]->data()); + + src_ds.push_back(src[i]->cxx_dims()); + out_channel += (src[i]->cxx_dims())[axis]; + } + + for (int i = 0; i < src_ds[0].size(); i++){ + if (i == axis) + dst_d.push_back(out_channel); + else + dst_d.push_back(src_ds[0][i]); + } + + //LOG(INFO) << "dst_d={" << dst_d[0] << "," << dst_d[1] << "," << dst_d[2] << "," << dst_d[3] << "}"; + + // get a concat fwd from primitive pool + ConcatFwd *concat_forward = NULL; + concat_forward = ConcatFwdFactory::get(src_ds, dst_d, axis); + + // check wehther fmt is same + expected_fmts = concat_forward->src_fmts_; + assert(src_fmts.size() == expected_fmts.size()); + + for (int i = 0; i < expected_fmts.size(); i++) { + if ( src_fmts[i] != expected_fmts[i]) { + //LOG(INFO) << "Concat src fmt not match ("<< i << "):" + //"src_fmt=" << src_fmts[i] << + //"; expected_fmt="<< expected_fmts[i]; + // From reorder factory to find one reorder + ReorderOp* reorder_src_op = ReorderFactory::get(src_ds[i], src_fmts[i], expected_fmts[i]); + src_reorder[i] = new avx::byte[src[i]->len()]; + reorder_src_op->execute(src_datas[i], src_reorder[i]); + } + } + + // create tensor based on primitive's dst + // assume dst and src have same data type + // Tensor *dst_tensor = new Tensor(dst_d, src[0]->cxx_data_type(), concat_forward->dst_fmt_, cpu_engine); + auto data = Allocator::malloc(dst_d, type2size(src[0]->type()), MPOOL_CONCAT_FWD); + Tensor *dst_tensor = new Tensor(dst_d.size(), dst_d, data, + (mkldnn_memory_format_t)concat_forward->dst_fmt_, + src[0]->type()); + + // do forward + concat_forward->execute(src_reorder, dst_tensor->data()); + + //FIXME here may cause performance issue + for (int i = 0; i < src_reorder.size(); i++) { + if (src_reorder[i] != src_datas[i]) { + // means reorder happen + delete static_cast(src_reorder[i]); + } + } + + return dst_tensor; +} + + +template +std::vector Concat::Backward( + Tensor *diff_dst, + std::vector offsets, + int axis) +{ + // + assert (offsets.size() > 0); + + std::vector gxs; + std::vector gxs_data; + + mkldnn::memory::format expected_dst_fmt; // expected format + void *diff_dst_data = NULL; + void *diff_dst_reorder = NULL; + + // get diff src fmts + // offset store the offsets of concat + // Example + // inputs: [2, 2, 3, 3], [2, 3, 3, 3], [2, 1, 3, 3], [2, 1, 3, 3] + // outputs: [2, 7, 3, 3] + // offsets: [2, 5, 6] + std::vector diff_src_d; + mkldnn::memory::dims diff_dst_d = 
diff_dst->cxx_dims(); + + // get elements + mkldnn::memory::dims tmp; + for (int i = 0; i < offsets.size(); i++) { + int axis_value = -1; + if (i == 0) + axis_value = offsets[0]; + else + axis_value = offsets[i] - offsets[i-1]; + + for (int j = 0; j < diff_dst_d.size(); j++) { + if (j == axis) + tmp.push_back(axis_value); + else + tmp.push_back(diff_dst_d[j]); + + } + diff_src_d.push_back(tmp); + tmp.clear(); + } + + // get last element + for (int i = 0; i < diff_dst_d.size(); i++){ + if (i == axis) + tmp.push_back(diff_dst_d[axis]-offsets.back()); + else + tmp.push_back(diff_dst_d[i]); + } + diff_src_d.push_back(tmp); + tmp.clear(); + + // get a concat bwd from primitive pool + ConcatBwd *concat_backward = NULL; + concat_backward = ConcatBwdFactory::get(diff_src_d, diff_dst_d, axis); + + //check whether diff dst fmt is same + expected_dst_fmt = concat_backward->diff_dst_fmt_; + diff_dst_data = diff_dst->data(); + if (expected_dst_fmt != diff_dst->cxx_format()) { + //LOG(INFO) << "Concat diff dst fmt not match: diff_dst_fmt=" + // << diff_dst->cxx_format() << "; expected fmt = " << expected_dst_fmt; + + // From reorder factory to find one reorder + ReorderOp* reorder_diff_dst_op = ReorderFactory::get(diff_dst->cxx_dims(), diff_dst->cxx_format(), expected_dst_fmt); + diff_dst_reorder = new avx::byte[diff_dst->len()]; + reorder_diff_dst_op->execute(diff_dst_data, diff_dst_reorder); + diff_dst_data = diff_dst_reorder; + } + + // create diff src tensors to execute concat backward + assert(diff_src_d.szie() == concat_backward->diff_src_fmts_.size()); + for (int i = 0; i < diff_src_d.size(); i++) { + // Tensor *diff_src_tensor = new Tensor(diff_src_d[i], diff_dst->cxx_data_type(), concat_backward->diff_src_fmts_[i], cpu_engine); + auto data = Allocator::malloc(diff_src_d[i], type2size(diff_dst->type()), MPOOL_CONCAT_BWD); + Tensor *diff_src_tensor = new Tensor(diff_src_d[i].size(), diff_src_d[i], data, + (mkldnn_memory_format_t)concat_backward->diff_src_fmts_[i], + diff_dst->type()); + gxs.push_back(diff_src_tensor); + gxs_data.push_back(diff_src_tensor->data()); + } + + // do concat backward + concat_backward->execute(gxs_data, diff_dst_data); + + // + if (diff_dst_reorder != NULL) + delete static_cast(diff_dst_reorder); + + return gxs; +} + +template class Concat; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/conv.cc b/python/ideep4py/primitives/conv.cc new file mode 100644 index 00000000..5cc808b6 --- /dev/null +++ b/python/ideep4py/primitives/conv.cc @@ -0,0 +1,373 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
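Concat::Backward above recovers each input's extent on the concat axis from the cumulative offsets: with channel sizes 2, 3, 1, 1 the offsets arrive as [2, 5, 6], so the sizes come back as the first offset, the successive differences, and the total minus the last offset. A minimal sketch of that recovery:

```cpp
#include <vector>

// Recover per-input sizes along the concat axis from cumulative offsets.
// `total` is the extent of the concatenated output on that axis.
std::vector<int> sizes_from_offsets(const std::vector<int> &offsets, int total) {
  std::vector<int> sizes;
  int prev = 0;
  for (int off : offsets) {        // e.g. {2, 5, 6} with total 7
    sizes.push_back(off - prev);   // -> 2, 3, 1
    prev = off;
  }
  sizes.push_back(total - prev);   // -> 1 (the last input)
  return sizes;
}
```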
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "tensor.h" +#include "mem.h" +#include "conv.h" +#include "utils.h" +#include "conv_fwd.h" +#include "conv_bwd_data.h" +#include "conv_bwd_weights.h" +#include "prim_factory.h" +#include "reorder_op.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Convolution2D::Convolution2D() +{ +} + +template +Convolution2D::~Convolution2D() +{ +} + +template +Tensor *Convolution2D::Forward( + Tensor *src, Tensor *weights, + Tensor *bias, + conv_param_t *cp) +{ + // sanity check + mkldnn::memory::dims src_dims = (mkldnn::memory::dims)src->dims(); + mkldnn::memory::dims w_dims = (mkldnn::memory::dims)weights->dims(); + mkldnn::memory::dims dst_dims = (mkldnn::memory::dims)cp->out_dims; + mkldnn::memory::dims b_dims; + if (bias) + b_dims = (mkldnn::memory::dims)bias->dims(); + + //sanity check for data type + //assuem all x/w/b should have same data type as T + //FIXME + //yli135: Is it possible x and w have different data type???? + assert(memory_data_type() == src->cxx_data_type()); + assert(memory_data_type() == weights->cxx_data_type()); + if (bias) + assert(memory_data_type() == bias->cxx_data_type()); + + // get a conv2d fwd from primitive pool + Convolution2DFwd *conv2d_forward = NULL; + if (bias) + conv2d_forward = Convolution2DFwdFactory::get(src_dims, w_dims, b_dims, dst_dims, + cp->dilate_y, cp->dilate_x, cp->sy, cp->sx, cp->pad_lh, cp->pad_lw, cp->pad_rh, cp->pad_rw); + else + conv2d_forward = Convolution2DFwdFactory::get(src_dims, w_dims, NONE_DIMS, dst_dims, + cp->dilate_y, cp->dilate_x, cp->sy, cp->sx, cp->pad_lh, cp->pad_lw, cp->pad_rh, cp->pad_rw); + + // FIXME: in this model, every call to conv_forward will create a new tensor, when to free??? 
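Every primitive in this patch repeats the layout dance that Forward performs next: ask the cached primitive which memory format it expects and, only when the tensor's current layout differs, reorder into scratch storage first. A condensed sketch of that pattern; the Fmt/Buf types and the reorder callback are stand-ins, not this patch's mkldnn classes:

```cpp
#include <cstddef>
#include <memory>

// Stand-in types: the patch uses mkldnn memory formats and ReorderOp instead.
enum class Fmt { nchw, nChw16c };
struct Buf { Fmt fmt; const void *data; std::size_t len; };

// Return the pointer the primitive should consume; reorder only on mismatch.
// `scratch` must stay alive until the primitive has executed.
const void *maybe_reorder(const Buf &src, Fmt want,
                          std::shared_ptr<unsigned char> &scratch,
                          void (*reorder)(const void *, void *, std::size_t)) {
  if (src.fmt == want)
    return src.data;  // layouts already match: zero copy
  scratch.reset(new unsigned char[src.len],
                std::default_delete<unsigned char[]>());
  reorder(src.data, scratch.get(), src.len);
  return scratch.get();
}
```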
+ mkldnn::memory::format src_fmt = src->cxx_format(); // src fmt in tensor + mkldnn::memory::format w_fmt = weights->cxx_format(); // weight fmt in tensor + + void *src_tmp = src->data(); + void *w_tmp = weights->data(); + shared_ptr src_reorder; + shared_ptr w_reorder; + + // check wehther fmt is same + if (src_fmt == conv2d_forward->src_fmt_ && w_fmt == conv2d_forward->weights_fmt_) { + //LOG(INFO) << "primitive fmt matched"; + } else { + //LOG(INFO) << "fmt not match, need to reorder"; + + if (src_fmt != conv2d_forward->src_fmt_) { + //LOG(INFO) << "src_fmt=" << src_fmt <<", conv2d_forward->src_fmt_=" << conv2d_forward->src_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_src_op = ReorderFactory::get(src_dims, src_fmt, conv2d_forward->src_fmt_); + src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER); + //src_reorder = new avx::byte[src->len()]; + reorder_src_op->execute(src_tmp, src_reorder.get()); + src_tmp = src_reorder.get(); + } + + if (w_fmt != conv2d_forward->weights_fmt_) { + //LOG(INFO) << "weight_fmt=" << w_fmt <<", conv2d_forward->weight_fmt_=" << conv2d_forward->weights_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_w_op = ReorderFactory::get(w_dims, w_fmt, conv2d_forward->weights_fmt_); + w_reorder = Allocator::malloc(weights->len(), MPOOL_REORDER); + //w_reorder = new avx::byte[weights->len()]; + reorder_w_op->execute(w_tmp, w_reorder.get()); + w_tmp = w_reorder.get(); + + + // set internal fmt back to weight tensor + weights->reset_memory( + static_cast(conv2d_forward->weights_fmt_), + w_reorder); + } + } + + // create tensor based on primitive's dst + // assume dst and src have same data type + //Tensor *dst_tensor = new Tensor(dst_dims, src->cxx_data_type(), conv2d_forward->dst_fmt_, cpu_engine); + auto data = Allocator::malloc(dst_dims, type2size(src->type()), MPOOL_CONV_FWD); + Tensor *dst_tensor = new Tensor(dst_dims.size(), dst_dims, data, + (mkldnn_memory_format_t)conv2d_forward->dst_fmt_, + src->type()); + + // do forward + if (bias) { + conv2d_forward->execute(src_tmp, w_tmp, bias->data(), dst_tensor->data()); + } else { + conv2d_forward->execute(src_tmp, w_tmp, dst_tensor->data()); + } + + return dst_tensor; +} + +/* + * gW = gy *x + */ +template +Tensor *Convolution2D::BackwardWeights( + Tensor *src, Tensor *diff_dst, + conv_param_t *cp) +{ + std::vector bwd_weight_vec; + + // sanity check + mkldnn::memory::dims src_dims = (mkldnn::memory::dims)src->dims(); + mkldnn::memory::dims diff_dst_dims = (mkldnn::memory::dims)diff_dst->dims(); + mkldnn::memory::dims diff_w_dims = (mkldnn::memory::dims)cp->out_dims; + + assert(src_dims == src->cxx_dims() && diff_dst_dims = diff_dst->cxx_dims()); + + // sanity check for data type + // FIXME + // is it possible y and w have different data type?? 
+ assert(memory_data_type() == src->cxx_data_type()); + assert(memory_data_type() == diff_dst->cxx_data_type()); + + // get a conv2d bwd weights from primitive pool + Convolution2DBwdWeights *conv2d_bwd_weights = NULL; + conv2d_bwd_weights = Convolution2DBwdWeightsFactory::get(src_dims, diff_w_dims, NONE_DIMS, diff_dst_dims, + cp->dilate_y, cp->dilate_x, cp->sy, cp->sx, cp->pad_lh, cp->pad_lw, cp->pad_rh, cp->pad_rw); + + // create tensor based on selected primitive + mkldnn::memory::format src_fmt = src->cxx_format(); + mkldnn::memory::format diff_dst_fmt = diff_dst->cxx_format(); + + //assum dst and src have same data type + void* src_tmp = src->data(); + void* diff_dst_tmp = diff_dst->data(); + shared_ptr src_reorder; + shared_ptr diff_dst_reorder; + + //check whether fmt is same + if (src_fmt == conv2d_bwd_weights->src_fmt_ && diff_dst_fmt == conv2d_bwd_weights->diff_dst_fmt_) { + // LOG(INFO) << "primitive fmt matched"; + } else { + // LOG(INFO) << "fmt not match, need to reorder"; + + if (src_fmt != conv2d_bwd_weights->src_fmt_) { + //LOG(INFO) << "src_fmt=" << src_fmt << ", conv2d_bwd_weights->src_fmt_=" << conv2d_bwd_weights->src_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_src_op = ReorderFactory::get(src_dims, src_fmt, conv2d_bwd_weights->src_fmt_); + src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER); + //src_reorder = new avx::byte[src->len()]; + reorder_src_op->execute(src_tmp, src_reorder.get()); + src_tmp = src_reorder.get(); + } + if (diff_dst_fmt != conv2d_bwd_weights->diff_dst_fmt_) { + // LOG(INFO) << "diff_dst_fmt=" << diff_dst_fmt <<", conv2d_bwd_weights->diff_dst_fmt_=" << conv2d_bwd_weights->diff_dst_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_diff_dst_op = ReorderFactory::get(diff_dst_dims, diff_dst_fmt, conv2d_bwd_weights->diff_dst_fmt_); + diff_dst_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + //diff_dst_reorder = new avx::byte[diff_dst->len()]; + reorder_diff_dst_op->execute(diff_dst_tmp, diff_dst_reorder.get()); + diff_dst_tmp = diff_dst_reorder.get(); + } + } + + //assum dst and src have same data type + //Tensor *diff_w_tensor = new Tensor(diff_w_dims, src->cxx_data_type(), conv2d_bwd_weights->diff_weights_fmt_, cpu_engine); + auto w_data = Allocator::malloc(diff_w_dims, type2size(src->type()), MPOOL_CONV_BWD); + Tensor *diff_w_tensor = new Tensor(diff_w_dims.size(), diff_w_dims, w_data, + (mkldnn_memory_format_t)conv2d_bwd_weights->diff_weights_fmt_, + src->type()); + + // do execute + conv2d_bwd_weights->execute(src_tmp, diff_w_tensor->data(), diff_dst_tmp); + return diff_w_tensor; +} + +template +std::vector Convolution2D::BackwardWeightsBias( + Tensor *src, Tensor *diff_dst, + conv_param_t *cp) +{ + std::vector bwd_weight_vec; + + // sanity check + mkldnn::memory::dims src_dims = (mkldnn::memory::dims)src->dims(); + mkldnn::memory::dims diff_dst_dims = (mkldnn::memory::dims)diff_dst->dims(); + mkldnn::memory::dims diff_w_dims = (mkldnn::memory::dims)cp->out_dims; + mkldnn::memory::dims diff_b_dims = {diff_w_dims[0]}; + + assert(src_dims == src->cxx_dims() && diff_dst_dims = diff_dst->cxx_dims()); + + // sanity check for data type + // FIXME + // is it possible y and w have different data type?? 
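BackwardWeightsBias, continuing below, sizes the bias gradient as diff_b_dims = {diff_w_dims[0]}, one value per output channel, because the bias gradient is simply diff_dst summed over the batch and spatial positions. A naive reference for that reduction, assuming NCHW layout:

```cpp
#include <vector>

// Reference bias gradient for NCHW diff_dst: gb[c] = sum over n, h, w of gy.
std::vector<float> conv_bias_grad(const float *gy, int N, int C, int H, int W) {
  std::vector<float> gb(C, 0.0f);
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c)
      for (int hw = 0; hw < H * W; ++hw)
        gb[c] += gy[(n * C + c) * H * W + hw];
  return gb;
}
```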
+ assert(memory_data_type() == src->cxx_data_type()); + assert(memory_data_type() == diff_dst->cxx_data_type()); + + // get a conv2d bwd weights from primitive pool + Convolution2DBwdWeights *conv2d_bwd_weights = NULL; + conv2d_bwd_weights = Convolution2DBwdWeightsFactory::get(src_dims, diff_w_dims, diff_b_dims, diff_dst_dims, + cp->dilate_y, cp->dilate_x, cp->sy, cp->sx, cp->pad_lh, cp->pad_lw, cp->pad_rh, cp->pad_rw); + + // create tensor based on selected primitive + mkldnn::memory::format src_fmt = src->cxx_format(); + mkldnn::memory::format diff_dst_fmt = diff_dst->cxx_format(); + + //assum dst and src have same data type + void* src_tmp = src->data(); + void* diff_dst_tmp = diff_dst->data(); + shared_ptr src_reorder; + shared_ptr diff_dst_reorder; + + //check whether fmt is same + if (src_fmt == conv2d_bwd_weights->src_fmt_ && diff_dst_fmt == conv2d_bwd_weights->diff_dst_fmt_) { + // LOG(INFO) << "primitive fmt matched"; + } else { + // LOG(INFO) << "fmt not match, need to reorder"; + + if (src_fmt != conv2d_bwd_weights->src_fmt_) { + //LOG(INFO) << "src_fmt=" << src_fmt << ", conv2d_bwd_weights->src_fmt_=" << conv2d_bwd_weights->src_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_src_op = ReorderFactory::get(src_dims, src_fmt, conv2d_bwd_weights->src_fmt_); + src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER); + //src_reorder = new avx::byte[src->len()]; + reorder_src_op->execute(src_tmp, src_reorder.get()); + src_tmp = src_reorder.get(); + } + if (diff_dst_fmt != conv2d_bwd_weights->diff_dst_fmt_) { + // LOG(INFO) << "diff_dst_fmt=" << diff_dst_fmt <<", conv2d_bwd_weights->diff_dst_fmt_=" << conv2d_bwd_weights->diff_dst_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_diff_dst_op = ReorderFactory::get(diff_dst_dims, diff_dst_fmt, conv2d_bwd_weights->diff_dst_fmt_); + diff_dst_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + //diff_dst_reorder = new avx::byte[diff_dst->len()]; + reorder_diff_dst_op->execute(diff_dst_tmp, diff_dst_reorder.get()); + diff_dst_tmp = diff_dst_reorder.get(); + } + } + + //assum dst and src have same data type + //Tensor *diff_w_tensor = new Tensor(diff_w_dims, src->cxx_data_type(), conv2d_bwd_weights->diff_weights_fmt_, cpu_engine); + auto w_data = Allocator::malloc(diff_w_dims, type2size(src->type()), MPOOL_CONV_BWD); + Tensor *diff_w_tensor = new Tensor(diff_w_dims.size(), diff_w_dims, w_data, + (mkldnn_memory_format_t)conv2d_bwd_weights->diff_weights_fmt_, + src->type()); + + auto b_data = Allocator::malloc(diff_b_dims, type2size(src->type()), MPOOL_CONV_BWD); + Tensor *diff_b_tensor = new Tensor(diff_b_dims.size(), diff_b_dims, b_data, + (mkldnn_memory_format_t)mkldnn::memory::format::x, src->type()); + + conv2d_bwd_weights->execute(src_tmp, diff_w_tensor->data(), diff_b_tensor->data(), diff_dst_tmp); + bwd_weight_vec.push_back(diff_w_tensor); + bwd_weight_vec.push_back(diff_b_tensor); + + return bwd_weight_vec; +} + +template +Tensor *Convolution2D::BackwardData( + Tensor *weights, Tensor *diff_dst, + conv_param_t *cp) +{ + //sanity check + mkldnn::memory::dims diff_src_dims = (mkldnn::memory::dims)cp->out_dims; + mkldnn::memory::dims w_dims = (mkldnn::memory::dims)weights->dims(); + mkldnn::memory::dims diff_dst_dims = (mkldnn::memory::dims)diff_dst->dims(); + assert(w_dims == weights->cxx_dims() && diff_dst_dims == diff_dst->cxx_dims()); + + // sanity check for data type + // assuem all x/w/b should have same data type as T + // FIXME + // yli135: Is it possible x and w have 
different data type???? + assert(memory_data_type() == weights->cxx_data_type()); + assert(memory_data_type() == diff_dst->cxx_data_type()); + + // get a conv2d bwd data from primitive pool + Convolution2DBwdData *conv2d_bwd_data = NULL; + conv2d_bwd_data = Convolution2DBwdDataFactory::get( diff_src_dims, w_dims, diff_dst_dims, + cp->dilate_y, cp->dilate_x, cp->sy, cp->sx, cp->pad_lh, cp->pad_lw, cp->pad_rh, cp->pad_rw); + + // FIXME: in this model, every call to conv_forward will create a new tensor, when to free??? + mkldnn::memory::format w_fmt = weights->cxx_format(); + mkldnn::memory::format diff_dst_fmt = diff_dst->cxx_format(); + + void* w_tmp = weights->data(); + void* diff_dst_tmp = diff_dst->data(); + shared_ptr w_reorder; + shared_ptr diff_dst_reorder; + + if (w_fmt == conv2d_bwd_data->weights_fmt_ && diff_dst_fmt == conv2d_bwd_data->diff_dst_fmt_) { + //LOG(INFO) << "conv2d bwd data primitive fmt matched"; + } else { + //LOG(INFO) << "conv2d bwd data fmt not match, need to reorder"; + + if (w_fmt != conv2d_bwd_data->weights_fmt_) { + //LOG(INFO) << "weight_fmt=" << w_fmt << ", conv2d_bwd_data->weights_fmt_="<< conv2d_bwd_data->weights_fmt_; + ReorderOp* reorder_w_op = ReorderFactory::get(w_dims, w_fmt, conv2d_bwd_data->weights_fmt_); + w_reorder = Allocator::malloc(weights->len(), MPOOL_REORDER); + //w_reorder = new avx::byte[weights->len()]; + reorder_w_op->execute(w_tmp, w_reorder.get()); + w_tmp = w_reorder.get(); + } + if (diff_dst_fmt != conv2d_bwd_data->diff_dst_fmt_) { + //LOG(INFO) << "diff_dst_fmt=" << diff_dst_fmt <<", conv2d_bwd_data->diff_dst_fmt_=" << conv2d_bwd_data->diff_dst_fmt_; + ReorderOp* reorder_diff_dst_op = ReorderFactory::get(diff_dst_dims, diff_dst_fmt, conv2d_bwd_data->diff_dst_fmt_); + diff_dst_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + //diff_dst_reorder = new avx::byte[diff_dst->len()]; + reorder_diff_dst_op->execute(diff_dst_tmp, diff_dst_reorder.get()); + diff_dst_tmp = diff_dst_reorder.get(); + } + } + + // create tensor based on selected primitive + // assume dst and src have same data type + //Tensor *diff_src_tensor = new Tensor(diff_src_dims, diff_dst->cxx_data_type(), conv2d_bwd_data->diff_src_fmt_, cpu_engine); + auto data = Allocator::malloc(diff_src_dims, type2size(diff_dst->type()), MPOOL_CONV_BWD); + Tensor *diff_src_tensor = new Tensor(diff_src_dims.size(), diff_src_dims, data, + (mkldnn_memory_format_t)conv2d_bwd_data->diff_src_fmt_, + diff_dst->type()); + + conv2d_bwd_data->execute(diff_src_tensor->data(), w_tmp, diff_dst_tmp); + + return diff_src_tensor; +} + + +template class Convolution2D; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/dropout.cc b/python/ideep4py/primitives/dropout.cc new file mode 100644 index 00000000..3a975b19 --- /dev/null +++ b/python/ideep4py/primitives/dropout.cc @@ -0,0 +1,138 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
+ * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +#include "cpu_info.h" +#include "dropout.h" +#include "layer.h" +#include "mkl_vsl.h" +#include "prim_factory.h" +#include "reorder_op.h" +#include "tensor.h" + +static void bernoulli_generate(const long n, const double p, int* r) { + std::srand(std::time(0)); + const int seed = 17 + std::rand() % 4096; + +#ifdef _OPENMP + int nthr = omp_get_max_threads(); + const int threshold = nthr * OpenMpManager::getProcessorSpeedMHz() / 3; + const bool run_parallel = (omp_in_parallel() == 0) && (n >= threshold); + if (!run_parallel) { + nthr = 1; + } + +# pragma omp parallel num_threads(nthr) + { + const int ithr = omp_get_thread_num(); + const long avg_amount = (n + nthr - 1) / nthr; + const long my_offset = ithr * avg_amount; + const long my_amount = std::min(my_offset + avg_amount, n) - my_offset; +#else + { + const long my_amount = n; + const long my_offset = 0; +#endif + if (my_amount > 0) { + VSLStreamStatePtr stream; + vslNewStream(&stream, VSL_BRNG_MCG31, seed); + vslSkipAheadStream(stream, my_offset); + viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, my_amount, r + my_offset, p); + vslDeleteStream(&stream); + } + } +} + +template +std::vector Dropout::Forward(Tensor* x, float ratio) { + const auto scale = 1.0 / (1.0 - ratio); + const auto x_buf = static_cast(x->data()); + const auto size = x->size(); + const auto mask = new Tensor(x->ndims(), x->dims(), x->format(), x->type()); + const auto y = new Tensor(x->ndims(), x->dims(), x->format(), x->type()); + + // Init the mask + std::unique_ptr bernouli_nums(new int[size]); + bernoulli_generate(size, 1.0 - ratio, bernouli_nums.get()); + + const auto mask_buf = static_cast(mask->data()); + const auto y_buf = static_cast(y->data()); + +#pragma omp parallel for schedule(static) + for (size_t i = 0; i < size; ++i) { + mask_buf[i] = bernouli_nums[i] * scale; + y_buf[i] = mask_buf[i] * x_buf[i]; + } + + return std::vector{mask, y}; +} + +template +Tensor* Dropout::Backward(Tensor* mask, Tensor* gy) { + assert(mask->size() == gy->size()); + + // Reorder mask if needed + auto gy_fmt = gy->cxx_format(); + auto mask_fmt = mask->cxx_format(); + void* mask_data = mask->data(); + shared_ptr mask_reorder; + + if (gy_fmt == mask_fmt) { + //LOG(INFO) << "mask fmt matched"; + } else { + // LOG(INFO) << "mask fmt not match, need to reorder"; + // LOG(INFO) << "mask_fmt=" << mask_fmt <<", gy_fmt=" << gy_fmt; + auto reorder_op = ReorderFactory::get(mask->dims(), mask_fmt, gy_fmt); + mask_reorder = Allocator::malloc(mask->len(), MPOOL_REORDER); + //mask_reorder = new avx::byte[mask->len()]; + reorder_op->execute(mask->data(), mask_reorder.get()); + mask_data = mask_reorder.get(); + } + + const auto size = mask->size(); + const auto gx = new Tensor(gy->ndims(), gy->dims(), gy->format(), gy->type()); + + //const auto mask_buf = static_cast(mask_reorder ? 
mask_reorder : mask->data());
+    const auto mask_buf = static_cast<T*>(mask_data);
+    const auto gy_buf = static_cast<T*>(gy->data());
+    const auto gx_buf = static_cast<T*>(gx->data());
+
+#pragma omp parallel for schedule(static)
+    for (size_t i = 0; i < size; ++i) {
+        gx_buf[i] = mask_buf[i] * gy_buf[i];
+    }
+
+    return gx;
+}
+
+template class Dropout<float>;
diff --git a/python/ideep4py/primitives/eltwise.cc b/python/ideep4py/primitives/eltwise.cc
new file mode 100644
index 00000000..939fe8b9
--- /dev/null
+++ b/python/ideep4py/primitives/eltwise.cc
@@ -0,0 +1,116 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#include
+#include
+#include
+#include "layer.h"
+#include "tensor.h"
+#include "eltwise.h"
+#include "eltwise_fwd.h"
+#include "eltwise_bwd.h"
+#include "prim_factory.h"
+#include "reorder_op.h"
+
+using namespace mkldnn;
+
+const mkldnn::memory::dims NONE_DIMS = {};
+extern engine cpu_engine;
+
+template<typename T, typename T2>
+Eltwise<T, T2>::Eltwise()
+{
+}
+
+template<typename T, typename T2>
+Eltwise<T, T2>::~Eltwise()
+{
+}
+
+template<typename T, typename T2>
+Tensor *Eltwise<T, T2>::Forward(Tensor *src, eltwise_algorithm_t alg_kind, T2 alpha, T2 beta)
+{
+    // sanity check for data type
+    assert(memory_data_type<T>() == src->cxx_data_type());
+
+    // get an eltwise fwd from primitive pool
+    EltwiseFwd<T, T2> *eltwise_fwd = nullptr;
+    // FIXME: in this model, every call to eltwise_fwd will create a new tensor, when to free???
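+    // Illustrative usage sketch (hypothetical names; the exact
+    // eltwise_algorithm_t enumerator for ReLU depends on eltwise.h):
+    //
+    //   Tensor *y = Eltwise<float, float>::Forward(x, ELTWISE_RELU,
+    //                                              /*alpha=*/0.0, /*beta=*/0.0);
+    //
+    // alpha/beta parameterize the activation (e.g. the negative slope of
+    // leaky ReLU); plain ReLU ignores both.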
+ mkldnn::memory::format src_fmt = src->cxx_format(); // src fmt in tensor + mkldnn::algorithm malg_kind = ideepy2mkldnn_eltwise_algorithm(alg_kind); + eltwise_fwd = EltwiseFwdFactory::get(src->dims(), malg_kind, src_fmt, alpha, beta); + + // create tensor based on primitive's dst + // assume dst and src have same data type + auto data = Allocator::malloc(src->dims(), type2size(src->type()), MPOOL_ELTWISE_FWD); + Tensor *dst_tensor = new Tensor(src->ndims(), src->dims(), data, + (mkldnn_memory_format_t)eltwise_fwd->dst_fmt_, + src->type()); + + // do forward + eltwise_fwd->execute(src->data(), dst_tensor->data()); + + return dst_tensor; +} + +template +Tensor *Eltwise::Backward(Tensor *src, Tensor *diff_dst, eltwise_algorithm_t alg_kind, T2 alpha, T2 beta) +{ + // sanity check for data type + assert(memory_data_type() == diff_dst->cxx_data_type()); + assert(src->ndims() == diff_dst->ndims()); + assert(src->size() == diff_dst->size()); + + // get a eltwise bwd data from primitive pool + EltwiseBwd *eltwise_bwd = nullptr; + mkldnn::algorithm malg_kind = ideepy2mkldnn_eltwise_algorithm(alg_kind); + eltwise_bwd = EltwiseBwdFactory::get(diff_dst->dims(), malg_kind, diff_dst->cxx_format(), alpha, beta); + + void *src_buf = src->data(); + + if (src->cxx_format() != diff_dst->cxx_format()) { + //LOG(INFO) << "eltwise bwd data fmt not match, need to reorder"; + //LOG(INFO) << "diff_dst_fmt=" << diff_dst->cxx_format() <<", src format=" << src->cxx_format(); + ReorderOp* reorder_src_op = ReorderFactory::get(src->dims(), src->cxx_format(), diff_dst->cxx_format()); + //src_reorder = new avx::byte[diff_dst->len()]; + auto src_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + reorder_src_op->execute(src_buf, src_reorder.get()); + src_buf = static_cast(src_reorder.get()); + } + + // create tensor based on selected primitive + // assume dst and src have same data type + auto data = Allocator::malloc(src->dims(), type2size(src->type()), MPOOL_ELTWISE_BWD); + Tensor *diff_src = new Tensor(src->ndims(), src->dims(), data, + (mkldnn_memory_format_t)eltwise_bwd->src_diff_fmt_, + src->type()); + + eltwise_bwd->execute(src_buf, diff_dst->data(), diff_src->data()); + + return diff_src; +} + +template class Eltwise; + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/linear.cc b/python/ideep4py/primitives/linear.cc new file mode 100644 index 00000000..1b7f6f94 --- /dev/null +++ b/python/ideep4py/primitives/linear.cc @@ -0,0 +1,305 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "linear.h" +#include "utils.h" +#include "linear_fwd.h" +#include "linear_bwd_data.h" +#include "linear_bwd_weights.h" +#include "linear_fwd_factory.h" +#include "linear_bwd_data_factory.h" +#include "linear_bwd_weights_factory.h" +#include "reorder_op.h" +#include "reorder_factory.h" +using namespace mkldnn; + +extern const mkldnn::memory::dims NONE_DIMS; +extern engine cpu_engine; + +template +Linear::Linear() +{ +} + +template +Linear::~Linear() +{ +} + + + +template +Tensor *Linear::Forward( + Tensor *src, Tensor *weights, + Tensor *bias) +{ + //sanity check + mkldnn::memory::dims src_dims = src->cxx_dims(); + mkldnn::memory::dims w_dims = weights->cxx_dims(); + mkldnn::memory::dims b_dims; + mkldnn::memory::dims dst_dims ; + if (bias) { + b_dims = bias->cxx_dims(); + assert(b_dims == bias->cxx_dims()); + } + + if (src->ndims() != weights->ndims()) { + assert(weights->ndims() == 2 && src->ndims() == 4); + w_dims = {w_dims[0], src_dims[1], src_dims[2], src_dims[3]}; + weights->reset_memory(format_2_as_4(weights->format()), w_dims); + } + dst_dims = {src_dims[0], w_dims[0]}; + + //sanity check for data type + //FIXME + //is it possible y and w have different data type? + assert(memory_data_type() == src->cxx_data_type()); + assert(memory_data_type() == weights->cxx_data_type()); + if (bias) { + assert(memory_data_type() == bias->cxx_data_type()); + } + //get a linear from primitive pool + LinearFwd *linear_forward = NULL; + if (bias) + linear_forward = LinearFwdFactory::get(src_dims, w_dims, b_dims, dst_dims); + else + linear_forward = LinearFwdFactory::get(src_dims, w_dims, NONE_DIMS, dst_dims); + //FIXME: in this model, every call to conv_forward will create a new mdarray, when to free? 
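+    // The block below follows the same convention as the convolution
+    // primitives: tensors keep their incoming format, and a reordered copy
+    // is materialized only when the format chosen by the MKL-DNN primitive
+    // differs from the one the tensor actually carries.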
+    mkldnn::memory::format src_fmt = src->cxx_format();
+    mkldnn::memory::format w_fmt = weights->cxx_format();
+    void *src_tmp = src->data();
+    void *w_tmp = weights->data();
+    shared_ptr<avx::byte> src_reorder;
+    shared_ptr<avx::byte> w_reorder;
+    // check whether the formats match
+    if (src_fmt == linear_forward->src_fmt_ && w_fmt == linear_forward->weights_fmt_) {
+        //LOG(INFO) << "primitive fmt matched";
+    } else {
+        //LOG(INFO) << "format not matched, need to do reorder";
+        if (src_fmt != linear_forward->src_fmt_) {
+            //LOG(INFO) << "src_fmt" << src_fmt << ", linear_forward->src_fmt_" << linear_forward->src_fmt_;
+            ReorderOp<T>* reorder_src_op = ReorderFactory<T>::get(src_dims, src_fmt, linear_forward->src_fmt_);
+            src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER);
+            //src_reorder = new avx::byte[src->len()];
+            reorder_src_op->execute(src_tmp, src_reorder.get());
+            src_tmp = src_reorder.get();
+        }
+        if (w_fmt != linear_forward->weights_fmt_) {
+            //LOG(INFO) << "weight_fmt = " << w_fmt << ", linear_forward->weights_fmt_=" << linear_forward->weights_fmt_;
+            //FIXME: when to free the reordered memory
+            ReorderOp<T>* reorder_w_op = ReorderFactory<T>::get(w_dims, w_fmt, linear_forward->weights_fmt_);
+            w_reorder = Allocator::malloc(weights->len(), MPOOL_REORDER);
+            //w_reorder = new avx::byte[weights->len()];
+            reorder_w_op->execute(w_tmp, w_reorder.get());
+            w_tmp = w_reorder.get();
+            // set the internal fmt back on the weight tensor
+            weights->reset_memory(
+                    static_cast<mkldnn_memory_format_t>(linear_forward->weights_fmt_),
+                    w_reorder);
+        }
+    }
+    // create mdarray based on primitive's dst
+    //Tensor *dst_tensor = new Tensor(dst_dims, src->cxx_data_type(), linear_forward->dst_fmt_, cpu_engine);
+    auto data = Allocator::malloc(dst_dims, type2size(src->type()), MPOOL_IP_FWD);
+    Tensor *dst_tensor = new Tensor(dst_dims.size(), dst_dims, data,
+            (mkldnn_memory_format_t)linear_forward->dst_fmt_,
+            src->type());
+    // do forward
+    if (bias) {
+        linear_forward->execute(src_tmp, w_tmp, bias->data(), dst_tensor->data());
+    } else {
+        linear_forward->execute(src_tmp, w_tmp, dst_tensor->data());
+    }
+
+    return dst_tensor;
+}
+
+/*
+ * gW = gy * x
+ */
+template<typename T>
+std::vector<Tensor *> Linear<T>::BackwardWeights(
+        Tensor *src, Tensor* diff_dst, bool need_bias)
+{
+    std::vector<Tensor *> bwd_weight_vec;
+    mkldnn::memory::dims src_dims = src->cxx_dims();
+    mkldnn::memory::dims diff_dst_dims = diff_dst->cxx_dims();
+    mkldnn::memory::dims diff_w_dims;
+    mkldnn::memory::dims diff_b_dims;
+    diff_w_dims = {diff_dst_dims[1], src_dims[1]};
+    /*
+    if (src->ndims() == 4) {
+        diff_w_dims = {diff_dst_dims[1], src_dims[1], src_dims[2], src_dims[3]};
+    } else if (src->ndims() == 2) {
+        diff_w_dims = {diff_dst_dims[1], src_dims[1]};
+    } else {
+        LOG(INFO) << "Error:: src only support 2 dims or 4 dims";
+    }*/
+    if (need_bias)
+        diff_b_dims = {diff_w_dims[0]};
+    // sanity check for data type
+    // FIXME
+    // is it possible y and w have different data type?
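+    // Illustrative usage sketch (hypothetical 2-D tensors `x` and `gy`):
+    //
+    //   auto grads = Linear<float>::BackwardWeights(x, gy, /*need_bias=*/true);
+    //   Tensor *gW = grads[0];   // shape {out_features, in_features}
+    //   Tensor *gb = grads[1];   // shape {out_features}, format x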
+ assert(memory_data_type() == src->cxx_data_type()); + assert(memory_data_type() == diff_dst->cxx_data_type()); + //get a linear bwd weights from primitive pool + LinearBwdWeights *linear_bwd_weights = NULL; + if (need_bias) { + linear_bwd_weights = LinearBwdWeightsFactory::get(src_dims, diff_w_dims, diff_b_dims, diff_dst_dims); + } else { + linear_bwd_weights = LinearBwdWeightsFactory::get(src_dims, diff_w_dims, NONE_DIMS, diff_dst_dims); + } + //create tensor based on selected primitive + mkldnn::memory::format src_fmt = src->cxx_format(); + mkldnn::memory::format diff_dst_fmt = diff_dst->cxx_format(); + //assum dst and src have same data type + void* src_tmp = src->data(); + void* diff_dst_tmp = diff_dst->data(); + shared_ptr src_reorder; + shared_ptr diff_dst_reorder; + //check whether fmt is same + if (src_fmt == linear_bwd_weights->src_fmt_ && diff_dst_fmt == linear_bwd_weights->diff_dst_fmt_) { + //LOG(INFO) << "primitive fmt matched"; + } else { + //LOG(INFO) << "fmt not match, need to reorder"; + if (src_fmt != linear_bwd_weights->src_fmt_) { + // LOG(INFO) << "src_fmt = " << src_fmt << ", linear_bwd_weights->src_fmt_=" << linear_bwd_weights->src_fmt_; + ReorderOp* reorder_src_op = ReorderFactory::get(src_dims, src_fmt, linear_bwd_weights->src_fmt_); + src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER); + //src_reorder = new avx::byte[src->len()]; + reorder_src_op->execute(src_tmp, src_reorder.get()); + src_tmp = src_reorder.get(); + } + if (diff_dst_fmt != linear_bwd_weights->diff_dst_fmt_) { + //LOG(INFO) << "diff_dst_fmt = " << diff_dst_fmt << ", linear_bwd_weights->diff_dst_fmt = " << linear_bwd_weights->diff_dst_fmt_; + //FIXME when to free the reordered memory + ReorderOp* reorder_diff_dst_op = ReorderFactory::get(diff_dst_dims, diff_dst_fmt, linear_bwd_weights->diff_dst_fmt_); + diff_dst_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + //diff_dst_reorder = new avx::byte[diff_dst->len()]; + reorder_diff_dst_op->execute(diff_dst_tmp, diff_dst_reorder.get()); + diff_dst_tmp = diff_dst_reorder.get(); + } + } + //assume dst and src have the same data type + //Tensor *diff_w_tensor = new Tensor(diff_w_dims, src->cxx_data_type(), linear_bwd_weights->diff_weights_fmt_, cpu_engine); + auto w_data = Allocator::malloc(diff_w_dims, type2size(src->type()), MPOOL_IP_BWD); + Tensor *diff_w_tensor = new Tensor(diff_w_dims.size(), diff_w_dims, w_data, + (mkldnn_memory_format_t)linear_bwd_weights->diff_weights_fmt_, + src->type()); + //do execute + if (need_bias) { + //assume bias's format is always mkldnn::memory::format::x + //Tensor *diff_b_tensor = new Tensor(diff_b_dims, src->cxx_data_type(), mkldnn::memory::format::x, cpu_engine); + auto b_data = Allocator::malloc(diff_b_dims, type2size(src->type()), MPOOL_IP_BWD); + Tensor *diff_b_tensor = new Tensor(diff_b_dims.size(), diff_b_dims, b_data, + (mkldnn_memory_format_t)mkldnn::memory::format::x, src->type()); + linear_bwd_weights->execute(src_tmp, diff_w_tensor->data(), diff_b_tensor->data(), diff_dst_tmp); + bwd_weight_vec.push_back(diff_w_tensor); + bwd_weight_vec.push_back(diff_b_tensor); + } else { + linear_bwd_weights->execute(src_tmp, diff_w_tensor->data(), diff_dst_tmp); + bwd_weight_vec.push_back(diff_w_tensor); + } + + return bwd_weight_vec; +} + +template +Tensor *Linear::BackwardData( + Tensor *weights, Tensor *diff_dst) +{ + //sanity check + mkldnn::memory::dims w_dims = weights->cxx_dims(); + mkldnn::memory::dims diff_dst_dims = diff_dst->cxx_dims(); + mkldnn::memory::dims diff_src_dims; + 
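+    // gx = gy * W: diff_dst is {N, out_features} and the weights are
+    // {out_features, in_features}, so diff_src comes out {N, in_features},
+    // which is exactly what the assignment below encodes.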
diff_src_dims = {diff_dst_dims[0], w_dims[1]}; + /* + if (lp->src_ndims == 2) { + assert(weights->ndims() == 2); + diff_src_dims = {lp->src_d1, lp->src_d2}; + } else if (lp->src_ndims == 4) { + diff_src_dims = {lp->src_d1, lp->src_d2, lp->src_d3, lp->src_d4}; + if (weights->ndims() != 4) { + w_dims = {w_dims[0], diff_src_dims[1], diff_src_dims[2], diff_src_dims[3]}; + weights->reset_memory(format_2_as_4(weights->format()), w_dims); + } + } else { + LOG(INFO) << "Error:: src ndim not support(2 or 4 only)"; + }*/ + //sanity check for data type + //assume all a/w/b should have the same type as T + //FIXME + //is it possible x and w have different data type??? + assert(memory_data_type() == weights->cxx_data_type()); + assert(memory_data_type() == diff_dst->cxx_data_type()); + //get a linear bwd data from primitive pool + LinearBwdData *linear_bwd_data = NULL; + linear_bwd_data = LinearBwdDataFactory::get(diff_src_dims, w_dims, diff_dst_dims); + //FIXME: in this model, every call to linear_forward will create a new tensor, when to free?? + mkldnn::memory::format w_fmt = weights->cxx_format(); + mkldnn::memory::format diff_dst_fmt = diff_dst->cxx_format(); + + void* w_tmp = weights->data(); + void* diff_dst_tmp = diff_dst->data(); + shared_ptr w_reorder; + shared_ptr diff_dst_reorder; + + if (w_fmt == linear_bwd_data->weights_fmt_ && diff_dst_fmt == linear_bwd_data->diff_dst_fmt_) { + //LOG(INFO) << "linear bwd data primitive fmt matched"; + } else { + //LOG(INFO) << "linear bwd data fmt not match, need to reorder"; + if (w_fmt != linear_bwd_data->weights_fmt_) { + // LOG(INFO) << "weights_fmt_ = " << w_fmt << ", linear_bwd_data->weights_fmt_ = " << linear_bwd_data->weights_fmt_; + ReorderOp* reorder_w_op = ReorderFactory::get(w_dims, w_fmt, linear_bwd_data->weights_fmt_); + w_reorder = Allocator::malloc(weights->len(), MPOOL_REORDER); + //w_reorder = new avx::byte[weights->len()]; + reorder_w_op->execute(w_tmp, w_reorder.get()); + w_tmp = w_reorder.get(); + } + if (diff_dst_fmt != linear_bwd_data->diff_dst_fmt_) { + //LOG(INFO) << "diff_dst_fmt = " << diff_dst_fmt << ", linear_bwd_data->diff_dst_fmt = " << linear_bwd_data->diff_dst_fmt_; + ReorderOp* reorder_diff_dst_op = ReorderFactory::get(diff_dst_dims, diff_dst_fmt, linear_bwd_data->diff_dst_fmt_); + diff_dst_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + //diff_dst_reorder = new avx::byte[diff_dst->len()]; + reorder_diff_dst_op->execute(diff_dst_tmp, diff_dst_reorder.get()); + diff_dst_tmp = diff_dst_reorder.get(); + } + } + //create tensor based on selected primitive + //assume dst and src have the same data type + //Tensor* diff_src_tensor = new Tensor(diff_src_dims, diff_dst->cxx_data_type(), linear_bwd_data->diff_src_fmt_, cpu_engine); + auto data = Allocator::malloc(diff_src_dims, type2size(diff_dst->type()), MPOOL_IP_BWD); + Tensor *diff_src_tensor = new Tensor(diff_src_dims.size(), diff_src_dims, data, + (mkldnn_memory_format_t)linear_bwd_data->diff_src_fmt_, + diff_dst->type()); + linear_bwd_data->execute(diff_src_tensor->data(), w_tmp, diff_dst_tmp); + + return diff_src_tensor; +} +template class Linear; + diff --git a/python/ideep4py/primitives/lrn.cc b/python/ideep4py/primitives/lrn.cc new file mode 100755 index 00000000..a35738e4 --- /dev/null +++ b/python/ideep4py/primitives/lrn.cc @@ -0,0 +1,190 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "tensor.h" +#include "mem.h" +#include "lrn.h" +#include "utils.h" +#include "lrn_fwd.h" +#include "lrn_bwd.h" +#include "prim_factory.h" +#include "reorder_op.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +LocalResponseNormalization::LocalResponseNormalization() +{ +} + +template +LocalResponseNormalization::~LocalResponseNormalization() +{ +} + +template +std::vector LocalResponseNormalization::Forward( + Tensor *src, lrn_param_t* pp) +{ + //sanity check for data type + assert(memory_data_type() == src.cxx_data_type()); + + // get a conv2d fwd from primitive pool + mkldnn::memory::format src_fmt = src->cxx_format(); // src fmt in tensor + LocalResponseNormalizationFwd *lrn_forward = NULL; + lrn_forward = LocalResponseNormalizationFwdFactory::get( + src->dims(), src_fmt, + pp->n, pp->k, + pp->alpha, pp->beta, + lrn_algo_convert(pp->algo_kind)); + + // mkldnn::memory::format src_fmt = src->cxx_format(); // src fmt in tensor + + void *src_tmp = src->data(); + shared_ptr src_reorder; + + // check wehther fmt is same + if (src_fmt == lrn_forward->src_fmt_) { + //LOG(INFO) << "lrn forward fmt matched"; + } else { + //LOG(INFO) << "lrn fwd fmt not match, need to reorder"; + // LOG(INFO) << "src_fmt=" << src_fmt <<", lrn_forward->src_fmt_=" << lrn_forward->src_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_src_op = ReorderFactory::get(src->dims(), src_fmt, lrn_forward->src_fmt_); + src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER); + //src_reorder = new avx::byte[src->len()]; + reorder_src_op->execute(src_tmp, src_reorder.get()); + src_tmp = src_reorder.get(); + } + + // create tensor based on primitive's dst + // assume dst and src have same data type + //Tensor *dst_tensor = new Tensor(src->dims(), src->cxx_data_type(), lrn_forward->dst_fmt_, cpu_engine); + auto data = Allocator::malloc(src->dims(), type2size(src->type()), MPOOL_LRN_FWD); + Tensor *dst_tensor = new Tensor(src->ndims(), src->dims(), data, + (mkldnn_memory_format_t)lrn_forward->dst_fmt_, + src->type()); + + // do forward + // to return workspace + // LOG(INFO) << "ws_dt_=" << lrn_forward->ws_dt_; + // workspace must be int tensor + //Tensor *ws_tensor = new Tensor((lrn_forward->ws_dims_), lrn_forward->ws_dt_, lrn_forward->ws_fmt_, cpu_engine); + auto ws_data = Allocator::malloc(lrn_forward->ws_size_, MPOOL_LRN_FWD); + Tensor 
*ws_tensor = new Tensor(lrn_forward->ws_dims_, + static_cast(lrn_forward->ws_dt_), + lrn_forward->ws_fmt_, ws_data); + + lrn_forward->execute(src_tmp, dst_tensor->data(), ws_tensor->data()); + std::vector outputs; + outputs.push_back(dst_tensor); + outputs.push_back(ws_tensor); + + //LOG(INFO) << "Succ exec lrn forward"; + return outputs; +} + +template +Tensor *LocalResponseNormalization::Backward( + Tensor *src, Tensor *diff_dst, Tensor *ws, lrn_param_t* pp) +{ + //sanity check + assert(src->ndims() == diff_dst->ndims()); + assert(src->size() == diff_dst->size()); + assert(memory_data_type() == diff_dst->cxx_data_type()); + + mkldnn::memory::dims ws_dims; + mkldnn::memory::data_type ws_dt; + ws_dims = ws->cxx_dims(); + ws_dt = ws->cxx_data_type(); + + // get a conv2d bwd data from primitive pool + LocalResponseNormalizationBwd *lrn_bwd = NULL; + lrn_bwd = LocalResponseNormalizationBwdFactory::get(src->dims(), diff_dst->dims(), ws_dims, ws_dt, + pp->n, pp->k, pp->alpha, pp->beta, lrn_algo_convert(pp->algo_kind)); + + // FIXME: in this model, every call to conv_forward will create a new tensor, when to free??? + shared_ptr ws_reorder; + mkldnn::memory::format ws_fmt = ws->cxx_format(); + void* ws_tmp = ws->data(); + assert(ws_tmp == NULL); + + mkldnn::memory::format diff_dst_fmt = diff_dst->cxx_format(); + void* diff_dst_tmp = diff_dst->data(); + shared_ptr diff_dst_reorder; + + if (ws_fmt != lrn_bwd->ws_fmt_) { + //LOG(INFO) << "lrn bwd data ws fmt not match, need to reorder"; + //LOG(INFO) << "ws_fmt=" << ws_fmt << ", lrn_bwd->ws_fmt_="<< lrn_bwd->ws_fmt_; + ReorderOp* reorder_ws_op = ReorderFactory::get(ws_dims, ws_fmt, lrn_bwd->ws_fmt_); + ws_reorder = Allocator::malloc(ws->len(), MPOOL_REORDER); + //ws_reorder = new avx::byte[ws->len()]; + reorder_ws_op->execute(ws_tmp, ws_reorder.get()); + ws_tmp = ws_reorder.get(); + } + if (diff_dst_fmt != lrn_bwd->diff_dst_fmt_) { + //LOG(INFO) << "lrn bwd data diff dst fmt not match, need to reorder"; + //LOG(INFO) << "diff_dst_fmt=" << diff_dst_fmt <<", lrn_bwd->diff_dst_fmt_=" << lrn_bwd->diff_dst_fmt_; + ReorderOp* reorder_diff_dst_op = ReorderFactory::get(diff_dst->dims(), diff_dst_fmt, lrn_bwd->diff_dst_fmt_); + diff_dst_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER); + //diff_dst_reorder = new avx::byte[diff_dst->len()]; + reorder_diff_dst_op->execute(diff_dst_tmp, diff_dst_reorder.get()); + diff_dst_tmp = diff_dst_reorder.get(); + } + void *src_buf = src->data(); + shared_ptr src_reorder; + if (src->cxx_format() != diff_dst->cxx_format()) { + //LOG(INFO) << "lrn bwd data src fmt not match, need to reorder"; + // LOG(INFO) << "diff_dst_fmt=" << diff_dst->cxx_format() <<", src format=" << src->cxx_format(); + ReorderOp* reorder_src_op = ReorderFactory::get(src->dims(), src->cxx_format(), diff_dst->cxx_format()); + //src_reorder = new avx::byte[diff_dst->len()]; + src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER); + reorder_src_op->execute(src_buf, src_reorder.get()); + src_buf = src_reorder.get(); + } + + // create tensor based on selected primitive + // assume dst and src have same data type + //Tensor *diff_src_tensor = new Tensor(src->dims(), diff_dst->cxx_data_type(), lrn_bwd->diff_src_fmt_, cpu_engine); + auto data = Allocator::malloc(src->dims(), type2size(src->type()), MPOOL_LRN_BWD); + Tensor *diff_src_tensor = new Tensor(src->ndims(), src->dims(), data, + (mkldnn_memory_format_t)lrn_bwd->diff_src_fmt_, + src->type()); + + lrn_bwd->execute(src_buf, diff_src_tensor->data(), diff_dst_tmp, ws_tmp); + + return 
diff_src_tensor; +} + + +template class LocalResponseNormalization; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/bn_bwd.cc b/python/ideep4py/primitives/ops/bn_bwd.cc new file mode 100644 index 00000000..70dc8fb0 --- /dev/null +++ b/python/ideep4py/primitives/ops/bn_bwd.cc @@ -0,0 +1,118 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include "mkldnn.hpp" +#include "bn_bwd.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +void batch_normalization_bwd::setup(mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + float eps, bool scale_shift) { + flags_ |= scale_shift ? use_scale_shift : 0; + + // memory desc + auto src_md = memory::desc({src_d}, memory_data_type(), + get_desired_format(bn_size_)); + auto diff_dst_md = memory::desc({diff_dst_d}, memory_data_type(), + get_desired_format(bn_size_)); + + // fwd desc & primitive desc + auto fwd_desc = batch_normalization_forward::desc(prop_kind::forward_training, src_md, eps, flags_); + auto fwd_pd = batch_normalization_forward::primitive_desc(fwd_desc, cpu_engine); + + // bwd desc & primitive desc + auto bwd_desc = batch_normalization_backward::desc( + scale_shift ? 
prop_kind::backward : prop_kind::backward_data, + diff_dst_md, src_md, eps, flags_); + auto bwd_pd = batch_normalization_backward::primitive_desc( + bwd_desc, cpu_engine, fwd_pd); + + // memory primitive + src_mem_.reset(new memory({src_md, cpu_engine}, dummy)); + diff_dst_mem_.reset(new memory({diff_dst_md, cpu_engine}, dummy)); + mean_mem_.reset(new memory(bwd_pd.mean_primitive_desc(), dummy)); + var_mem_.reset(new memory(bwd_pd.variance_primitive_desc(), dummy)); + diff_src_mem_.reset(new memory({src_md, cpu_engine}, dummy)); + + // bn bwd primitive + if ((flags_ & use_scale_shift) && mkldnn_use_scaleshift) { + w_mem_.reset(new memory(bwd_pd.weights_primitive_desc(), dummy)); + diff_w_mem_.reset(new memory(bwd_pd.diff_weights_primitive_desc(), dummy)); + + bn_bwd_.reset(new batch_normalization_backward(bwd_pd, *src_mem_, *mean_mem_, + *var_mem_, *diff_dst_mem_, *w_mem_, *diff_src_mem_, *diff_w_mem_)); + } else { + bn_bwd_.reset(new batch_normalization_backward(bwd_pd, *src_mem_, *mean_mem_, + *var_mem_, *diff_dst_mem_, *diff_src_mem_)); + } + + bwd_primitives_.push_back(*bn_bwd_); + + return; +} + +template +void batch_normalization_bwd::execute(void *src, void *diff_dst, + void *mean, void *var, + void *w, void *diff_src, + void *diff_w) { + // couple with buffer + src_mem_->set_data_handle(src); + diff_dst_mem_->set_data_handle(diff_dst); + mean_mem_->set_data_handle(mean); + var_mem_->set_data_handle(var); + + if (flags_ & use_scale_shift) { + w_mem_->set_data_handle(w); + diff_w_mem_->set_data_handle(diff_w); + } + + diff_src_mem_->set_data_handle(diff_src); + + // exec + bwd_stream_->submit(bwd_primitives_); + + // decouple + src_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + mean_mem_->set_data_handle(dummy); + var_mem_->set_data_handle(dummy); + + if (flags_ & use_scale_shift) { + w_mem_->set_data_handle(dummy); + diff_w_mem_->set_data_handle(dummy); + } + + diff_src_mem_->set_data_handle(dummy); + + return; +} + +template class batch_normalization_bwd; diff --git a/python/ideep4py/primitives/ops/bn_fwd.cc b/python/ideep4py/primitives/ops/bn_fwd.cc new file mode 100644 index 00000000..b94fafcb --- /dev/null +++ b/python/ideep4py/primitives/ops/bn_fwd.cc @@ -0,0 +1,134 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include "mkldnn.hpp" +#include "bn_fwd.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +void batch_normalization_fwd::setup(mkldnn::memory::dims src_d, + float eps, bool scale_shift, + bool global_stats, bool training) { + + flags_ |= scale_shift ? use_scale_shift : 0; + flags_ |= global_stats ? use_global_stats : 0; + + pkind_ = training ? + prop_kind::forward_training : + prop_kind::forward_scoring; + + // memory desc + auto src_md = memory::desc({src_d}, memory_data_type(), get_desired_format(src_d[1])); + + // fwd desc & primitive desc + auto fwd_desc = batch_normalization_forward::desc(pkind_, src_md, eps, flags_); + auto fwd_pd = batch_normalization_forward::primitive_desc(fwd_desc, cpu_engine); + + // memory primitive + src_mem_.reset(new memory({src_md, cpu_engine}, dummy)); + dst_mem_.reset(new memory(fwd_pd.dst_primitive_desc(), dummy)); + + if (flags_ & use_scale_shift) + w_mem_.reset(new memory(fwd_pd.weights_primitive_desc(), dummy)); + + if (training || (flags_ & use_global_stats)) { + mean_mem_.reset(new memory(fwd_pd.mean_primitive_desc(), dummy)); + var_mem_.reset(new memory(fwd_pd.variance_primitive_desc(), dummy)); + } + + // bn fwd primitive + if (!training && !(flags_ & use_global_stats)) { + if ((flags_ & use_scale_shift) && mkldnn_use_scaleshift) { + bn_fwd_.reset(new batch_normalization_forward( + fwd_pd, *src_mem_, *w_mem_, *dst_mem_)); + } else { + bn_fwd_.reset(new batch_normalization_forward( + fwd_pd, *src_mem_, *dst_mem_)); + } + } else if (flags_ & use_global_stats) { + if ((flags_ & use_scale_shift) && mkldnn_use_scaleshift) { + bn_fwd_.reset(new batch_normalization_forward( + fwd_pd, *src_mem_, (const primitive::at)*mean_mem_, + (const primitive::at)*var_mem_, *w_mem_, *dst_mem_)); + } else { + bn_fwd_.reset(new batch_normalization_forward( + fwd_pd, *src_mem_, (const primitive::at)*mean_mem_, + (const primitive::at)*var_mem_, *dst_mem_)); + } + } else { + if ((flags_ & use_scale_shift) && mkldnn_use_scaleshift) { + bn_fwd_.reset(new batch_normalization_forward( + fwd_pd, *src_mem_, *w_mem_, *dst_mem_, *mean_mem_, *var_mem_)); + } else { + bn_fwd_.reset(new batch_normalization_forward( + fwd_pd, *src_mem_, *dst_mem_, *mean_mem_, *var_mem_)); + } + } + + fwd_primitives_.push_back(*bn_fwd_); + + return; +} + +template +void batch_normalization_fwd::execute(void *src, void *w, void *dst, + void *mean, void *var) { + // couple with buffer + src_mem_->set_data_handle(src); + dst_mem_->set_data_handle(dst); + + if (flags_ & use_scale_shift) + w_mem_->set_data_handle(w); + + if ((pkind_ == prop_kind::forward_training) || + (flags_ & use_global_stats)) { + mean_mem_->set_data_handle(mean); + var_mem_->set_data_handle(var); + } + + // exec + fwd_stream_->submit(fwd_primitives_); + + // decouple + src_mem_->set_data_handle(dummy); + dst_mem_->set_data_handle(dummy); + + if (flags_ & use_scale_shift) + w_mem_->set_data_handle(dummy); + + if ((pkind_ == prop_kind::forward_training) || + (flags_ & use_global_stats)) { + mean_mem_->set_data_handle(dummy); + var_mem_->set_data_handle(dummy); + } + + return; +} + +template class batch_normalization_fwd; diff --git a/python/ideep4py/primitives/ops/concat_bwd.cc b/python/ideep4py/primitives/ops/concat_bwd.cc new file mode 100644 index 00000000..09e31bb1 --- /dev/null +++ b/python/ideep4py/primitives/ops/concat_bwd.cc @@ -0,0 +1,137 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "concat_bwd.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +ConcatBwd::ConcatBwd( std::vector diff_src_ds, + mkldnn::memory::dims diff_dst_d, + int axis) +{ + bwd_stream_.reset(new stream(stream::kind::eager)); + // create concat primitive + setup(diff_src_ds, diff_dst_d, axis); +} + +template +ConcatBwd::~ConcatBwd() +{ +} + +template +void ConcatBwd::setup( std::vector diff_src_ds, + mkldnn::memory::dims diff_dst_d, + int axis) +{ + //LOG(INFO) << "Concat backward_setup"; + + assert(diff_src_ds.size() > 0); + axis_ = axis; + + /* init the offset */ + memory::dims offsets = {0, 0, 0, 0}; + + //LOG(INFO) << "diff dst dims: [" << diff_dst_d[0] << "," << diff_dst_d[1] + // << "," << diff_dst_d[2] << "," << diff_dst_d[3] << "]"; + + //FIXME + // Currently, concat backward's diff_dst fmt is hard set, and store it + memory::format diff_dst_fmt = get_desired_format(diff_dst_d[1]); // + diff_dst_fmt_ = diff_dst_fmt; + + // create diff dst md/mpt/mem + diff_dst_mpd_.reset(new memory::primitive_desc( + {{diff_dst_d}, memory_data_type(), diff_dst_fmt}, cpu_engine)); + diff_dst_mem_.reset(new memory( + {{{diff_dst_d}, memory_data_type(), diff_dst_fmt}, cpu_engine}, dummy)); + + for (int i = 0; i < diff_src_ds.size(); i++) { + //FIXME + //Currently, concat's diff src fmt hard set as diff_dst fmt, need to pay attention in future for performance issue + memory::dims diff_src_tz = diff_src_ds[i]; + //LOG(INFO) << "diff src dims: [" << diff_src_tz[0] << "," << diff_src_tz[1] + // << "," << diff_src_tz[2] << "," << diff_src_tz[3] << "]"; + + auto diff_src_mpd = memory::primitive_desc( + {{diff_src_tz}, memory_data_type(), diff_dst_fmt}, cpu_engine); + auto diff_src_mem = memory({diff_src_mpd}, dummy); + + // store diff src fmt, same as diff dst + diff_src_fmts_.push_back(diff_dst_fmt); + + diff_src_mems_.push_back(diff_src_mem); + + // create view from gy to gxs[i] + std::shared_ptr view_pd; + view_pd.reset(new view::primitive_desc(*diff_dst_mpd_, diff_src_tz, offsets)); + // create reorder primitive from gy to gxs[i] + std::shared_ptr reorder_pd; + reorder_pd.reset(new reorder::primitive_desc(view_pd.get()->dst_primitive_desc(), diff_src_mpd)); + + std::shared_ptr reorder_prim; + reorder_prim.reset(new reorder(*reorder_pd, *diff_dst_mem_, diff_src_mems_[i])); + + bwd_primitives_.push_back(*reorder_prim); + + 
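+        // Each iteration reorders one slice of diff_dst into diff_src[i]
+        // through a view; advancing `offsets` below moves the view window
+        // along the concat axis so the next slice starts where this one ended.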
offsets[axis_] += diff_src_tz[axis_]; + } + + return; +} + +template +void ConcatBwd::execute(std::vector diff_src, void *diff_dst) +{ + //LOG(INFO) << "Concat backward"; + assert(diff_src.size() == diff_src_mems_.size()); + + for (int i = 0; i < diff_src_mems_.size(); i++) { + diff_src_mems_[i].set_data_handle(diff_src[i]); + } + diff_dst_mem_->set_data_handle(diff_dst); + + bwd_stream_->submit(bwd_primitives_); + + //after exec, set data handle back + for (int i = 0; i < diff_src_mems_.size(); i++) { + diff_src_mems_[i].set_data_handle(dummy); + } + diff_dst_mem_->set_data_handle(dummy); + + return; +} + +template class ConcatBwd; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/concat_fwd.cc b/python/ideep4py/primitives/ops/concat_fwd.cc new file mode 100644 index 00000000..472703fa --- /dev/null +++ b/python/ideep4py/primitives/ops/concat_fwd.cc @@ -0,0 +1,131 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "concat_fwd.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +ConcatFwd::ConcatFwd( std::vector src_ds, + mkldnn::memory::dims dst_d, int axis) +{ + fwd_stream_.reset(new stream(stream::kind::eager)); + // create concat primitive + if (concat_fwd_ == NULL) { + setup(src_ds, dst_d, axis); + } +} + +template +ConcatFwd::~ConcatFwd() +{ +} + +template +void ConcatFwd::setup( std::vector src_ds, + mkldnn::memory::dims dst_d, + int axis) +{ + //LOG(INFO) << "Concat forward_setup"; + + assert(src_ds.size() > 0); + axis_ = axis; + + //LOG(INFO) << "dst dims: [" << dst_d[0] << "," << dst_d[1] + //<< "," << dst_d[2] << "," << dst_d[3] << "]"; + + //FIXME + // Currently, concat's src fms is hard set + memory::format src_fmt = get_desired_format(src_ds[0][1]); // + + for (int i = 0; i < src_ds.size(); i++) { + //FIXME + //Currently, concat's src fmt hard set as nchw, need to pay attention in future for performance issue + memory::dims src_tz = src_ds[i]; + + auto src_mpd = memory::primitive_desc( + {{src_tz}, memory_data_type(), src_fmt}, cpu_engine); + auto src_mem = memory({src_mpd}, dummy); + + src_mpds_.push_back(src_mpd); + src_mems_.push_back(src_mem); + + // concat only accept mkldnn::primitive::at parameter + src_prim_at_.push_back(primitive::at(src_mem)); + + + // store src fmt + src_fmts_.push_back(src_fmt); + } + + // FIXME + // here, if set format as any, will create memory fail????? + dst_md_.reset(new memory::desc(dst_d, memory_data_type(), src_fmt)); + dst_mem_.reset(new memory({{{dst_d}, memory_data_type(), src_fmt}, cpu_engine}, dummy)); + //dst_md_.reset(new memory::desc(dst_d, memory_data_type(), mkldnn::memory::format::any)); + //dst_mem_.reset(new memory({{{dst_d}, memory_data_type(), mkldnn::memory::format::any}, cpu_engine}, dummy)); + + // create concat pd/primitive + concat_pd_.reset(new concat::primitive_desc(*dst_md_, axis_, src_mpds_)); + concat_fwd_.reset(new concat(*concat_pd_, src_prim_at_, *dst_mem_)); + + // store dst fmr + dst_fmt_ = static_cast(concat_pd_.get()->dst_primitive_desc().desc().data.format); + + return; +} + +template +void ConcatFwd::execute(std::vector src, void *dst) +{ + //LOG(INFO) << "Concat forward"; + assert(src.size() == src_mems_.size()); + + for (int i = 0; i < src_mems_.size(); i++) { + src_mems_[i].set_data_handle(src[i]); + } + dst_mem_->set_data_handle(dst); + + fwd_stream_->submit({*concat_fwd_}); + + //after exec, set data handle back + for (int i = 0; i < src_mems_.size(); i++) { + src_mems_[i].set_data_handle(dummy); + } + dst_mem_->set_data_handle(dummy); + + return; +} + +template class ConcatFwd; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/conv_bwd_data.cc b/python/ideep4py/primitives/ops/conv_bwd_data.cc new file mode 100644 index 00000000..d6d79d81 --- /dev/null +++ b/python/ideep4py/primitives/ops/conv_bwd_data.cc @@ -0,0 +1,153 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "conv_bwd_data.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Convolution2DBwdData::Convolution2DBwdData( + mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims w_d, + mkldnn::memory::dims diff_dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw) +{ + bwd_data_stream_.reset(new stream(stream::kind::eager)); + // create conv primitive + if (conv_bwd_data_ == NULL) { + setup(diff_src_d, w_d, diff_dst_d, + dilate_y, dilate_x, + sy, sx, + pad_lh, pad_lw, + pad_rh, pad_rw); + } +} + +template +Convolution2DBwdData::~Convolution2DBwdData() +{ +} + +template +void Convolution2DBwdData::setup( + mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims w_d, + mkldnn::memory::dims diff_dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, + int pad_rh, int pad_rw) +{ + //LOG(INFO) << "Convolution backward data setup"; + assert(diff_src_d != NULL); + assert(w_d != NULL); + assert(diff_dst_d != NULL); + + dilates_ = {dilate_y, dilate_x}; + strides_ = {sy, sx}; + padding_l_ = {pad_lh, pad_lw}; + padding_r_ = {pad_rh, pad_rw}; + + //LOG(INFO) << "diff_src[0]=" << diff_src_d[0] << ", diff_src[1]=" << diff_src_d[1] << ", diff_src[2]=" << diff_src_d[2] << ", diff_src[3]=" << diff_src_d[3]; + //LOG(INFO) << "w[0]=" << w_d[0] << ", w[1]=" << w_d[1] << ", w=" << w_d[2] << ", w[3]=" << w_d[3]; + //LOG(INFO) << "diff_dst[0]=" << diff_dst_d[0] << ", diff_dst[1]=" << diff_dst_d[1] << ", diff_dst[2]=" << diff_dst_d[2] << ", diff_dst[3]=" << diff_dst_d[3]; + + //LOG(INFO) << "sy=" << sy << ", sx=" << sx; + // LOG(INFO) << "pl1=" << pad_lh << ", pl2=" << pad_lw << ", pr1=" << pad_rh << ", pr2=" << pad_rw; + + /* create memory descriptors for convolution data w/ no specified format */ + diff_src_md_.reset(new memory::desc({diff_src_d}, memory_data_type(), + memory::format::any)); + weights_md_.reset(new memory::desc({w_d}, + memory_data_type(), memory::format::any)); + diff_dst_md_.reset(new memory::desc({diff_dst_d}, memory_data_type(), + memory::format::any)); + /* create a convolution */ + bwd_data_desc_.reset(new convolution_backward_data::desc( + convolution_direct, *diff_src_md_, *weights_md_, + *diff_dst_md_, strides_, dilates_, padding_l_, padding_r_, padding_kind::zero)); + + // FIXME + // yli135: Current conv bwd need a fwd pd as hint, will 
remove in future + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *diff_src_md_, *weights_md_, + *diff_dst_md_, strides_, dilates_, padding_l_, padding_r_, padding_kind::zero)); + fwd_pd_.reset(new convolution_forward::primitive_desc(*fwd_desc_, cpu_engine)); + + /* create backward conv prim desc*/ + bwd_data_pd_.reset(new convolution_backward_data::primitive_desc( + *bwd_data_desc_, cpu_engine, *fwd_pd_)); + + + //store the expected memory format + diff_src_fmt_ = static_cast(bwd_data_pd_.get()->diff_src_primitive_desc().desc().data.format); + weights_fmt_ = static_cast(bwd_data_pd_.get()->weights_primitive_desc().desc().data.format); + diff_dst_fmt_ = static_cast(bwd_data_pd_.get()->diff_dst_primitive_desc().desc().data.format); + + // create memory primitive based on dummy data + diff_src_mem_.reset(new memory(bwd_data_pd_.get()->diff_src_primitive_desc(), dummy)); + weights_mem_.reset(new memory(bwd_data_pd_.get()->weights_primitive_desc(), dummy)); + diff_dst_mem_.reset(new memory(bwd_data_pd_.get()->diff_dst_primitive_desc(), dummy)); + + /* create convolution primitive and add it to net */ + conv_bwd_data_.reset(new convolution_backward_data(*bwd_data_pd_, *diff_dst_mem_, + *weights_mem_, *diff_src_mem_)); + + bwd_data_primitives_.push_back(*conv_bwd_data_); + return; +} + +template +void Convolution2DBwdData::execute(void* diff_src, void* w, void* diff_dst) +{ +// LOG(INFO) << "Convolution forward without bias"; +// LOG(INFO) << conv_fwd_; + + diff_src_mem_->set_data_handle(diff_src); + weights_mem_->set_data_handle(w); + diff_dst_mem_->set_data_handle(diff_dst); + //conv_fwd_->execute(); + bwd_data_stream_->submit(bwd_data_primitives_); + + //set back data handke + diff_src_mem_->set_data_handle(dummy); + weights_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + + return; +} + +template class Convolution2DBwdData; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/conv_bwd_weights.cc b/python/ideep4py/primitives/ops/conv_bwd_weights.cc new file mode 100644 index 00000000..69c00d8d --- /dev/null +++ b/python/ideep4py/primitives/ops/conv_bwd_weights.cc @@ -0,0 +1,176 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "conv_bwd_weights.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Convolution2DBwdWeights::Convolution2DBwdWeights( + mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d, + mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw) +{ + bwd_weights_stream_.reset(new stream(stream::kind::eager)); + // create conv primitive + if (conv_bwd_weights_ == NULL) { + setup(src_d, diff_w_d, diff_b_d, diff_dst_d, + dilate_y, dilate_x, + sy, sx, + pad_lh, pad_lw, + pad_rh, pad_rw); + } +} + +template +Convolution2DBwdWeights::~Convolution2DBwdWeights() +{ +} + +template +void Convolution2DBwdWeights::setup(mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d, + mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, + int pad_rh, int pad_rw) +{ + //LOG(INFO) << "Convolution backward_setup"; + assert(src_d != NULL); + assert(diff_w_d != NULL); + assert(diff_b_d != NULL); // no bias case, expect as NONE_DIMS, not NULL + assert(diff_dst_d != NULL); + + dilates_ = {dilate_y, dilate_x}; + strides_ = {sy, sx}; + padding_l_ = {pad_lh, pad_lw}; + padding_r_ = {pad_rh, pad_rw}; + + /* create memory descriptors for convolution data w/ no specified format */ + src_md_.reset(new memory::desc({src_d}, memory_data_type(), + memory::format::any)); + diff_weights_md_.reset(new memory::desc({diff_w_d}, + memory_data_type(), memory::format::any)); + diff_dst_md_.reset(new memory::desc({diff_dst_d}, memory_data_type(), + memory::format::any)); + if (!diff_b_d.empty()) + diff_bias_md_.reset(new memory::desc({diff_b_d}, memory_data_type(), + memory::format::any)); + /* create a convolution */ + if (!diff_b_d.empty()) { + bwd_weights_desc_.reset(new convolution_backward_weights::desc( + convolution_direct, *src_md_, *diff_weights_md_, + *diff_bias_md_, *diff_dst_md_, strides_, dilates_, padding_l_, padding_r_, padding_kind::zero)); + } else { + bwd_weights_desc_.reset(new convolution_backward_weights::desc( + convolution_direct, *src_md_, *diff_weights_md_, + *diff_dst_md_, strides_, dilates_, padding_l_, padding_r_, padding_kind::zero)); + + } + + // FIXME + // yli135: Current conv bwd need a fwd pd as hint, will remove in future + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *diff_weights_md_, + *diff_dst_md_, strides_, dilates_, padding_l_, padding_r_, padding_kind::zero)); + fwd_pd_.reset(new convolution_forward::primitive_desc(*fwd_desc_, cpu_engine)); + + /* create backward conv prim desc*/ + bwd_weights_pd_.reset(new convolution_backward_weights::primitive_desc( + *bwd_weights_desc_, cpu_engine, *fwd_pd_)); + + + //store the expected memory format + src_fmt_ = static_cast(bwd_weights_pd_.get()->src_primitive_desc().desc().data.format); + diff_weights_fmt_ = static_cast(bwd_weights_pd_.get()->diff_weights_primitive_desc().desc().data.format); + diff_dst_fmt_ = static_cast(bwd_weights_pd_.get()->diff_dst_primitive_desc().desc().data.format); + + // create memory primitive based on dummy data + src_mem_.reset(new memory(bwd_weights_pd_.get()->src_primitive_desc(), dummy)); + diff_weights_mem_.reset(new memory(bwd_weights_pd_.get()->diff_weights_primitive_desc(), dummy)); + diff_dst_mem_.reset(new 
memory(bwd_weights_pd_.get()->diff_dst_primitive_desc(), dummy)); + + /* create convolution primitive and add it to net */ + if (!diff_b_d.empty()) { + diff_bias_mem_.reset(new memory({{{diff_b_d}, memory_data_type(), memory::format::x}, cpu_engine}, dummy)); + conv_bwd_weights_.reset(new convolution_backward_weights(*bwd_weights_pd_, *src_mem_, + *diff_dst_mem_, *diff_weights_mem_, *diff_bias_mem_)); + } else { + conv_bwd_weights_.reset(new convolution_backward_weights(*bwd_weights_pd_, *src_mem_, + *diff_dst_mem_, *diff_weights_mem_)); + } + + bwd_weights_primitives_.push_back(*conv_bwd_weights_); + return; +} + +template +void Convolution2DBwdWeights::execute(void* src, void* diff_w, void* diff_b, void* diff_dst) +{ +// LOG(INFO) << "Convolution forward"; + //LOG(INFO) << "conv_fwd_:" << conv_fwd_; + //LOG(INFO) << "x=" << x << "; x_size=" << x_d1*x_d2*x_d3*x_d4*4; + src_mem_->set_data_handle(src); + diff_weights_mem_->set_data_handle(diff_w); + diff_bias_mem_->set_data_handle(diff_b); + diff_dst_mem_->set_data_handle(diff_dst); + //conv_fwd_->execute(); + bwd_weights_stream_->submit(bwd_weights_primitives_); + src_mem_->set_data_handle(dummy); + diff_weights_mem_->set_data_handle(dummy); + diff_bias_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + return; +} + +template +void Convolution2DBwdWeights::execute(void* src, void* diff_w, void* diff_dst) +{ +// LOG(INFO) << "Convolution forward without bias"; +// LOG(INFO) << conv_fwd_; + + src_mem_->set_data_handle(src); + diff_weights_mem_->set_data_handle(diff_w); + diff_dst_mem_->set_data_handle(diff_dst); + //conv_fwd_->execute(); + bwd_weights_stream_->submit(bwd_weights_primitives_); + src_mem_->set_data_handle(dummy); + diff_weights_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + return; +} + +template class Convolution2DBwdWeights; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/conv_fwd.cc b/python/ideep4py/primitives/ops/conv_fwd.cc new file mode 100644 index 00000000..9c83da3b --- /dev/null +++ b/python/ideep4py/primitives/ops/conv_fwd.cc @@ -0,0 +1,179 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "conv_fwd.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Convolution2DFwd::Convolution2DFwd( mkldnn::memory::dims src_d, mkldnn::memory::dims w_d, + mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d, + int dilate_y, int dilate_x, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw) +{ + fwd_stream_.reset(new stream(stream::kind::eager)); + // create conv primitive + if (conv_fwd_ == NULL) { + setup(src_d, w_d, b_d, dst_d, + dilate_y, dilate_x, + sy, sx, + pad_lh, pad_lw, + pad_rh, pad_rw); + } +} + +template +Convolution2DFwd::~Convolution2DFwd() +{ +} + +template +void Convolution2DFwd::setup(mkldnn::memory::dims src_d, mkldnn::memory::dims w_d, + mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d, + int dilate_y, int dilate_x, + int s1, int s2, + int pl1, int pl2, + int pr1, int pr2) +{ + //LOG(INFO) << "Convolution forward_setup"; + assert(src_d != NULL); + assert(w_d != NULL); + assert(bias_d != NULL); // no bias case, expect as NONE_DIMS, not NULL + assert(dst_d != NULL); + + dilates_ = {dilate_y, dilate_x}; + strides_ = {s1, s2}; + padding_l_ = {pl1, pl2}; + padding_r_ = {pr1, pr2}; + + //LOG(INFO) << "src_d1=" << src_d[0] << ", src_d2=" << src_d[1] << "; src_d3=" << src_d[2] << ", src_d4=" << src_d[3]; + //LOG(INFO) << "w_d1=" << w_d[0] << ", w_d2=" << w_d[1] << "; w_d3=" << w_d[2] << ", w_d4=" << w_d[3]; + //LOG(INFO) << "dst_d1=" << dst_d[0] << ", dst_d2=" << dst_d[1] << "; dst_d3=" << dst_d[2] << ", dst_d4=" << dst_d[3]; + //LOG(INFO) << "dialte_y=" << dilate_y << ", dilate_x=" << dilate_x; + //LOG(INFO) << "sy=" << s1 << ", sx=" << s2; + //LOG(INFO) << "pl1=" << pl1 << ", pl2=" << pl2 << ", pr1=" << pr1 << ", pr2=" << pr2; + + /* create memory descriptors for convolution data w/ no specified format */ + src_md_.reset(new memory::desc({src_d}, memory_data_type(), + memory::format::any)); + weights_md_.reset(new memory::desc({w_d}, + memory_data_type(), memory::format::any)); + dst_md_.reset(new memory::desc({dst_d}, memory_data_type(), + memory::format::any)); + if (!b_d.empty()) + bias_md_.reset(new memory::desc({b_d}, memory_data_type(), + memory::format::any)); + /* create a convolution */ + if (!b_d.empty()) { + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *weights_md_, *bias_md_, + *dst_md_, strides_, dilates_, padding_l_, padding_r_, + padding_kind::zero)); + } else { + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *weights_md_, + *dst_md_, strides_, dilates_, padding_l_, padding_r_, + padding_kind::zero)); + } + + fwd_pd_.reset(new convolution_forward::primitive_desc(*fwd_desc_, cpu_engine)); + + //store the expected memory format + src_fmt_ = static_cast(fwd_pd_.get()->src_primitive_desc().desc().data.format); + weights_fmt_ = static_cast(fwd_pd_.get()->weights_primitive_desc().desc().data.format); + dst_fmt_ = static_cast(fwd_pd_.get()->dst_primitive_desc().desc().data.format); + + // create memory primitive based on dummy data + src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), dummy)); + weights_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), dummy)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), dummy)); + + /* create convolution primitive and add it to net */ + if (!b_d.empty()) { + bias_mem_.reset(new memory({{{b_d}, memory_data_type(), memory::format::x}, 
cpu_engine}, dummy)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *weights_mem_, *bias_mem_, *dst_mem_)); + } else { + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *weights_mem_, *dst_mem_)); + } + + fwd_primitives_.push_back(*conv_fwd_); + return; +} + +template +void Convolution2DFwd::execute(void* src, void* w, void* b, void* dst) +{ + //LOG(INFO) << "Convolution forward"; + //LOG(INFO) << "conv_fwd_:" << conv_fwd_; + //LOG(INFO) << "x=" << x << "; x_size=" << x_d1*x_d2*x_d3*x_d4*4; + src_mem_->set_data_handle(src); + weights_mem_->set_data_handle(w); + bias_mem_->set_data_handle(b); + dst_mem_->set_data_handle(dst); + //conv_fwd_->execute(); + fwd_stream_->submit(fwd_primitives_); + + //after exec, set data handle back + src_mem_->set_data_handle(dummy); + weights_mem_->set_data_handle(dummy); + bias_mem_->set_data_handle(dummy); + dst_mem_->set_data_handle(dummy); + + return; +} + +template +void Convolution2DFwd::execute(void* src, void* w, void* dst) +{ + //LOG(INFO) << "Convolution forward without bias"; +// LOG(INFO) << conv_fwd_; + + src_mem_->set_data_handle(src); + weights_mem_->set_data_handle(w); + dst_mem_->set_data_handle(dst); + //conv_fwd_->execute(); + fwd_stream_->submit(fwd_primitives_); + + //after exec, set data handle back + src_mem_->set_data_handle(dummy); + weights_mem_->set_data_handle(dummy); + dst_mem_->set_data_handle(dummy); + + return; +} + +template class Convolution2DFwd; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/eltwise_bwd.cc b/python/ideep4py/primitives/ops/eltwise_bwd.cc new file mode 100644 index 00000000..f15df8eb --- /dev/null +++ b/python/ideep4py/primitives/ops/eltwise_bwd.cc @@ -0,0 +1,110 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "eltwise_bwd.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +EltwiseBwd::EltwiseBwd(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format dst_diff_fmt, T2 alpha, T2 beta) +{ + bwd_stream_.reset(new stream(stream::kind::eager)); + // create eltwise primitive + if (eltwise_bwd_ == nullptr) { + setup(src_d, alg_kind, dst_diff_fmt, alpha, beta); + } +} + +template +EltwiseBwd::~EltwiseBwd() +{ +} + +template +void EltwiseBwd::setup(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format dst_diff_fmt, T2 alpha, T2 beta) +{ + //LOG(INFO) << "Eltwise backward_setup"; + assert(src_d != nullptr); + + /* create memory descriptors for eltwise data w/ no specified format */ + src_md_.reset(new memory::desc({src_d}, memory_data_type(), + dst_diff_fmt)); + dst_diff_md_.reset(new memory::desc({src_d}, memory_data_type(), + dst_diff_fmt)); + src_mpd_.reset(new memory::primitive_desc(*src_md_, cpu_engine)); + dst_diff_mpd_.reset(new memory::primitive_desc(*dst_diff_md_, cpu_engine)); + /* create a eltwise*/ + fwd_desc_.reset(new eltwise_forward::desc(prop_kind::forward, alg_kind, + *src_md_, alpha, beta)); + fwd_pd_.reset(new eltwise_forward::primitive_desc(*fwd_desc_, cpu_engine)); + + bwd_desc_.reset(new eltwise_backward::desc(alg_kind, + *dst_diff_md_, *src_md_, alpha, beta)); + + bwd_pd_.reset(new eltwise_backward::primitive_desc(*bwd_desc_, cpu_engine, *fwd_pd_)); + + //store the expected memory format + src_diff_fmt_ = static_cast(bwd_pd_.get()->diff_src_primitive_desc().desc().data.format); + + // create memory primitive based on dummy data + src_mem_.reset(new memory(*src_mpd_, dummy)); + dst_diff_mem_.reset(new memory(*dst_diff_mpd_, dummy)); + src_diff_mem_.reset(new memory(bwd_pd_.get()->diff_src_primitive_desc(), dummy)); + + /* create eltwise primitive and add it to net */ + eltwise_bwd_.reset(new eltwise_backward(*bwd_pd_, *src_mem_, *dst_diff_mem_, *src_diff_mem_)); + + bwd_primitives_.push_back(*eltwise_bwd_); + return; +} + +template +void EltwiseBwd::execute(void* src, void* dst_diff, void* src_diff) +{ + //LOG(INFO) << "Eltwise backward"; + + src_mem_->set_data_handle(src); + dst_diff_mem_->set_data_handle(dst_diff); + src_diff_mem_->set_data_handle(src_diff); + bwd_stream_->submit(bwd_primitives_); + + //after exec, set data handle back + src_mem_->set_data_handle(dummy); + dst_diff_mem_->set_data_handle(dummy); + src_diff_mem_->set_data_handle(dummy); + + return; +} + +template class EltwiseBwd; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/eltwise_fwd.cc b/python/ideep4py/primitives/ops/eltwise_fwd.cc new file mode 100644 index 00000000..0b7431c8 --- /dev/null +++ b/python/ideep4py/primitives/ops/eltwise_fwd.cc @@ -0,0 +1,101 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
+ * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "eltwise_fwd.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +EltwiseFwd::EltwiseFwd(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format src_fmt, T2 alpha, T2 beta) +{ + fwd_stream_.reset(new stream(stream::kind::eager)); + // create eltwise primitive + if (eltwise_fwd_ == nullptr) { + setup(src_d, alg_kind, src_fmt, alpha, beta); + } +} + +template +EltwiseFwd::~EltwiseFwd() +{ +} + +template +void EltwiseFwd::setup(mkldnn::memory::dims src_d, mkldnn::algorithm alg_kind, mkldnn::memory::format src_fmt, T2 alpha, T2 beta) +{ + //LOG(INFO) << "Eltwise forward_setup"; + assert(src_d != nullptr); + + /* create memory descriptors for eltwise data w/ no specified format */ + src_md_.reset(new memory::desc({src_d}, memory_data_type(), + src_fmt)); + src_mpd_.reset(new memory::primitive_desc(*src_md_, cpu_engine)); + /* create a eltwise*/ + fwd_desc_.reset(new eltwise_forward::desc(prop_kind::forward, alg_kind, + *src_md_, alpha, beta)); + + fwd_pd_.reset(new eltwise_forward::primitive_desc(*fwd_desc_, cpu_engine)); + + //store the expected memory format + src_fmt_ = src_fmt; + dst_fmt_ = static_cast(fwd_pd_.get()->dst_primitive_desc().desc().data.format); + + // create memory primitive based on dummy data + src_mem_.reset(new memory(*src_mpd_, dummy)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), dummy)); + + /* create eltwise primitive and add it to net */ + eltwise_fwd_.reset(new eltwise_forward(*fwd_pd_, *src_mem_, *dst_mem_)); + + fwd_primitives_.push_back(*eltwise_fwd_); + return; +} + +template +void EltwiseFwd::execute(void* src, void* dst) +{ + //LOG(INFO) << "Eltwise forward"; + + src_mem_->set_data_handle(src); + dst_mem_->set_data_handle(dst); + fwd_stream_->submit(fwd_primitives_); + + //after exec, set data handle back + src_mem_->set_data_handle(dummy); + dst_mem_->set_data_handle(dummy); + + return; +} + +template class EltwiseFwd; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/linear_bwd_data.cc b/python/ideep4py/primitives/ops/linear_bwd_data.cc new file mode 100644 index 00000000..018accb9 --- /dev/null +++ b/python/ideep4py/primitives/ops/linear_bwd_data.cc @@ -0,0 +1,114 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
+ * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "linear_bwd_data.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +LinearBwdData::LinearBwdData( + mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims w_d, + mkldnn::memory::dims diff_dst_d + ) +{ + bwd_data_stream_.reset(new stream(stream::kind::eager)); + //create linear primitive + if (linear_bwd_data_ == NULL) { + setup(diff_src_d, w_d, diff_dst_d); + } +} +template +LinearBwdData::~LinearBwdData() +{ +} + +template +void LinearBwdData::setup( + mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims w_d, + mkldnn::memory::dims diff_dst_d + ) +{ + assert(diff_src_d != NULL); + assert(w_d != NULL); + assert(diff_dst_d != NULL); + + diff_src_md_.reset(new memory::desc({diff_src_d}, memory_data_type(), memory::format::any)); + weights_md_.reset(new memory::desc({w_d}, memory_data_type(), memory::format::any)); + diff_dst_md_.reset(new memory::desc({diff_dst_d}, memory_data_type(), memory::format::any)); + //LOG(INFO) << "diff_src_d" << diff_src_d[0]<<", "<(bwd_data_pd_.get()->diff_src_primitive_desc().desc().data.format); + weights_fmt_ = static_cast(bwd_data_pd_.get()->weights_primitive_desc().desc().data.format); + diff_dst_fmt_ = static_cast(bwd_data_pd_.get()->diff_dst_primitive_desc().desc().data.format); + + // create memory primitive based on dummy data + diff_src_mem_.reset(new memory(bwd_data_pd_.get()->diff_src_primitive_desc(), dummy)); + weights_mem_.reset(new memory(bwd_data_pd_.get()->weights_primitive_desc(), dummy)); + diff_dst_mem_.reset(new memory(bwd_data_pd_.get()->diff_dst_primitive_desc(), dummy)); + + //create linear primitive and add it to net + linear_bwd_data_.reset(new inner_product_backward_data(*bwd_data_pd_, *diff_dst_mem_, *weights_mem_, *diff_src_mem_)); + bwd_data_primitives_.push_back(*linear_bwd_data_); + return; +} + +template +void LinearBwdData::execute(void* diff_src, void* w, void* diff_dst) +{ + //LOG(INFO) << "linear fwd without bias" + diff_src_mem_->set_data_handle(diff_src); + weights_mem_->set_data_handle(w); + diff_dst_mem_->set_data_handle(diff_dst); + //linear_bwd->execute(); + bwd_data_stream_->submit(bwd_data_primitives_); + diff_src_mem_->set_data_handle(dummy); + weights_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + return; +} + +template class LinearBwdData; + + diff --git a/python/ideep4py/primitives/ops/linear_bwd_weights.cc b/python/ideep4py/primitives/ops/linear_bwd_weights.cc new file mode 100644 index 00000000..eb97cdb2 --- /dev/null +++ b/python/ideep4py/primitives/ops/linear_bwd_weights.cc @@ -0,0 +1,138 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "mkldnn.hpp" +#include "linear_bwd_weights.h" +#include "utils.h" +#include "common.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +LinearBwdWeights::LinearBwdWeights( + mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d, + mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d) +{ + bwd_weights_stream_.reset(new stream(stream::kind::eager)); + //create linear primitive + if (linear_bwd_weights_ == NULL) { + setup(src_d, diff_w_d, diff_b_d, diff_dst_d); + } +} + +template +LinearBwdWeights::~LinearBwdWeights() +{ +} + +template +void LinearBwdWeights::setup(mkldnn::memory::dims src_d, mkldnn::memory::dims diff_w_d, + mkldnn::memory::dims diff_b_d, mkldnn::memory::dims diff_dst_d) +{ + assert(src_d != NULL); + assert(diff_w_d != NULL); + assert(diff_b_d != NULL); + assert(diff_dst_d != NULL); + + src_md_.reset(new memory::desc({src_d}, memory_data_type(), memory::format::any)); + diff_weights_md_.reset(new memory::desc({diff_w_d}, memory_data_type(), memory::format::any)); + diff_dst_md_.reset(new memory::desc({diff_dst_d}, memory_data_type(), memory::format::any)); + //LOG(INFO) << "src_d"<(), memory::format::any)); + bwd_weights_desc_.reset(new inner_product_backward_weights::desc(*src_md_, *diff_weights_md_, + *diff_bias_md_, *diff_dst_md_)); + } else { + bwd_weights_desc_.reset(new inner_product_backward_weights::desc(*src_md_, *diff_weights_md_, + *diff_dst_md_)); + } + + //FIXME + //jiangzho, Current linear bwd need a fwd pd as hint, will remove in future + fwd_desc_.reset(new inner_product_forward::desc(prop_kind::forward, *src_md_, + *diff_weights_md_, *diff_dst_md_)); + fwd_pd_.reset(new inner_product_forward::primitive_desc(*fwd_desc_, cpu_engine)); + bwd_weights_pd_.reset(new inner_product_backward_weights::primitive_desc(*bwd_weights_desc_, cpu_engine, *fwd_pd_)); + + //store the expected memory format + src_fmt_ = static_cast(bwd_weights_pd_.get()->src_primitive_desc().desc().data.format); + diff_weights_fmt_ = static_cast(bwd_weights_pd_.get()->diff_weights_primitive_desc().desc().data.format); + diff_dst_fmt_ = static_cast(bwd_weights_pd_.get()->diff_dst_primitive_desc().desc().data.format); + + //create linear primitive and add it to net + src_mem_.reset(new memory(bwd_weights_pd_.get()->src_primitive_desc(), dummy)); + diff_weights_mem_.reset(new memory(bwd_weights_pd_.get()->diff_weights_primitive_desc(), dummy)); + 
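+    // Note: "dummy" is a shared placeholder buffer. Each primitive is built
+    // once against these dummy memory objects; execute() temporarily points
+    // them at the caller's real buffers with set_data_handle(), so the
+    // primitive never has to be re-created per call.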
diff_dst_mem_.reset(new memory(bwd_weights_pd_.get()->diff_dst_primitive_desc(), dummy)); + //create linear primitive and add it to net + if (!diff_b_d.empty()) { + diff_bias_mem_.reset(new memory({{{diff_b_d}, memory_data_type(), memory::format::x}, cpu_engine}, dummy)); + linear_bwd_weights_.reset(new inner_product_backward_weights(*bwd_weights_pd_, *src_mem_, *diff_dst_mem_, + *diff_weights_mem_, *diff_bias_mem_)); + } else { + linear_bwd_weights_.reset(new inner_product_backward_weights(*bwd_weights_pd_, *src_mem_, *diff_dst_mem_, + *diff_weights_mem_)); + } + bwd_weights_primitives_.push_back(*linear_bwd_weights_); + return; +} + +template +void LinearBwdWeights::execute(void* src, void* diff_w, void* diff_b, void* diff_dst) +{ + //LOG(INFO) << "linear backward weights"; + src_mem_->set_data_handle(src); + diff_weights_mem_->set_data_handle(diff_w); + diff_bias_mem_->set_data_handle(diff_b); + diff_dst_mem_->set_data_handle(diff_dst); + bwd_weights_stream_->submit(bwd_weights_primitives_); + src_mem_->set_data_handle(dummy); + diff_weights_mem_->set_data_handle(dummy); + diff_bias_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + return; +} + +template +void LinearBwdWeights::execute(void* src, void* diff_w, void* diff_dst) +{ + // LOG(INFO) << "linear without bias"; + src_mem_->set_data_handle(src); + diff_weights_mem_->set_data_handle(diff_w); + diff_dst_mem_->set_data_handle(diff_dst); + bwd_weights_stream_->submit(bwd_weights_primitives_); + src_mem_->set_data_handle(dummy); + diff_weights_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + return; +} + +template class LinearBwdWeights; + diff --git a/python/ideep4py/primitives/ops/linear_fwd.cc b/python/ideep4py/primitives/ops/linear_fwd.cc new file mode 100644 index 00000000..220a33ed --- /dev/null +++ b/python/ideep4py/primitives/ops/linear_fwd.cc @@ -0,0 +1,142 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ *
+ */
+
+
+#include <glog/logging.h>
+#include <iostream>
+#include "mkldnn.hpp"
+#include "linear_fwd.h"
+#include "utils.h"
+#include "common.h"
+
+using namespace mkldnn;
+
+extern engine cpu_engine;
+
+template <typename T>
+LinearFwd<T>::LinearFwd(
+        mkldnn::memory::dims src_d, mkldnn::memory::dims w_d,
+        mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d)
+{
+    fwd_stream_.reset(new stream(stream::kind::eager));
+    // create linear primitive
+    if (linear_fwd_ == NULL) {
+        setup(src_d, w_d, b_d, dst_d);
+    }
+}
+
+template <typename T>
+LinearFwd<T>::~LinearFwd()
+{
+}
+
+template <typename T>
+void LinearFwd<T>::setup(mkldnn::memory::dims src_d, mkldnn::memory::dims w_d,
+        mkldnn::memory::dims b_d, mkldnn::memory::dims dst_d)
+{
+    //LOG(INFO) << "Linear forward setup";
+    assert(src_d != NULL);
+    assert(w_d != NULL);
+    assert(b_d != NULL); // no bias case, expect as NONE_DIMS, not NULL
+    assert(dst_d != NULL);
+    src_md_.reset(new memory::desc({src_d}, memory_data_type<T>(),
+                memory::format::any));
+    weights_md_.reset(new memory::desc({w_d}, memory_data_type<T>(),
+                memory::format::any));
+    dst_md_.reset(new memory::desc({dst_d}, memory_data_type<T>(),
+                memory::format::any));
+    //LOG(INFO) << "src_d" << src_d[0] << "," << src_d[1];
+    //LOG(INFO) << "weight" << w_d[0] << "," << w_d[1];
+    //LOG(INFO) << "dst_d" << dst_d[0] << "," << dst_d[1];
+    // create linear layer descriptor
+    if (!b_d.empty()) {
+        bias_md_.reset(new memory::desc({b_d}, memory_data_type<T>(),
+                    memory::format::any));
+        fwd_desc_.reset(new inner_product_forward::desc(prop_kind::forward, *src_md_,
+                    *weights_md_, *bias_md_, *dst_md_));
+    } else {
+        fwd_desc_.reset(new inner_product_forward::desc(prop_kind::forward, *src_md_,
+                    *weights_md_, *dst_md_));
+    }
+    //----------- Determine which engine to use ------------------
+    // Currently the engine is fixed to the MKL-DNN CPU engine.
+    fwd_pd_.reset(new inner_product_forward::primitive_desc(*fwd_desc_, cpu_engine));
+    // store the expected memory formats
+    src_fmt_ = static_cast<mkldnn::memory::format>(fwd_pd_.get()->src_primitive_desc().desc().data.format);
+    weights_fmt_ = static_cast<mkldnn::memory::format>(fwd_pd_.get()->weights_primitive_desc().desc().data.format);
+    dst_fmt_ = static_cast<mkldnn::memory::format>(fwd_pd_.get()->dst_primitive_desc().desc().data.format);
+
+    // create memory primitives based on dummy data
+    src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), dummy));
+    weights_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), dummy));
+    dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), dummy));
+
+    /* create linear primitive and add it to net */
+    if (!b_d.empty()) {
+        bias_mem_.reset(new memory({{{b_d}, memory_data_type<T>(), memory::format::x}, cpu_engine}, dummy));
+        linear_fwd_.reset(new inner_product_forward(*fwd_pd_, *src_mem_,
+                    *weights_mem_, *bias_mem_, *dst_mem_));
+    } else {
+        linear_fwd_.reset(new inner_product_forward(*fwd_pd_, *src_mem_,
+                    *weights_mem_, *dst_mem_));
+    }
+    fwd_primitives_.push_back(*linear_fwd_);
+    return;
+}
+
+template <typename T>
+void LinearFwd<T>::execute(void* src, void* w, void* b, void* dst)
+{
+    //LOG(INFO) << "Linear forward";
+    src_mem_->set_data_handle(src);
+    weights_mem_->set_data_handle(w);
+    bias_mem_->set_data_handle(b);
+    dst_mem_->set_data_handle(dst);
+    fwd_stream_->submit(fwd_primitives_);
+    // after exec, set data handles back
+    src_mem_->set_data_handle(dummy);
+    weights_mem_->set_data_handle(dummy);
+    bias_mem_->set_data_handle(dummy);
+    dst_mem_->set_data_handle(dummy);
+    return;
+}
+
+template <typename T>
+void LinearFwd<T>::execute(void* src, void* w, void* dst)
+{
+    //LOG(INFO) << "Linear forward without bias";
+    src_mem_->set_data_handle(src);
weights_mem_->set_data_handle(w); + dst_mem_->set_data_handle(dst); + //linear_fwd_->execute(); + fwd_stream_->submit(fwd_primitives_); + //after exec, set data handle bac + src_mem_->set_data_handle(dummy); + weights_mem_->set_data_handle(dummy); + dst_mem_->set_data_handle(dummy); + return; +} +template class LinearFwd; + + diff --git a/python/ideep4py/primitives/ops/lrn_bwd.cc b/python/ideep4py/primitives/ops/lrn_bwd.cc new file mode 100755 index 00000000..67832bed --- /dev/null +++ b/python/ideep4py/primitives/ops/lrn_bwd.cc @@ -0,0 +1,137 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "lrn_bwd.h" +#include "utils.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +LocalResponseNormalizationBwd::LocalResponseNormalizationBwd( + mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int n, double k, double alpha, double beta, + mkldnn::algorithm alg_kind):alg_kind_(mkldnn::algorithm::lrn_across_channels) +{ + bwd_stream_.reset(new stream(stream::kind::eager)); + // setup + if ( bwd_ == NULL){ + setup(src_d, diff_dst_d, ws_d, ws_dt, n, k, alpha, beta, alg_kind_); + } +} + +template +LocalResponseNormalizationBwd::~LocalResponseNormalizationBwd(){} + +template +void LocalResponseNormalizationBwd::setup( + mkldnn::memory::dims src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int n, double k, double alpha, double beta, + mkldnn::algorithm alg_kind) +{ + //LOG(INFO) << "lrn backward_setup"; + + //LOG(INFO) << "src_d[0]=" << src_d[0] << "; src_d[1]" << src_d[1] << "; src_d[2]=" << src_d[2] << "; src_d[3]=" << src_d[3]; + // LOG(INFO) << "diff_dst_d[0]=" << diff_dst_d[0] << "; diff_dst_d[1]" << diff_dst_d[1] << "; diff_dst_d[2]=" << diff_dst_d[2] << "; diff_dst_d[3]=" << diff_dst_d[3]; + // LOG(INFO) << "ws_d[0]=" << ws_d[0] << "; ws_d[1]" << ws_d[1] << "; ws_d[2]=" << ws_d[2] << "; ws_d[3]=" << ws_d[3]; + + alg_kind_ = alg_kind; + + // create memory desc + src_md_.reset(new memory::desc({src_d}, memory_data_type(), + get_desired_format(src_d[1]))); + + diff_dst_md_.reset(new memory::desc({diff_dst_d}, memory_data_type(), + get_desired_format(diff_dst_d[1]))); // use diff dst chanel to decide fmt + + //Need a forward hint to create backward, will be removed in future + // create a lrn descriptor + 
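+    // The lrn_forward descriptor built below exists only to provide the
+    // "hint" primitive_desc that the mkldnn v0.x lrn_backward constructor
+    // requires; it is never executed by this class. A minimal sketch of the
+    // pattern (names assumed for illustration):
+    //
+    //     lrn_forward::desc fwd_d(prop_kind::forward_training, alg, md,
+    //                             n, alpha, beta, k);
+    //     lrn_forward::primitive_desc hint(fwd_d, cpu_engine);
+    //     lrn_backward::primitive_desc bwd(bwd_d, cpu_engine, hint);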
fwd_desc_.reset(new lrn_forward::desc(prop_kind::forward_training, alg_kind_, + *diff_dst_md_, n, alpha, beta, k)); + fwd_pd_.reset(new lrn_forward::primitive_desc( *fwd_desc_, cpu_engine)); + + bwd_desc_.reset(new lrn_backward::desc(alg_kind_, + *src_md_, *diff_dst_md_,n, alpha, beta, k)); + bwd_pd_.reset(new lrn_backward::primitive_desc(*bwd_desc_, cpu_engine, + *fwd_pd_)); + + // store expected primitive format + diff_src_fmt_ = static_cast(bwd_pd_.get()->diff_src_primitive_desc().desc().data.format); + diff_dst_fmt_ = get_desired_format(diff_dst_d[1]); + src_fmt_ = get_desired_format(diff_dst_d[1]); + + // create MKL-DNN internal memory object with dummy data + src_mem_.reset(new memory({{{src_d}, memory_data_type(), src_fmt_}, cpu_engine}, dummy)); + diff_src_mem_.reset(new memory(bwd_pd_.get()->diff_src_primitive_desc(), dummy)); + diff_dst_mem_.reset(new memory({{{diff_dst_d}, memory_data_type(), diff_dst_fmt_}, cpu_engine}, dummy)); + + // store workspace's dims and fmt to create ws tensor + ws_fmt_ = get_desired_format(ws_d[1]); + ws_mem_.reset(new memory({{{ws_d}, ws_dt, ws_fmt_}, cpu_engine}, dummy)); // use ws dims's channel to decide format + + bwd_.reset(new lrn_backward( + *bwd_pd_, *src_mem_, *diff_dst_mem_, *ws_mem_, *diff_src_mem_)); + + bwd_primitives_.push_back(*bwd_); + return; +} + +template +void LocalResponseNormalizationBwd::execute(void*src, void *diff_src, void *diff_dst, void *ws) +{ + //LOG(INFO) << "lrn backward"; + + diff_src_mem_->set_data_handle(diff_src); // + diff_dst_mem_->set_data_handle(diff_dst); // + src_mem_->set_data_handle(src); + + assert(ws!=NULL); + ws_mem_->set_data_handle(ws); // output workspace + + + bwd_stream_->submit(bwd_primitives_); + + // set back data handle + diff_src_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + src_mem_->set_data_handle(dummy); + assert(ws!=NULL); + ws_mem_->set_data_handle(dummy); + + //LOG(INFO) << "lrn backward finish"; + return; +} + +template class LocalResponseNormalizationBwd; diff --git a/python/ideep4py/primitives/ops/lrn_fwd.cc b/python/ideep4py/primitives/ops/lrn_fwd.cc new file mode 100755 index 00000000..cc800358 --- /dev/null +++ b/python/ideep4py/primitives/ops/lrn_fwd.cc @@ -0,0 +1,126 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "lrn_fwd.h" +#include "utils.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +LocalResponseNormalizationFwd::LocalResponseNormalizationFwd( + mkldnn::memory::dims src_d, mkldnn::memory::format src_fmt, + int n, double k, double alpha, double beta, + mkldnn::algorithm) + :alg_kind_(algorithm::lrn_across_channels) +{ + + fwd_stream_.reset(new stream(stream::kind::eager)); + // setup + if (fwd_ == NULL){ + setup(src_d, src_fmt, n, k, alpha, beta, alg_kind_); + } +} + +template +LocalResponseNormalizationFwd::~LocalResponseNormalizationFwd(){} + +template +void LocalResponseNormalizationFwd::setup( + mkldnn::memory::dims src_d, mkldnn::memory::format src_fmt, + int n, double k, double alpha, double beta, + mkldnn::algorithm alg_kind) +{ + //LOG(INFO) << "lrn forward_setup"; + + //LOG(INFO) << "src_d[0]=" << src_d[0] << "; src_d[1]" << src_d[1] << "; src_d[2]=" << src_d[2] << "; src_d[3]=" << src_d[3]; + alg_kind_ = alg_kind; + // local_size_ = n; + + src_md_.reset(new memory::desc({src_d}, memory_data_type(), + get_desired_format(src_d[1]))); // use src's input channel to decide expected fmt + // src_md_.reset(new memory::desc({src_d}, memory_data_type(), + // src_fmt)); + + //LOG(INFO) << "lrn_fwd_desc_"; + fwd_desc_.reset(new lrn_forward::desc(prop_kind::forward_training, alg_kind_, + *src_md_, n, alpha, beta, k)); + fwd_pd_.reset(new lrn_forward::primitive_desc(*fwd_desc_, cpu_engine)); + + // store expected primitive format + src_fmt_ = get_desired_format(src_d[1]); + // src_fmt_ = src_fmt; + //LOG(INFO) << "src_fmt is " << src_fmt <<" desired src_fmt_ is "<(fwd_pd_.get()->dst_primitive_desc().desc().data.format); + + // create MKL-DNN internal memory object with dummy data + src_mem_.reset(new memory({{{src_d}, memory_data_type(), src_fmt_}, cpu_engine}, dummy)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), dummy)); + + //need to return workspace for backward + auto ws_pd = fwd_pd_.get()->workspace_primitive_desc().desc().data; + // store workspace's dims and fmt to create ws tensor + ws_fmt_ = static_cast(ws_pd.format); + ws_dims_.assign(ws_pd.dims, ws_pd.dims + ws_pd.ndims); + ws_dt_ = static_cast(ws_pd.data_type); + ws_size_ = fwd_pd_.get()->workspace_primitive_desc().get_size(); + ws_mem_.reset(new memory(fwd_pd_.get()->workspace_primitive_desc(), dummy)); + + fwd_.reset(new lrn_forward( + *fwd_pd_, *src_mem_, *ws_mem_, *dst_mem_)); + + fwd_primitives_.push_back(*fwd_); + return; +} + +template +void LocalResponseNormalizationFwd::execute(void *src, void *dst, void *ws) +{ + //LOG(INFO) << "lrn forward"; + + src_mem_->set_data_handle(src); // input + dst_mem_->set_data_handle(dst); // output dst + + assert(ws!=NULL); + ws_mem_->set_data_handle(ws); // output workspace + + fwd_stream_->submit(fwd_primitives_); + + // set back data handle + src_mem_->set_data_handle(dummy); + dst_mem_->set_data_handle(dummy); + + assert(ws!=NULL); + ws_mem_->set_data_handle(dummy); + + //LOG(INFO) << "lrn forward finish"; + return; +} + +template class LocalResponseNormalizationFwd; diff --git a/python/ideep4py/primitives/ops/pooling_bwd.cc b/python/ideep4py/primitives/ops/pooling_bwd.cc new file mode 100644 index 00000000..47c90c66 --- /dev/null +++ b/python/ideep4py/primitives/ops/pooling_bwd.cc @@ -0,0 +1,167 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "pooling_bwd.h" +#include "utils.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Pooling2DBwd::Pooling2DBwd(mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind ) +{ + bwd_stream_.reset(new stream(stream::kind::eager)); + // setup + if ( bwd_ == NULL) + setup(diff_src_d, diff_dst_d, ws_d, ws_dt, ker_h, ker_w, sy, sx, + pad_lh, pad_lw, pad_rh, pad_rw, alg_kind); +} + +template +Pooling2DBwd::~Pooling2DBwd() +{ +} + +template +void Pooling2DBwd::setup(mkldnn::memory::dims diff_src_d, + mkldnn::memory::dims diff_dst_d, + mkldnn::memory::dims ws_d, + mkldnn::memory::data_type ws_dt, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind ) +{ + //LOG(INFO) << "Pooling backward_setup"; + + if (alg_kind != pooling_max && alg_kind != pooling_avg + && alg_kind != pooling_avg_include_padding && alg_kind != pooling_avg_exclude_padding) { + //LOG(ERROR) << "alg_kind must be either pooling_max or " + // << "pooling_avg"; + } + + alg_kind_ = alg_kind; + memory::dims strides = {sy, sx}; + memory::dims padding_l = {pad_lh, pad_lw}; + memory::dims padding_r = {pad_rh, pad_rw}; + memory::dims kernel = {ker_h, ker_w}; + + // create memory desc + diff_src_md_.reset(new memory::desc({diff_src_d}, memory_data_type(), + memory::format::any)); // + // FIXME + // Pooling doesn't expose to get the diff_dst_primitive_desc, so we need to hard set the fmt for diff dst + // a util function is used to do this, may be broken the condition in future + diff_dst_md_.reset(new memory::desc({diff_dst_d}, memory_data_type(), + get_desired_format(diff_dst_d[1]))); // use diff dst chanel to decide fmt + + // create a pooling descriptor + bwd_desc_.reset(new pooling_backward::desc( + alg_kind, + *diff_src_md_, *diff_dst_md_, + strides, kernel, padding_l, padding_r, + padding_kind::zero)); + + //FIXME + //Need a forward hint to create backward, will be removed in future + // create a pooling descriptor + fwd_desc_.reset(new pooling_forward::desc(prop_kind::forward_training, + alg_kind, + *diff_src_md_, *diff_dst_md_, + strides, kernel, padding_l, padding_r, + padding_kind::zero)); + fwd_pd_.reset(new 
pooling_forward::primitive_desc( *fwd_desc_, cpu_engine)); + + bwd_pd_.reset(new pooling_backward::primitive_desc( + *bwd_desc_, cpu_engine, *fwd_pd_)); + + // store expected primitive format + diff_src_fmt_ = static_cast(bwd_pd_.get()->diff_src_primitive_desc().desc().data.format); + diff_dst_fmt_ = get_desired_format(diff_dst_d[1]); + + // create MKL-DNN internal memory object with dummy data + diff_src_mem_.reset(new memory(bwd_pd_.get()->diff_src_primitive_desc(), dummy)); + diff_dst_mem_.reset(new memory({{{diff_dst_d}, memory_data_type(), diff_dst_fmt_}, cpu_engine}, dummy)); + + // for max pooling, need to return workspace for backward + if (alg_kind == pooling_max) { + //FIXME + //Pooling backward doesn't expose to get the workspace_primitive_desc, we need to hard set here + // store workspace's dims and fmt to create ws tensor + ws_fmt_ = get_desired_format(ws_d[1]); + ws_mem_.reset(new memory({{{ws_d}, ws_dt, ws_fmt_}, cpu_engine}, dummy)); // use ws dims's channel to decide format + + bwd_.reset(new pooling_backward( + *bwd_pd_, *diff_dst_mem_, *ws_mem_, *diff_src_mem_)); + } else { + bwd_.reset(new pooling_backward( + *bwd_pd_, *diff_dst_mem_, *diff_src_mem_)); + } + + bwd_primitives_.push_back(*bwd_); + return; +} + +template +void Pooling2DBwd::execute(void *diff_src, void *diff_dst, void *ws) +{ + //LOG(INFO) << "Pooling backward"; + + diff_src_mem_->set_data_handle(diff_src); // input + diff_dst_mem_->set_data_handle(diff_dst); // output dst + if ( alg_kind_ == pooling_max ) { // max pooling must have ws + assert(ws!=NULL); + ws_mem_->set_data_handle(ws); // output workspace + } + + bwd_stream_->submit(bwd_primitives_); + + // set back data handle + diff_src_mem_->set_data_handle(dummy); + diff_dst_mem_->set_data_handle(dummy); + if ( alg_kind_ == pooling_max ) { // max pooling must have ws + assert(ws!=NULL); + ws_mem_->set_data_handle(dummy); + } + + //LOG(INFO) << "Pooling backward finish"; + return; +} + +template class Pooling2DBwd; + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/primitives/ops/pooling_fwd.cc b/python/ideep4py/primitives/ops/pooling_fwd.cc new file mode 100644 index 00000000..00c42932 --- /dev/null +++ b/python/ideep4py/primitives/ops/pooling_fwd.cc @@ -0,0 +1,156 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "pooling_fwd.h" +#include "utils.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Pooling2DFwd::Pooling2DFwd(mkldnn::memory::dims src_d, + mkldnn::memory::dims dst_d, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind ) +{ + fwd_stream_.reset(new stream(stream::kind::eager)); + // setup + if ( fwd_ == NULL) + setup(src_d, dst_d, ker_h, ker_w, sy, sx, + pad_lh, pad_lw, pad_rh, pad_rw, alg_kind); +} + +template +Pooling2DFwd::~Pooling2DFwd() +{ +} + +template +void Pooling2DFwd::setup(mkldnn::memory::dims src_d, + mkldnn::memory::dims dst_d, + int ker_h, int ker_w, + int sy, int sx, + int pad_lh, int pad_lw, int pad_rh, int pad_rw, + mkldnn::algorithm alg_kind ) +{ + //LOG(INFO) << "Pooling forward_setup"; + + if (alg_kind != pooling_max && alg_kind != pooling_avg + && alg_kind != pooling_avg_include_padding && alg_kind != pooling_avg_exclude_padding) { + //LOG(ERROR) << "alg_kind must be either pooling_max or " + //<< "pooling_avg"; + } + + alg_kind_ = alg_kind; + memory::dims strides = {sy, sx}; + memory::dims padding_l = {pad_lh, pad_lw}; + memory::dims padding_r = {pad_rh, pad_rw}; + memory::dims kernel = {ker_h, ker_w}; + + // create memory desc + // FIXME + // Pooling doesn't expose to get the src_primitive_desc, so we need to hard set the fmt for src + // a util function is used to do this, may be broken the condition in future + src_md_.reset(new memory::desc({src_d}, memory_data_type(), + get_desired_format(src_d[1]))); // use src's input channel to decide expected fmt + dst_md_.reset(new memory::desc({dst_d}, memory_data_type(), + memory::format::any)); + + // create a pooling descriptor + fwd_desc_.reset(new pooling_forward::desc(prop_kind::forward_training, + alg_kind, + *src_md_, *dst_md_, + strides, kernel, padding_l, padding_r, + padding_kind::zero)); + + fwd_pd_.reset(new pooling_forward::primitive_desc( + *fwd_desc_, cpu_engine)); + + // store expected primitive format + src_fmt_ = get_desired_format(src_d[1]); + dst_fmt_ = static_cast(fwd_pd_.get()->dst_primitive_desc().desc().data.format); + + // create MKL-DNN internal memory object with dummy data + src_mem_.reset(new memory({{{src_d}, memory_data_type(), src_fmt_}, cpu_engine}, dummy)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), dummy)); + + // for max pooling, need to return workspace for backward + if (alg_kind == pooling_max) { + auto ws_pd = fwd_pd_.get()->workspace_primitive_desc().desc().data; + + // store workspace's dims and fmt to create ws tensor + ws_fmt_ = static_cast(ws_pd.format); + ws_dims_.assign(ws_pd.dims, ws_pd.dims+ws_pd.ndims); + ws_dt_ = static_cast(ws_pd.data_type); + ws_size_ = fwd_pd_.get()->workspace_primitive_desc().get_size(); + + ws_mem_.reset(new memory(fwd_pd_.get()->workspace_primitive_desc(), dummy)); + fwd_.reset(new pooling_forward( + *fwd_pd_, *src_mem_, *dst_mem_, *ws_mem_)); + } else { + fwd_.reset(new pooling_forward( + *fwd_pd_, *src_mem_, *dst_mem_)); + } + + fwd_primitives_.push_back(*fwd_); + return; +} + +template +void Pooling2DFwd::execute(void *src, void *dst, void *ws) +{ + //LOG(INFO) << "Pooling forward"; + + src_mem_->set_data_handle(src); // input + dst_mem_->set_data_handle(dst); // output dst + if ( alg_kind_ == pooling_max ) { // max pooling must have ws + assert(ws!=NULL); + ws_mem_->set_data_handle(ws); // output workspace + } + + 
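+    // submit() executes every primitive queued in fwd_primitives_ (a single
+    // pooling_forward here) against the handles set above; with an eager
+    // stream the work runs at submission time.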
+    fwd_stream_->submit(fwd_primitives_);
+
+    // set back data handles
+    src_mem_->set_data_handle(dummy);
+    dst_mem_->set_data_handle(dummy);
+    if ( alg_kind_ == pooling_max ) { // max pooling must have ws
+        assert(ws!=NULL);
+        ws_mem_->set_data_handle(dummy);
+    }
+
+    //LOG(INFO) << "Pooling forward finish";
+    return;
+}
+
+template class Pooling2DFwd<float>;
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/primitives/ops/reorder_op.cc b/python/ideep4py/primitives/ops/reorder_op.cc
new file mode 100644
index 00000000..f29a4bdd
--- /dev/null
+++ b/python/ideep4py/primitives/ops/reorder_op.cc
@@ -0,0 +1,88 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#include <glog/logging.h>
+#include <iostream>
+#include "mkldnn.hpp"
+#include "reorder_op.h"
+#include "utils.h"
+#include "common.h"
+
+using namespace mkldnn;
+
+extern engine cpu_engine;
+
+template <typename T>
+ReorderOp<T>::ReorderOp(mkldnn::memory::dims dims, mkldnn::memory::format src_fmt, mkldnn::memory::format dst_fmt)
+{
+    reorder_stream_.reset(new stream(stream::kind::eager));
+    // create primitive
+    if (reorder_prim_ == NULL) {
+        setup(dims, src_fmt, dst_fmt);
+    }
+}
+
+template <typename T>
+ReorderOp<T>::~ReorderOp()
+{
+}
+
+template <typename T>
+void ReorderOp<T>::setup(mkldnn::memory::dims dims,
+        mkldnn::memory::format src_fmt,
+        mkldnn::memory::format dst_fmt)
+{
+    //LOG(INFO) << "Reorder setup";
+
+    assert(src_fmt != dst_fmt);
+
+    src_md_.reset(new memory::desc(dims, memory_data_type<T>(), src_fmt));
+    dst_md_.reset(new memory::desc(dims, memory_data_type<T>(), dst_fmt));
+
+    src_mem_.reset(new memory({*src_md_, cpu_engine}, dummy));
+    dst_mem_.reset(new memory({*dst_md_, cpu_engine}, dummy));
+
+    reorder_prim_ = std::make_shared<reorder>(reorder(*src_mem_, *dst_mem_));
+
+    return;
+}
+
+template <typename T>
+void ReorderOp<T>::execute(void* src, void* dst)
+{
+    //LOG(INFO) << "Reorder execute";
+    src_mem_->set_data_handle(src);
+    dst_mem_->set_data_handle(dst);
+    reorder_stream_->submit({*reorder_prim_});
+
+    // after exec, set data handles back
+    src_mem_->set_data_handle(dummy);
+    dst_mem_->set_data_handle(dummy);
+    return;
+}
+
+template class ReorderOp<float>;
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/primitives/pooling.cc b/python/ideep4py/primitives/pooling.cc
new file mode 100644
index 00000000..c267fa26
--- /dev/null
+++ b/python/ideep4py/primitives/pooling.cc
@@ -0,0 +1,216 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include +#include +#include "common.h" +#include "mkldnn.hpp" +#include "tensor.h" +#include "mem.h" +#include "pooling.h" +#include "utils.h" +#include "pooling_fwd.h" +#include "pooling_bwd.h" +#include "prim_factory.h" +#include "reorder_op.h" + +using namespace mkldnn; + +extern engine cpu_engine; + +template +Pooling2D::Pooling2D() +{ +} + +template +Pooling2D::~Pooling2D() +{ +} + +template +std::vector Pooling2D::Forward( + Tensor *src, + pooling_param_t *pp) +{ + std::vector outputs; + + // sanity check + mkldnn::memory::dims src_dims = (mkldnn::memory::dims)(src->dims()); + mkldnn::memory::dims dst_dims = (mkldnn::memory::dims)(pp->out_dims); + assert(src_dims == src->cxx_dims()); + + //sanity check for data type + //assuem all should have same data type as T + //FIXME + //yli135: Is it possible x and w have different data type???? 
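+    // Hypothetical caller-side sketch of this path from ideep4py: fill a
+    // pooling_param_t and call Forward(); max pooling returns
+    // [dst, workspace], average pooling returns [dst]. (Values below are
+    // illustrative only.)
+    //
+    //     pooling_param_t pp;
+    //     pp.out_dims = {n, c, out_h, out_w};
+    //     pp.kh = pp.kw = 2;
+    //     pp.sy = pp.sx = 2;
+    //     pp.pad_lh = pp.pad_lw = pp.pad_rh = pp.pad_rw = 0;
+    //     pp.algo_kind = pooling_param_t::algorithm::pooling_max;
+    //     std::vector<Tensor *> outs = Pooling2D<float>().Forward(src, &pp);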
+ assert(memory_data_type() == src->cxx_data_type()); + + // get a conv2d fwd from primitive pool + Pooling2DFwd *pooling2d_forward = NULL; + pooling2d_forward = Pooling2DFwdFactory::get(src_dims, dst_dims, + pp->kh, pp->kw, + pp->sy, pp->sx, + pp->pad_lh, pp->pad_lw, pp->pad_rh, pp->pad_rw, + pooling_algo_convert(pp->algo_kind)); + + mkldnn::memory::format src_fmt = src->cxx_format(); // src fmt in tensor + + void *src_tmp = src->data(); + shared_ptr src_reorder; + + // check wehther fmt is same + if (src_fmt == pooling2d_forward->src_fmt_) { + //LOG(INFO) << "pooling forward fmt matched"; + } else { + //LOG(INFO) << "pooling fwd fmt not match, need to reorder"; + + if (src_fmt != pooling2d_forward->src_fmt_) { + //LOG(INFO) << "src_fmt=" << src_fmt <<", pooling2d_forward->src_fmt_=" << pooling2d_forward->src_fmt_; + // FIXME: when to free the reordered memory + ReorderOp* reorder_src_op = ReorderFactory::get(src_dims, src_fmt, pooling2d_forward->src_fmt_); + src_reorder = Allocator::malloc(src->len(), MPOOL_REORDER); + //src_reorder = new avx::byte[src->len()]; + reorder_src_op->execute(src_tmp, src_reorder.get()); + src_tmp = src_reorder.get(); + } + } + + // create tensor based on primitive's dst + // assume dst and src have same data type + // Tensor *dst_tensor = new Tensor(dst_dims, src->cxx_data_type(), pooling2d_forward->dst_fmt_, cpu_engine); + auto data = Allocator::malloc(dst_dims, type2size(src->type()), MPOOL_POOLING_FWD); + Tensor *dst_tensor = new Tensor(dst_dims.size(), dst_dims, data, + (mkldnn_memory_format_t)pooling2d_forward->dst_fmt_, + src->type()); + + // do forward + // for max pooling, need to return workspace + if (pp->algo_kind == pooling_param_t::algorithm::pooling_max) { + //LOG(INFO) << "ws_dt_=" << pooling2d_forward->ws_dt_; + // workspace must be int tensor + //Tensor *ws_tensor = new Tensor((pooling2d_forward->ws_dims_), pooling2d_forward->ws_dt_, pooling2d_forward->ws_fmt_, cpu_engine); + auto ws_data = Allocator::malloc(pooling2d_forward->ws_size_, MPOOL_POOLING_FWD); + Tensor *ws_tensor = new Tensor(pooling2d_forward->ws_dims_, + static_cast(pooling2d_forward->ws_dt_), + pooling2d_forward->ws_fmt_, ws_data); + + pooling2d_forward->execute(src_tmp, dst_tensor->data(), ws_tensor->data()); + outputs.push_back(dst_tensor); + outputs.push_back(ws_tensor); + } else { + pooling2d_forward->execute(src_tmp, dst_tensor->data()); + outputs.push_back(dst_tensor); + } + + //LOG(INFO) << "Succ exec pooling forward"; + return outputs; +} + +template +Tensor *Pooling2D::Backward( + Tensor *diff_dst, + Tensor *ws, + pooling_param_t *pp) +{ + //sanity check + mkldnn::memory::dims diff_src_dims = (mkldnn::memory::dims)pp->out_dims; + mkldnn::memory::dims diff_dst_dims = (mkldnn::memory::dims)diff_dst->dims(); + assert(diff_dst_dims == diff_dst->cxx_dims()); + + mkldnn::memory::dims ws_dims; + mkldnn::memory::data_type ws_dt; + if (pp->algo_kind == pooling_param_t::algorithm::pooling_max) { + ws_dims = ws->cxx_dims(); + ws_dt = ws->cxx_data_type(); + } + // sanity check for data type + // assuem all x/w/b should have same data type as T + // FIXME + // yli135: Is it possible x and w have different data type???? 
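+    // For max pooling, the workspace tensor produced by Forward() must be
+    // handed back here unmodified: it records which input element won each
+    // pooling window, and the backward kernel uses it to route diff_dst
+    // values to the correct diff_src locations. Average pooling variants
+    // need no workspace (NONE_DIMS is passed instead).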
+    assert(memory_data_type<T>() == diff_dst->cxx_data_type());
+
+    // get a pooling2d bwd primitive from the primitive pool
+    Pooling2DBwd<T> *pooling2d_bwd = NULL;
+    if (pp->algo_kind == pooling_param_t::algorithm::pooling_max) {
+        pooling2d_bwd = Pooling2DBwdFactory<T>::get(diff_src_dims, diff_dst_dims, ws_dims, ws_dt,
+                pp->kh, pp->kw, pp->sy, pp->sx,
+                pp->pad_lh, pp->pad_lw, pp->pad_rh, pp->pad_rw,
+                pooling_algo_convert(pp->algo_kind));
+    } else {
+        pooling2d_bwd = Pooling2DBwdFactory<T>::get(diff_src_dims, diff_dst_dims, NONE_DIMS, mkldnn::memory::data_type::data_undef,
+                pp->kh, pp->kw, pp->sy, pp->sx,
+                pp->pad_lh, pp->pad_lw, pp->pad_rh, pp->pad_rw,
+                pooling_algo_convert(pp->algo_kind));
+    }
+
+    // FIXME: in this model, every call creates a new diff_src tensor; when is it freed?
+    mkldnn::memory::format ws_fmt;
+    void* ws_tmp = nullptr;
+    shared_ptr<avx::byte> ws_reorder;
+    if (pp->algo_kind == pooling_param_t::algorithm::pooling_max) {
+        ws_fmt = ws->cxx_format();
+        ws_tmp = ws->data();
+    }
+
+    mkldnn::memory::format diff_dst_fmt = diff_dst->cxx_format();
+    void* diff_dst_tmp = diff_dst->data();
+    shared_ptr<avx::byte> diff_dst_reorder;
+
+    if (pp->algo_kind == pooling_param_t::algorithm::pooling_max &&
+            ws_fmt != pooling2d_bwd->ws_fmt_) {
+        LOG(INFO) << "ws_fmt=" << ws_fmt << ", pooling2d_bwd->ws_fmt_=" << pooling2d_bwd->ws_fmt_;
+        ReorderOp<T>* reorder_ws_op = ReorderFactory<T>::get(ws_dims, ws_fmt, pooling2d_bwd->ws_fmt_);
+        ws_reorder = Allocator::malloc(ws->len(), MPOOL_REORDER);
+        //ws_reorder = new avx::byte[ws->len()];
+        reorder_ws_op->execute(ws_tmp, ws_reorder.get());
+        ws_tmp = ws_reorder.get();
+    }
+    if (diff_dst_fmt != pooling2d_bwd->diff_dst_fmt_) {
+        LOG(INFO) << "diff_dst_fmt=" << diff_dst_fmt << ", pooling2d_bwd->diff_dst_fmt_=" << pooling2d_bwd->diff_dst_fmt_;
+        ReorderOp<T>* reorder_diff_dst_op = ReorderFactory<T>::get(diff_dst_dims, diff_dst_fmt, pooling2d_bwd->diff_dst_fmt_);
+        diff_dst_reorder = Allocator::malloc(diff_dst->len(), MPOOL_REORDER);
+        //diff_dst_reorder = new avx::byte[diff_dst->len()];
+        reorder_diff_dst_op->execute(diff_dst_tmp, diff_dst_reorder.get());
+        diff_dst_tmp = diff_dst_reorder.get();
+    }
+
+    // create tensor based on the selected primitive
+    // assume dst and src have the same data type
+    // Tensor *diff_src_tensor = new Tensor(diff_src_dims, diff_dst->cxx_data_type(), pooling2d_bwd->diff_src_fmt_, cpu_engine);
+    auto data = Allocator::malloc(diff_src_dims, type2size(diff_dst->type()), MPOOL_POOLING_BWD);
+    Tensor *diff_src_tensor = new Tensor(diff_src_dims.size(), diff_src_dims, data,
+            (mkldnn_memory_format_t)pooling2d_bwd->diff_src_fmt_,
+            diff_dst->type());
+
+    pooling2d_bwd->execute(diff_src_tensor->data(), diff_dst_tmp, ws_tmp);
+
+    return diff_src_tensor;
+}
+
+
+template class Pooling2D<float>;
+
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/python/ideep4py/py/dlcp/dlcp.i b/python/ideep4py/py/dlcp/dlcp.i
new file mode 100644
index 00000000..be749d0a
--- /dev/null
+++ b/python/ideep4py/py/dlcp/dlcp.i
@@ -0,0 +1,6 @@
+%{
+    #define SWIG_FILE_WITH_INIT
+    #include "dlcp_py.h"
+%}
+
+%include "dlcp_py.h"
diff --git a/python/ideep4py/py/dlcp/dlcp_py.cc b/python/ideep4py/py/dlcp/dlcp_py.cc
new file mode 100644
index 00000000..67c80bb8
--- /dev/null
+++ b/python/ideep4py/py/dlcp/dlcp_py.cc
@@ -0,0 +1,30 @@
+/*
+ *COPYRIGHT
+ *All modification made by Intel Corporation: © 2017 Intel Corporation.
+ *Copyright (c) 2015 Preferred Infrastructure, Inc.
+ *Copyright (c) 2015 Preferred Networks, Inc.
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#include "dlcp_py.h" + +bool dlCompression::available = false; diff --git a/python/ideep4py/py/dlcp/dlcp_py.h b/python/ideep4py/py/dlcp/dlcp_py.h new file mode 100644 index 00000000..b59a0ed6 --- /dev/null +++ b/python/ideep4py/py/dlcp/dlcp_py.h @@ -0,0 +1,121 @@ +/* + *COPYRIGHT + *All modification made by Intel Corporation: © 2017 Intel Corporation. + *Copyright (c) 2015 Preferred Infrastructure, Inc. + *Copyright (c) 2015 Preferred Networks, Inc. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ * + */ + + +#ifndef _DLCP_PY_H_ +#define _DLCP_PY_H_ + +#include "dl_compression.h" +#include "mdarray.h" +#include "tensor.h" + +class dlCompression { +public: + enum { + dl_comp_none = DL_COMP_NONE, + dl_comp_dfp = DL_COMP_DFP, + }; + + enum { + dl_comp_ok = DL_COMP_OK, + dl_comp_fail = DL_COMP_FAIL, + dl_comp_fail_src_data_type_not_supported = + DL_COMP_FAIL_SRC_DATA_TYPE_NOT_SUPPORTED, + dl_comp_fail_ratio_not_supported = + DL_COMP_FAIL_RATIO_NOT_SUPPORTED, + dl_comp_fail_comp_method_not_supported = + DL_COMP_FAIL_COMP_METHOD_NOT_SUPPORTED, + dl_comp_fail_invalid_compressed_format = + DL_COMP_FAIL_INVALID_COMPRESSED_FORMAT, + dl_comp_fail_not_supported = + DL_COMP_FAIL_NOT_SUPPORTED, + }; + + static bool available; + + static void init() { + available = dl_comp_check_running_environ(); + } + + static bool is_available() { + return available; + } + + static int Compress(mdarray *src, mdarray *dst, + mdarray *diff, size_t ratio, int method) { + if (!is_available()) + return DL_COMP_FAIL_NOT_SUPPORTED; + + if (src->get()->tensor()->size() != + dst->get()->tensor()->size()) + return DL_COMP_FAIL; + + if (src->get()->tensor()->type() != + dst->get()->tensor()->type()) + return DL_COMP_FAIL; + + int dtype = -1; + switch (src->get()->tensor()->type()) { + case SINT8: + dtype = DL_COMP_INT8; + break; + + case FLOAT32: + dtype = DL_COMP_FLOAT32; + break; + + default: + break; + } + + if (-1 == dtype) + return DL_COMP_FAIL_SRC_DATA_TYPE_NOT_SUPPORTED; + + return dl_comp_compress_buffer(src->get()->tensor()->data(), + dst->get()->tensor()->data(), src->get()->tensor()->size(), + diff ? diff->get()->tensor()->data() : nullptr, + (dl_comp_data_type_t)dtype, ratio, (dl_comp_method_t)method); + } + + static int Decompress(mdarray *src, mdarray *dst) { + if (!is_available()) + return DL_COMP_FAIL_NOT_SUPPORTED; + + if (src->get()->tensor()->size() != + dst->get()->tensor()->size()) + return DL_COMP_FAIL; + + if (src->get()->tensor()->type() != + dst->get()->tensor()->type()) + return DL_COMP_FAIL; + + return dl_comp_decompress_buffer(src->get()->tensor()->data(), + dst->get()->tensor()->data(), + src->get()->tensor()->size()); + } +}; + +#endif diff --git a/python/ideep4py/py/ideep4py.i b/python/ideep4py/py/ideep4py.i new file mode 100644 index 00000000..f9d350f8 --- /dev/null +++ b/python/ideep4py/py/ideep4py.i @@ -0,0 +1,41 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ *
+ */
+
+
+%module ideep4py
+
+%init %{
+    import_array();
+    implementation::g_init();
+%}
+
+%include "mdarray.i"
+%include "eltwise.i"
+%include "conv.i"
+%include "pooling.i"
+%include "linear.i"
+%include "bn.i"
+%include "concat.i"
+%include "lrn.i"
+%include "dropout.i"
+%include "dlcp.i"
diff --git a/python/ideep4py/py/mm/basic.cc b/python/ideep4py/py/mm/basic.cc
new file mode 100644
index 00000000..e3df0062
--- /dev/null
+++ b/python/ideep4py/py/mm/basic.cc
@@ -0,0 +1,71 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#include "basic.h"
+#include "tensor.h"
+
+PyObject *basic::copyto(mdarray *dst, mdarray *src)
+{
+    Tensor *tdst = dst->get()->tensor();
+    Tensor *tsrc = src->get()->tensor();
+    if (tdst->copyto(tsrc))
+        Py_RETURN_NONE;
+    return nullptr;
+}
+
+PyObject *basic::copyto(mdarray *dst, Py_buffer *src_view)
+{
+    // The buffer is also validated on the ideep4py (Python) side
+    Tensor *tdst = dst->get()->tensor();
+    if (tdst->len() != (size_t)src_view->len) {
+        return nullptr;
+    }
+    tdst->copyto((char *)src_view->buf);
+    Py_RETURN_NONE;
+}
+
+mdarray basic::acc_sum(vector<mdarray *> arrays)
+{
+    vector<shared_ptr<mkldnn::memory>> srcs_memory;
+    vector<mkldnn::memory::primitive_desc> srcs_pd;
+    vector<mkldnn::primitive::at> inputs;
+    vector<float> scales;
+    for (vector<mdarray *>::iterator it = arrays.begin();
+            it != arrays.end(); it++) {
+        Tensor *tensor = (*it)->get()->tensor();
+        scales.push_back(1.0);
+        srcs_pd.push_back(tensor->mkldnn_memory().get_primitive_desc());
+        inputs.push_back(tensor->mkldnn_memory());
+    }
+    auto sum_pd = sum::primitive_desc(scales, srcs_pd);
+    auto dst_pd = sum_pd.dst_primitive_desc();
+    Tensor *dst_tensor = new Tensor(dst_pd);
+    auto sum_p = sum(sum_pd, inputs, dst_tensor->mkldnn_memory());
+
+    mkldnn::stream s(mkldnn::stream::eager);
+    s.submit({sum_p}).wait();
+
+    mdarray dst_mdarray = mdarray(dst_tensor);
+    return dst_mdarray;
+}
diff --git a/python/ideep4py/py/mm/basic.h b/python/ideep4py/py/mm/basic.h
new file mode 100644
index 00000000..a6484963
--- /dev/null
+++ b/python/ideep4py/py/mm/basic.h
@@ -0,0 +1,36 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ *
+ */
+
+
+#pragma once
+#define NO_IMPORT_ARRAY
+#define PY_ARRAY_UNIQUE_SYMBOL basic_ARRAY_API
+#include <numpy/arrayobject.h>
+#include "mdarray.h"
+
+class basic {
+public:
+    static PyObject *copyto(mdarray *dst, mdarray *src);
+    static PyObject *copyto(mdarray *dst, Py_buffer *view);
+    static mdarray acc_sum(vector<mdarray *> arrays);
+};
diff --git a/python/ideep4py/py/mm/basic.i b/python/ideep4py/py/mm/basic.i
new file mode 100644
index 00000000..591c18b9
--- /dev/null
+++ b/python/ideep4py/py/mm/basic.i
@@ -0,0 +1,67 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ * + */ + + +%{ + #include "basic.h" +%} + +%typemap(in) (vector arrays) { + int i; + int argc; + vector varr; + if (!PyTuple_Check($input)) { + PyErr_SetString(PyExc_ValueError,"Expected a tuple"); + return nullptr; + } + argc = PyTuple_Size($input); + for (i = 0; i < argc; i++) { + PyObject *obj = PyTuple_GET_ITEM($input, i); + if (!implementation::mdarray::is_mdarray(obj)) { + PyErr_SetString(PyExc_ValueError,"Expected a mdarray in acc_sum"); + return nullptr; + } +#if 0 + if (!PyArray_Check(obj)) { + PyErr_SetString(PyExc_ValueError,"Expected a array"); + return nullptr; + } +#endif + void *that; + int res1 = SWIG_ConvertPtr(obj, &that, nullptr, 0); + if (!SWIG_IsOK(res1)) { + PyErr_SetString(PyExc_ValueError, "Can't convert mdarray pyobject"); + return nullptr; + } + varr.push_back((mdarray *)that); + } + $1 = varr; +} + +class basic { +public: + static PyObject *copyto(mdarray *dst, mdarray *src); + static PyObject *copyto(mdarray *dst, Py_buffer *view); + static mdarray acc_sum(vector arrays); +}; + diff --git a/python/ideep4py/py/mm/mdarray.cc b/python/ideep4py/py/mm/mdarray.cc new file mode 100755 index 00000000..5d43934a --- /dev/null +++ b/python/ideep4py/py/mm/mdarray.cc @@ -0,0 +1,916 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
+ *
+ */
+
+
+#include <Python.h>
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <numpy/arrayobject.h>
+#if defined(OPENMP_AFFINITY)
+#include "cpu_info.h"
+#endif
+#include "mdarray.h"
+#include
+#include "mkldnn_ex.h"
+#include "dlcp_py.h"
+
+namespace implementation {
+
+static PyObject *PyType_reorder_buffer = nullptr;
+
+static swig_type_info *SwigTy_mdarray = nullptr;
+//static swig_type_info *SwigTy_engine = nullptr;
+static PyObject *PyType_mdarray = nullptr;
+
+// get the mdarray behind a PyObject
+static inline mdarray *get_mdarray_from_PyObject(PyObject *self) {
+    void *oprd_self;
+    int res = SWIG_ConvertPtr(self, &oprd_self, nullptr, 0);
+    if (!SWIG_IsOK(res)) {
+        // PyErr_SetString(PyExc_ValueError, "Error self PyObject");
+        return NULL;
+    }
+    return (reinterpret_cast<py_handle *>(oprd_self))->get();
+}
+
+// check whether mdarray supports this operation
+static inline bool is_mdarray_supported(PyObject *self, PyObject *o) {
+    // get self mdarray
+    mdarray *self_mdarray = get_mdarray_from_PyObject(self);
+    if (!self_mdarray)
+        return false;
+
+    // o is an ndarray;
+    // if the sizes are not equal, the operation is an array broadcast
+    if (reinterpret_cast<PyTypeObject *>(o->ob_type) == &PyArray_Type) {
+        if ((size_t)PyArray_SIZE(reinterpret_cast<PyArrayObject *>(o))
+                != self_mdarray->size() ||
+                !PyArray_ISFLOAT(reinterpret_cast<PyArrayObject *>(o))) {
+            return false;
+        }
+        return true;
+    }
+
+    // o is an mdarray
+    if (reinterpret_cast<PyObject *>(o->ob_type) == PyType_mdarray) {
+        // if o is an mdarray, try to get it
+        mdarray *o_mdarray = get_mdarray_from_PyObject(o);
+        if (!o_mdarray)
+            return false;
+
+        // operations between mdarrays of different sizes are not supported
+        if (o_mdarray->size() != self_mdarray->size())
+            return false;
+
+        return true;
+    }
+
+    return false;
+}
+
+PyObject *queryPyTypeObject(const char *name) {
+    swig_type_info *info = SWIG_TypeQuery(name);
+    if (info != nullptr) {
+        SwigPyClientData *cd
+            = (SwigPyClientData *)info->clientdata;
+        return reinterpret_cast<PyObject *>(cd->pytype);
+    }
+
+    throw mkldnn::error(mkldnn_invalid_arguments
+            , "Failed to find reorderer object");
+}
+
+// We brought these lookups to global scope to avoid paying their cost repeatedly
+#if PY_VERSION_HEX >= 0x03000000
+int g_init() {
+#else
+void g_init() {
+#endif
+    PyType_reorder_buffer = queryPyTypeObject("_p_reorder_buffer");
+    SwigTy_mdarray = SWIG_TypeQuery("_p_mdarray");
+    PyType_mdarray = queryPyTypeObject("_p_mdarray");
+    //SwigTy_engine = SWIG_TypeQuery("_p_mkldnn__engine");
+
+#if PY_VERSION_HEX < 0x03000000
+    if ((reinterpret_cast<PyTypeObject *>(PyType_mdarray)->tp_flags
+            & Py_TPFLAGS_HAVE_NEWBUFFER) != Py_TPFLAGS_HAVE_NEWBUFFER)
+        throw mkldnn::error(mkldnn_invalid_arguments
+                , "Python2 should have the new buffer flag on!");
+#endif
+
+    // XXX: I don't quite understand it, and its repercussions :)
+    SwigPyObject_stype = SWIG_MangledTypeQuery("_p_SwigPyObject");
+
+    if (SwigPyObject_stype == nullptr)
+        throw mkldnn::error(mkldnn_invalid_arguments
+                , "Failed to find SwigPyObject object");
+
+    // Initiate static variables imported from the numpy include
+    import_array();
+
+#if defined(OPENMP_AFFINITY)
+    google::SetStderrLogging(1);
+    google::InitGoogleLogging("mkldnn");
+    OpenMpManager::bindOpenMpThreads();
+    OpenMpManager::printVerboseInformation();
+#endif
+
+    dlCompression::init();
+
+#if PY_VERSION_HEX >= 0x03000000
+    return 0;
+#else
+    return;
+#endif
+}
+
+//FIXME: macro SWIG_as_voidptr is copied from mdarray_wrap.cpp
+#define SWIG_as_voidptr(a) const_cast< void * >(static_cast< const void * >(a))
+
+// Pickle
+PyObject *mdarray::__getstate__() const {
+    auto md = desc();
+    void *raw_data = data();
+    int ndims = md.data.ndims;
+
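+    // Pickle support: the state is a 5-tuple of (dims, data_type, format,
+    // engine pointer, raw data pointer); note that __setstate__ below is a
+    // stub, so unpickling does not yet restore the buffer.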
mkldnn::memory::dims dims; + mkldnn::memory::data_type dtype = static_cast(md.data.data_type); + mkldnn::memory::format format = static_cast(md.data.format); + static mkldnn::engine engine = get_engine(); + + PyObject *py_dims = PyTuple_New(ndims); + for (int i = 0; i < ndims; i++) { + PyObject *py_dim = PyLong_FromLong(md.data.dims[i]); + PyTuple_SetItem(py_dims, i, py_dim); + } + + PyObject *py_dtype = PyLong_FromLong((long)dtype); + PyObject *py_format = PyLong_FromLong((long)format); + PyObject *py_engine = PyLong_FromVoidPtr((void *)&engine); + PyObject *py_rdata = PyLong_FromVoidPtr((void *)raw_data); + + PyObject *state = PyTuple_New(5); + PyTuple_SetItem(state, 0, py_dims); + PyTuple_SetItem(state, 1, py_dtype); + PyTuple_SetItem(state, 2, py_format); + PyTuple_SetItem(state, 3, py_engine); + PyTuple_SetItem(state, 4, py_rdata); + + return state; +} + +// Unpickle. +void mdarray::__setstate__(PyObject *state) { + return; +} + +PyObject *mdarray::py_mdarray_from(PyObject *o) const { + PyObject *argList = Py_BuildValue("(O)", o); + + if (argList == nullptr) { + PyErr_SetString(PyExc_SystemError, "Can not create argument list"); + return nullptr; + } + + o = PyObject_CallObject(PyType_mdarray, argList); + + Py_DECREF(argList); + + if (o == nullptr) { + PyErr_SetString(PyExc_BufferError, "Cannot create mdarray from input"); + return nullptr; + } + + return o; +} + +template +void mdarray::axpby(mdarray *dst, T a, mdarray *x, T b, mdarray *y) { + ::axpby(dst->tensor(), a, x->tensor(), b, y->tensor()); +} + +template +PyObject *mdarray::axpby(T a, T b, PyObject *o) { + /// Resource manager, for GCC do not accept lambda + struct py_decref { + void operator () (PyObject *p) { + Py_DECREF(p); + } + }; + + std::unique_ptr op(nullptr); + + /// Create mdarray from buffer provider + if (reinterpret_cast(o->ob_type) == &PyArray_Type) { + o = py_mdarray_from(o); + op.reset(o); + } + + void *oprd2; + int res = SWIG_ConvertPtr(o, &oprd2, nullptr, 0); + + if (!SWIG_IsOK(res)) { + PyErr_SetString(PyExc_ValueError, "Wrong operand object in add wrapper"); + return nullptr; + } + + auto x = (reinterpret_cast(oprd2))->get(); + py_handle *output = new py_handle(new mdarray(x->mkldnn_memory().get_primitive_desc())); + + /// Switch position for format consistency + axpby(output->get(), b, x, a, this); + + PyObject *resultobj = SWIG_Python_NewPointerObj(nullptr + , SWIG_as_voidptr(output), SwigTy_mdarray, SWIG_POINTER_OWN | 0 ); + + return resultobj; +} + +template +PyObject *mdarray::inplace_axpby(T a, PyObject *self, T b, PyObject *o) { + // Resource manager, for GCC do not accept lambda + struct py_decref { + void operator () (PyObject *p) { + Py_DECREF(p); + } + }; + + std::unique_ptr op(nullptr); + + // Create mdarray from buffer provider + if (reinterpret_cast(o->ob_type) == &PyArray_Type) { + o = py_mdarray_from(o); + op.reset(o); + } + + void *oprd2; + int res = SWIG_ConvertPtr(o, &oprd2, nullptr, 0); + + if (!SWIG_IsOK(res)) { + PyErr_SetString(PyExc_ValueError, "Wrong operand object in add wrapper"); + return nullptr; + } + + auto y = (reinterpret_cast(oprd2))->get(); + axpby(this, a, this, b, y); + Py_INCREF(self); + + return self; +} + +PyObject *mdarray::m_Add(PyObject *self, PyObject *o) { + // Array Broadcast + if (!is_mdarray_supported(self, o)) { + return m_Add_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + // Make compatibility with Non-C-Contiguous array. 
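+        // On Python 2, a non-C-contiguous ndarray operand is first copied into
+        // a C-contiguous buffer via PyArray_ContiguousFromAny before the
+        // element-wise map; on Python 3 the map implementation handles it as-is.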
+ PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_Add_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return axpby(1.0f, 1.0f, o); + } +} + +PyObject *mdarray::m_Subtract(PyObject *self, PyObject *o) { + // Array Broadcast + if (!is_mdarray_supported(self, o)) { + return m_Subtract_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_Subtract_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return axpby(1.0f, -1.0f, o); + } +} + +PyObject *mdarray::m_InPlaceAdd(PyObject *self, PyObject *o) { + // Array Broadcast + if (!is_mdarray_supported(self, o)) { + return m_InPlaceAdd_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_InPlaceAdd_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return inplace_axpby(1.0f, self, 1.0f, o); + } +} + +PyObject *mdarray::m_InPlaceSubtract(PyObject *self, PyObject *o) { + // Array Broadcast + if (!is_mdarray_supported(self, o)) { + return m_InPlaceSubtract_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? 
NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_InPlaceSubtract_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return inplace_axpby(1.0f, self, -1.0f, o); + } +} + +template +void plain_mult(const T *a, const T *b, T *o, int size) { + for (int idx = 0; idx < size; idx++) + o[idx] = a[idx] * b[idx]; +} + +template +void plain_div(const T *a, const T *b, T *o, int size) { + for (int idx = 0; idx < size; idx++) + o[idx] = a[idx] / b[idx]; +} + +enum {mmult, mdiv}; +PyObject *mdarray::m_mult_div(PyObject *self, PyObject *o, int mult_or_div, bool inplace) { + struct py_decref { + void operator () (PyObject *p) { + Py_DECREF(p); + } + }; + + std::unique_ptr op(nullptr); + + enum mult_type_t { MULT_UNKNOWN, MULT_ELTWISE, MULT_SCALAR }; + + PyTypeObject *oprd2_type = reinterpret_cast(o->ob_type); + int mult_type = static_cast(MULT_UNKNOWN); + if (oprd2_type == &PyArray_Type) { + mult_type = MULT_ELTWISE; + o = py_mdarray_from(o); + op.reset(o); + } else if (PyObject_HasAttrString(o, "is_mdarray")) { + mult_type = MULT_ELTWISE; + } else if (PyFloat_Check(o) || PyInt_Check(o) || PyNumber_Check(o)) { + mult_type = MULT_SCALAR; + } + + PyObject *resultobj = nullptr; + + switch (static_cast(mult_type)) { + case MULT_ELTWISE: { + void *oprd2; + int res = SWIG_ConvertPtr(o, &oprd2, nullptr, 0); + if (!SWIG_IsOK(res)) { + PyErr_SetString(PyExc_ValueError, "Error oprd2 %matrix element multiply"); + break; + } + + auto oprd1_mdarr = this; + auto oprd2_mdarr = (reinterpret_cast(oprd2))->get(); + + if (oprd1_mdarr->size() != oprd2_mdarr->size()) { + PyErr_SetString(PyExc_SystemError, "Abnormal matrix size %matrix element multiply"); + break; + } + + std::vector prims; + std::unique_ptr mreorder; + + auto oprd2_internal_m = reorder_if_must(oprd2_mdarr->mkldnn_memory(), + oprd1_mdarr->mkldnn_memory().get_primitive_desc(), + mreorder, + &prims); + mkldnn::stream s(mkldnn::stream::kind::eager); + s.submit(prims).wait(); + + mkldnn::memory::desc res_desc = oprd1_mdarr->desc(); + mkldnn::memory::dims res_tz; + mkldnn::memory::data_type res_dtype = + static_cast(res_desc.data.data_type); + mkldnn::memory::format res_fmt = + static_cast(res_desc.data.format); + mkldnn::engine res_engine = oprd1_mdarr->get_engine(); + + assert(oprd1_mdarr->ndims() == 2 || oprd1_mdarr->ndims() == 4); + for (int ndim = 0; ndim < static_cast(oprd1_mdarr->ndims()); ndim++) + res_tz.push_back(res_desc.data.dims[ndim]); + + mdarray *res_mdarr; + if (!inplace) { + res_mdarr = new mdarray(res_tz, res_dtype, res_fmt, res_engine); + } else { + res_mdarr = oprd1_mdarr; + } + + assert(mkldnn::memory::f32 == res_dtype || + mkldnn::memory::s32 == res_dtype || + mkldnn::memory::s16 == res_dtype || + mkldnn::memory::s8 == res_dtype || + mkldnn::memory::u8 == res_dtype ); + assert(mmult == mult_or_div || + mdiv == mult_or_div); + if (mkldnn::memory::f32 == res_dtype) { + switch (mult_or_div) { + case mmult: + vsMul(oprd1_mdarr->size(), + reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data())); + break; + + case mdiv: + plain_div(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + } + } else if (mkldnn::memory::s32 == res_dtype) { + switch (mult_or_div) { + case mmult: + plain_mult(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + 
reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + + case mdiv: + plain_div(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + } + } else if (mkldnn::memory::s16 == res_dtype) { + switch (mult_or_div) { + case mmult: + plain_mult(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + + case mdiv: + plain_div(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + } + } else if (mkldnn::memory::s8 == res_dtype) { + switch (mult_or_div) { + case mmult: + plain_mult(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + + case mdiv: + plain_div(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + } + } else if (mkldnn::memory::u8 == res_dtype) { + switch (mult_or_div) { + case mmult: + plain_mult(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + + case mdiv: + plain_div(reinterpret_cast(oprd1_mdarr->data()), + reinterpret_cast(oprd2_internal_m.get_data_handle()), + reinterpret_cast(res_mdarr->data()), + static_cast(oprd1_mdarr->size())); + break; + } + } + + if (!inplace) { + auto res_py_handle = new py_handle(res_mdarr); + resultobj = SWIG_Python_NewPointerObj(nullptr, + SWIG_as_voidptr(res_py_handle), + SwigTy_mdarray, + SWIG_POINTER_OWN | 0); + } else { + resultobj = self; + Py_INCREF(self); + } + + break; + } + + case MULT_SCALAR: { + double a = PyInt_Check(o) ? + static_cast(PyInt_AsLong(o)) : + PyFloat_AsDouble(o), + b = 0.0; + + a = (mmult == mult_or_div) ? a : (1 / a); + + if (!inplace) { + resultobj = axpby(a, b, self); + } else { + resultobj = inplace_axpby(a, self, b, self);; + } + break; + } + + case MULT_UNKNOWN: + default: + PyErr_SetString(PyExc_SystemError, "Abnormal type % matrix * scalar"); + break; + } + + return resultobj; +} + +PyObject *mdarray::m_Multiply(PyObject *self, PyObject *o) { + if (!is_mdarray_supported(self, o)) { + return m_Multiply_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_Multiply_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return m_mult_div(self, o, mmult, false); + } +} + +PyObject *mdarray::m_InPlaceMultiply(PyObject *self, PyObject *o) { + if (!is_mdarray_supported(self, o)) { + return m_InPlaceMultiply_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? 
NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_InPlaceMultiply_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return m_mult_div(self, o, mmult, true); + } +} + +PyObject *mdarray::m_Divide(PyObject *self, PyObject *o) { + if (!is_mdarray_supported(self, o)) { + return m_Divide_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_Divide_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return m_mult_div(self, o, mdiv, false); + } +} + +PyObject *mdarray::m_InPlaceDivide(PyObject *self, PyObject *o) { + if (!is_mdarray_supported(self, o)) { + return m_InPlaceDivide_map_impl(self, o); + } else if (PyArray_Check(o) && + !PyArray_IS_C_CONTIGUOUS(reinterpret_cast(o))) { + PyObject *_o = o; +#if PY_VERSION_HEX < 0x03000000 + _o = reinterpret_cast(PyArray_ContiguousFromAny( + o, PyArray_ISFLOAT(reinterpret_cast(o)) ? NPY_FLOAT : NPY_INT, 0, 0)); +#endif + PyObject *ret = m_InPlaceDivide_map_impl(self, _o); +#if PY_VERSION_HEX < 0x03000000 + Py_DECREF(_o); +#endif + return ret; + } else { + return m_mult_div(self, o, mdiv, true); + } +} + +int mdarray::getbuffer(PyObject *self, Py_buffer *view, int flags) { + if ((flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) { + PyErr_SetString(PyExc_ValueError, "carray is not Fortran contiguous"); + return -1; + } + + if (view == nullptr) { + PyErr_SetString(PyExc_ValueError, "NULL view in getbuffer"); + return -1; + } + + // reorderer type object + if (PyType_reorder_buffer == nullptr) { + PyErr_SetString(PyExc_NameError, "name 'reorderer' is not defined"); + return -1; + } + + // Wrote some python in C++ :) + PyObject *argList = Py_BuildValue("(O)", self); + if (argList == nullptr) { + return -1; + } + + // TODO: Do we need to cache this thing? + PyObject *rbobj = PyObject_CallObject(PyType_reorder_buffer, argList); + Py_DECREF(argList); + + if (rbobj == nullptr) { + return -1; + } + + Reorderer *rb; + int res = SWIG_ConvertPtr(rbobj, reinterpret_cast(&rb), nullptr, 0); + + if (!SWIG_IsOK(res)) { + PyErr_SetString(PyExc_RuntimeError, "Can't get C++ object from python object"); + return -1; + } + + if (rb->non_trivial()) + rb->fire(this->tensor()); + + if (build_view(view, flags, *rb)) { + PyErr_SetString(PyExc_RuntimeError, "Can't build Py_buffer!"); + return -1; + } + + // Stolen reference + view->obj = rbobj; + sync_reorder_ = rb; + + // reset self mdarray's tensor, keep buffer consistency. + if (rb->non_trivial()) { + mdarray *src_mdarray = get_mdarray_from_PyObject(self); + if (!src_mdarray) { + PyErr_SetString(PyExc_RuntimeError, "Can't get src mdarray from python object!"); + return -1; + } + + Tensor *src_tensor = src_mdarray->tensor(); + mkldnn::memory::dims src_dims = (mkldnn::memory::dims)src_tensor->dims(); + mkldnn_memory_format_t dst_fmt = public_format(src_tensor->format()); + + Tensor *dst_tensor = new Tensor(src_dims.size(), src_dims, rb->data_, + dst_fmt, src_tensor->type()); + src_mdarray->reset_tensor(dst_tensor); + } + return 0; +} + +PyObject *mdarray::getattro(PyObject *self, PyObject *name) { + // XXX: Recursive alarm !!! 
+    PyObject *surrogate = PyArray_FromAny(self, nullptr, 0, 0
+            , NPY_ARRAY_ELEMENTSTRIDES, nullptr);
+
+    if (surrogate == nullptr)
+        return nullptr;
+
+    // Watch the reference count of surrogate if a more complicated
+    // lookup method ever gets involved
+    PyObject *attr = PyObject_GetAttr(surrogate, name);
+
+    // The surrogate will be destroyed after the attribute lookup is done
+    Py_DECREF(surrogate);
+
+    if (attr == nullptr && PyErr_ExceptionMatches(PyExc_AttributeError)) {
+        PyErr_Clear();
+
+        // Switch to our exception message if things went wrong
+        PyTypeObject *tp = Py_TYPE(self);
+        PyErr_Format(PyExc_AttributeError
+                , "mdarray '%.50s' object has no attribute '%p'", tp->tp_name, name);
+    }
+
+    return attr;
+}
+
+Py_ssize_t mdarray::mp_length(PyObject *self) {
+    PyObject *surrogate = PyArray_FromAny(self, nullptr, 0, 0
+            , NPY_ARRAY_ELEMENTSTRIDES, nullptr);
+
+    if (surrogate == nullptr)
+        return -1;
+
+    Py_ssize_t len = PyMapping_Length(surrogate);
+    Py_DECREF(surrogate);
+
+    // TODO: Exception localize
+    return len;
+}
+
+PyObject *mdarray::mp_subscript(PyObject *self, PyObject *op) {
+    PyObject *surrogate = PyArray_FromAny(self, nullptr, 0, 0
+            , NPY_ARRAY_ELEMENTSTRIDES, nullptr);
+
+    if (surrogate == nullptr)
+        return nullptr;
+
+    PyObject *ret = PyObject_GetItem(surrogate, op);
+    Py_DECREF(surrogate);
+
+    // TODO: Exception localize
+    return ret;
+}
+
+int mdarray::mp_ass_subscript(PyObject *self, PyObject *ind, PyObject *op) {
+    PyObject *surrogate = PyArray_FromAny(self, nullptr, 0, 0
+            , NPY_ARRAY_ELEMENTSTRIDES, nullptr);
+
+    int ret;
+
+    if (surrogate == nullptr)
+        return -1;
+
+    if (op == nullptr)
+        ret = PyObject_DelItem(surrogate, ind);
+    else
+        ret = PyObject_SetItem(surrogate, ind, op);
+
+    if (sync_reorder_ && sync_reorder_->non_trivial()) {
+        sync_reorder_->sync(this->tensor());
+    }
+
+    Py_DECREF(surrogate);
+
+    // TODO: Exception localize
+    return ret;
+}
+
+PyObject *mdarray::flat() {
+    long int dims[1] = {static_cast<long int>(this->size())};
+
+    int typenum = NPY_NOTYPE;
+    switch (static_cast<mkldnn::memory::data_type>(
+                this->mkldnn_memory().get_primitive_desc().desc().data.data_type)) {
+        case mkldnn::memory::f32:
+            typenum = NPY_FLOAT32;
+            break;
+        case mkldnn::memory::s32:
+            typenum = NPY_INT;
+            break;
+        case mkldnn::memory::s16:
+            typenum = NPY_INT16;
+            break;
+        case mkldnn::memory::s8:
+            typenum = NPY_INT8;
+            break;
+        case mkldnn::memory::u8:
+            typenum = NPY_UINT8;
+            break;
+        default:
+            PyErr_SetString(PyExc_ValueError, "Bad mdarray data_type");
+            break;
+    }
+
+    PyObject *plain_arr = nullptr;
+    plain_arr = PyArray_SimpleNewFromData(1, dims, typenum, this->data());
+    if (!plain_arr)
+        PyErr_SetString(PyExc_ValueError, "Can't create a plain array from mdarray");
+
+    return plain_arr;
+}
+
+PyObject *mdarray::reshape(py_handle *self, vector<int> dims)
+{
+    if (dims.size() != 4 && dims.size() != 2) {
+        PyErr_SetString(PyExc_ValueError,"Only support reshape to 2 or 4 dimensions");
+        return nullptr;
+    }
+    int idx_unknown = -1;
+    size_t size = 1;
+    for (unsigned int i = 0; i < dims.size(); i++) {
+        if (dims[i] < 0) {
+            if (idx_unknown == -1) {
+                idx_unknown = i;
+            } else {
+                PyErr_SetString(PyExc_ValueError,"Only support 1 unknown dimension");
+                return nullptr;
+            }
+        } else {
+            size *= dims[i];
+        }
+    }
+    if (idx_unknown == -1) {
+        if (size != this->size()) {
+            PyErr_SetString(PyExc_ValueError,"Wrong dimension to reshape");
+            return nullptr;
+        }
+    } else if (this->size() % size) {
+        PyErr_SetString(PyExc_ValueError,"Wrong dimension to reshape");
+        return nullptr;
+    } else {
+        dims[idx_unknown] = this->size() / size;
+    }
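+    // A single negative entry is inferred from the total element count; e.g.
+    // a 24-element mdarray reshaped with dims (2, -1) comes out as (2, 12).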
+    Tensor *tensor = tensor_->reshape(dims);
+    if (tensor == nullptr) {
+        PyErr_SetString(PyExc_ValueError,"The dimension is not valid in reshape");
+        return nullptr;
+    } else {
+        //mdarray *new_array = new ::mdarray(tensor);
+        py_handle *output = new py_handle(new mdarray(tensor));
+        PyObject *resultobj = SWIG_Python_NewPointerObj(nullptr
+                , SWIG_as_voidptr(output), SwigTy_mdarray, SWIG_POINTER_OWN | 0 );
+        return resultobj;
+    }
+}
+
+PyObject *mdarray::sum(vector<int> axis, bool keepdims)
+{
+    auto tensor = tensor_->sum(axis);
+    if (tensor) {
+        if (keepdims) {
+            vector<int> expected_shape;
+            for (int v = 0; v < this->ndims(); v++)
+                expected_shape.push_back(this->desc().data.dims[v]);
+
+            for (unsigned a = 0; a < axis.size(); a++)
+                expected_shape[axis[a]] = 1;
+
+            auto _tensor = tensor->reshape(expected_shape);
+            delete tensor;
+            tensor = _tensor;
+        }
+
+        auto output = new py_handle(new mdarray(tensor));
+        auto resultobj = SWIG_Python_NewPointerObj(nullptr,
+                SWIG_as_voidptr(output), SwigTy_mdarray,
+                SWIG_POINTER_OWN | 0);
+        return resultobj;
+    } else {
+        return nullptr;
+    }
+}
+
+bool mdarray::is_mdarray(PyObject *o)
+{
+    return (reinterpret_cast<PyObject *>(o->ob_type)
+            == PyType_mdarray);
+}
+
+}
diff --git a/python/ideep4py/py/mm/mdarray.h b/python/ideep4py/py/mm/mdarray.h
new file mode 100755
index 00000000..c943942c
--- /dev/null
+++ b/python/ideep4py/py/mm/mdarray.h
@@ -0,0 +1,547 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ *
+ *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *THE SOFTWARE.
+ * + */ + + +#ifndef _MDARRAY_H_ +#define _MDARRAY_H_ +#include +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mem.h" +#include "tensor.h" +#include "reorder.h" + +// FIXME +// use global engine to init mdarray +using namespace mkldnn; +extern engine cpu_engine; + +namespace implementation { + class mdarray; +} + +using py_handle = std::shared_ptr; + +namespace implementation { + +#if PY_VERSION_HEX >= 0x03000000 + int g_init(); +#else + void g_init(); +#endif + +#define NPY_ARRAY_SURROGATE_ENTRY(mdarray) \ + PyObject *surrogate = PyArray_FromAny(mdarray, nullptr, 0, 0 \ + , NPY_ARRAY_ELEMENTSTRIDES, nullptr) \ + +#define NPY_ARRAY_SURROGATE_EXIT() + +#define nb_unary_map_impl(method) \ + PyObject * m_ ## method ## _map_impl(PyObject *self) { \ + NPY_ARRAY_SURROGATE_ENTRY(self); \ + \ + if (surrogate == nullptr) \ + return nullptr; \ + \ + PyObject *res = PyNumber_ ## method(surrogate); \ + Py_DECREF(surrogate); \ + NPY_ARRAY_SURROGATE_EXIT(); \ + return res; \ + } \ + +#define nb_unary_map(method) \ + nb_unary_map_impl(method) \ + PyObject * m_ ## method (PyObject *self) { \ + return m_ ## method ## _map_impl(self); \ + } \ + +#define nb_binary_map_impl(method) \ + PyObject * m_ ## method ## _map_impl(PyObject *self, PyObject *o) { \ + PyObject *left = self, *right = o; \ + if (is_mdarray(left)) { \ + left = PyArray_FromAny(left, nullptr, 0, 0 \ + , NPY_ARRAY_ELEMENTSTRIDES, nullptr); \ + } \ + if (is_mdarray(right)) { \ + right = PyArray_FromAny(right, nullptr, 0, 0 \ + , NPY_ARRAY_ELEMENTSTRIDES, nullptr); \ + } \ + PyObject *res = PyNumber_ ## method(left, right); \ + if (left != self) \ + Py_DECREF(left); \ + if (right != o) \ + Py_DECREF(right); \ + return res; \ + } + +#define nb_binary_map_impl_with_target_func(method, tfunc) \ + PyObject * m_ ## method ## _map_impl(PyObject *self, PyObject *o) { \ + NPY_ARRAY_SURROGATE_ENTRY(self); \ + \ + if (surrogate == nullptr) \ + return nullptr; \ + \ + PyObject *res = PyNumber_ ## tfunc(surrogate, o); \ + Py_DECREF(surrogate); \ + NPY_ARRAY_SURROGATE_EXIT(); \ + return res; \ + } + +#define nb_binary_map(method) \ + nb_binary_map_impl(method) \ + PyObject * m_ ## method (PyObject *self, PyObject *o) { \ + return m_ ## method ## _map_impl(self, o); \ + } \ + +#define nb_ternary_map_impl(method) \ + PyObject * m_ ## method ## _map_impl(PyObject *self, PyObject *o1, PyObject *o2) { \ + NPY_ARRAY_SURROGATE_ENTRY(self); \ + \ + if (surrogate == nullptr) \ + return nullptr; \ + \ + PyObject *res = PyNumber_ ## method(surrogate, o1, o2); \ + Py_DECREF(surrogate); \ + NPY_ARRAY_SURROGATE_EXIT(); \ + return res; \ + } + +#define nb_ternary_map(method) \ + nb_ternary_map_impl(method) \ + PyObject * m_ ## method (PyObject *self, PyObject *o1, PyObject *o2) { \ + return m_ ## method ## _map_impl(self, o1, o2); \ + } \ + + +//class mdarray : public Tensor { +class mdarray { +public: + // It is exposed to python + // + static constexpr int MAX_NDIM = 12; //XXX: For now + + class Reorder_buffer : Reorderer { + public: + Reorder_buffer(const py_handle in) + :Reorderer(in.get()->tensor()) {} + }; + +public: + typedef size_t size_type; + // Generated on demand + //FIXME + //yli135: add default constructor so that we can pass vector form native + mdarray(); + virtual ~mdarray() = default; + + mdarray(Tensor *tensor) : tensor_(tensor) {} + + mdarray(mkldnn::memory::dims &dims + , mkldnn::memory::data_type dt + , 
mkldnn::memory::format format + , const mkldnn::engine &engine) + : tensor_(new Tensor(dims, dt, format, engine)) {} + + mdarray(mkldnn::memory::primitive_desc pd) + : tensor_(new Tensor(pd)) {} + +#if 0 + mdarray(int ndims, vector dims, void *data, + mkldnn_memory_format_t mm_fmt, data_type_t type=FLOAT32) + : tensor_(new Tensor(ndims, dims, data, mm_fmt, type)) {} +#endif + + mdarray(Py_buffer *view, char input_type='d') {// input_type : 'd'-->data, 'w'-->weight + data_type_t dt; + std::string format(view->format); + if (std::string::npos != format.find_last_of('f')) { + dt = FLOAT32; + } else if (std::string::npos != format.find_last_of('i')) { + dt = SINT32; + } else if (std::string::npos != format.find_last_of('h')) { + dt = SINT16; + } else if (std::string::npos != format.find_last_of('b')) { + dt = SINT8; + } else if (std::string::npos != format.find_last_of('B')) { + dt = UINT8; + } else { + throw mkldnn::error(mkldnn_invalid_arguments + , std::string("MKLDNN does not support data type: ") + + format); + } + vector dims(view->shape, view->shape + view->ndim); + //std::unique_ptr tensor(new Tensor(view->ndim, dims, view->buf, dt)); + tensor_.reset(new Tensor(view->ndim, dims, view->buf, dt, input_type)); + + PyBuffer_Release(view); + +#if 0 + ndims_ = view->ndim; + dims_.assign(view->shape, view->shape + view->ndim); + size_ = view->len / view->itemsize; + type_ = dt; + data_ = std::shared_ptr(new avx::byte [view->len] + , [] (avx::byte *p) {delete [] p;}); + memcpy(data_.get(), view->buf, view->len); + mm_fmt_ = ndims2format(ndims_); + memory::data_type type = to_mkldnn_type(); + mem_.reset(new mkldnn::memory( + { { { dims_ }, type, static_cast(mm_fmt_) } + , cpu_engine }, data_.get())); +#endif + } + + static bool is_mdarray(PyObject *o); + + //FIXME + inline void unpickled_data(void *pdata) { + //data_.reset(reinterpret_cast(pdata)); + //m_.set_data_handle(pdata); + return; + } + + // PEP 3118 interface + int build_view(Py_buffer *view, int flags, const Reorderer &reorder) { + view->buf = reorder.data_.get(); + view->itemsize = reorder.itemsize_; + view->readonly = 0; + view->internal = nullptr; + view->len = reorder.size_ * reorder.itemsize_; + + if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { + view->format = const_cast(reorder.format_); + } else { + view->format = nullptr; + } + + if ((flags & PyBUF_ND) == PyBUF_ND) { + view->ndim = reorder.ndims_; + view->shape = const_cast(reorder.shape_); + } else { + view->ndim = 0; + view->shape = nullptr; + } + + if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) { + view->strides = const_cast(reorder.strides_); + } else { + view->strides = nullptr; + } + + view->suboffsets = nullptr; + + return 0; + } + +#if 0 + // Array protocol + PyArrayInterface *build_array_struct(void) { + auto arrstr = new PyArrayInterface(); + + arrstr->two = 2; + arrstr->nd = ndims_; + arrstr->typekind = *((char *)format_); + arrstr->itemsize = itemsize_; + arrstr->flags = NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_NOTSWAPPED | + NPY_ARRAY_ALIGNED | NPY_ARRAY_WRITEABLE; + arrstr->flags &= ~(NPY_ARRAY_UPDATEIFCOPY | NPY_ARRAY_OWNDATA); + arrstr->shape = shape_; + arrstr->strides = strides_; + arrstr->data = data_.get(); + arrstr->descr = nullptr; + + return arrstr; + } +#endif + + PyObject *__getstate__(void) const; + + void __setstate__(PyObject *state); + + PyObject *py_mdarray_from(PyObject *o) const; + + /// d = a * x + b * y, using x's format + template + static void axpby(mdarray *dst, T a, mdarray *x, T b, mdarray *y); + + /// Interface to directly contact python + 
template + PyObject *axpby(T a, T b, PyObject *o); + + template + PyObject *inplace_axpby(T a, PyObject *self, T b, PyObject *o); + + PyObject *flat(void); + + PyObject *reshape(py_handle *self, vector dims); + + PyObject *m_mult_div(PyObject *self, PyObject *o, int mult_or_div, bool inplace); + + PyObject *sum(std::vector axis, bool keepdims); + + // PEP: 3118 Buffer Protocol Producer + virtual int getbuffer(PyObject *obj, Py_buffer *view, int flags); + + PyObject *getattro(PyObject *self, PyObject *name); + + PyObject *m_Add(PyObject *self, PyObject *o); + nb_binary_map_impl(Add); + PyObject *m_InPlaceAdd(PyObject *self, PyObject *o); + nb_binary_map_impl(InPlaceAdd); + PyObject *m_Subtract(PyObject *self, PyObject *o); + nb_binary_map_impl(Subtract); + PyObject *m_InPlaceSubtract(PyObject *self, PyObject *o); + nb_binary_map_impl(InPlaceSubtract); + PyObject *m_Multiply(PyObject *self, PyObject *o); + nb_binary_map_impl(Multiply); + PyObject *m_InPlaceMultiply(PyObject *self, PyObject *o); + nb_binary_map_impl(InPlaceMultiply); + // SWIG: nb_true_divide (no slot) <= nb_divide + PyObject *m_Divide(PyObject *self, PyObject *o); +#if PY_VERSION_HEX < 0x03000000 + nb_binary_map_impl(Divide); +#else + nb_binary_map_impl_with_target_func(Divide, TrueDivide); +#endif + PyObject *m_InPlaceDivide(PyObject *self, PyObject *o); +#if PY_VERSION_HEX < 0x03000000 + nb_binary_map_impl(InPlaceDivide); +#else + nb_binary_map_impl_with_target_func(InPlaceDivide, InPlaceTrueDivide); +#endif + + nb_binary_map(Remainder); + nb_binary_map(Divmod); + nb_unary_map(Negative); + nb_unary_map(Positive); + nb_unary_map(Absolute); + nb_unary_map(Invert); + nb_binary_map(Lshift); + nb_binary_map(Rshift); + nb_binary_map(And); + nb_binary_map(Xor); + nb_binary_map(Or); + nb_binary_map(InPlaceRemainder); + nb_ternary_map(InPlacePower); + nb_binary_map(InPlaceLshift); + nb_binary_map(InPlaceRshift); + nb_binary_map(InPlaceAnd); + nb_binary_map(InPlaceXor); + nb_binary_map(InPlaceOr); + nb_binary_map(FloorDivide); + nb_binary_map(InPlaceFloorDivide); +#if (PY_VERSION_HEX >= 0x03000000) + nb_binary_map(MatrixMultiply); + nb_binary_map(InPlaceMatrixMultiply); +#endif + + Py_ssize_t mp_length(PyObject *self); + PyObject *mp_subscript(PyObject *self, PyObject *op); + int mp_ass_subscript(PyObject *self, PyObject *ind, PyObject *op); + + inline Tensor* tensor() { + return tensor_.get(); + } + inline Tensor &tensor2() { + return *(tensor_.get()); + } + inline int ndims() const { + return tensor_->ndims(); + } + inline memory::desc desc() const { + return tensor_->desc(); + } + inline size_type size() const { + return tensor_->size(); + } + inline void *data() const { + return tensor_->data(); + } + inline mkldnn::engine get_engine() const { + return tensor_->get_engine(); + } + inline mkldnn::memory mkldnn_memory() const { + return tensor_->mkldnn_memory(); + } + inline void reset_tensor(Tensor *dst) { + tensor_.reset(dst); + } +private: + struct WeDontManageIt { + void operator() (const Py_buffer *view) { + PyBuffer_Release(const_cast(view)); + delete view; + } + }; + + std::unique_ptr view_; + +protected: + std::unique_ptr tensor_; + Reorderer *sync_reorder_; + +#if 0 +private: + static mkldnn::memory::desc _d_from_view(const Py_buffer *view + , mkldnn::memory::format order) { + mkldnn::memory::dims dims (view->ndim); + + for( int i=0; i < view->ndim; i++) + dims[i] = view->shape[i]; + + std::string format(view->format); + mkldnn::memory::data_type dt; + + if (view->itemsize == 4) { + if (std::string::npos != 
format.find_last_of('f')) {
+                dt = mkldnn::memory::f32;
+            } else if (std::string::npos != format.find_last_of('i')) {
+                dt = mkldnn::memory::s32;
+            } else
+                throw mkldnn::error(mkldnn_invalid_arguments
+                        , std::string("MKLDNN does not support data type: ")
+                        + format);
+        } else
+            throw mkldnn::error(mkldnn_invalid_arguments
+                    , "MKLDNN does not support itemsize other than 4");
+
+        return mkldnn::memory::desc(dims, dt, order);
+    }
+#endif
+};
+
+}
+
+//
+// Actual interface for python
+// DO NOT add field
+//
+class mdarray : public py_handle {
+public:
+    //FIXME
+    //yli135: add a default constructor so that we can pass vectors from native code
+    mdarray() {};
+
+    mdarray(Tensor *tensor)
+        : py_handle(std::make_shared<implementation::mdarray>(tensor)) {}
+
+    mdarray(mkldnn::memory::dims &dims
+            , mkldnn::memory::data_type dt
+            , mkldnn::memory::format format
+            , mkldnn::engine &engine)
+        : py_handle(std::make_shared<implementation::mdarray>
+                (dims, dt, format, engine)) {}
+
+    mdarray(mkldnn::memory::primitive_desc pd)
+        : py_handle(std::make_shared<implementation::mdarray>(pd)) {}
+
+    mdarray(Py_buffer *view, char input_type='d')
+        : py_handle(std::make_shared<implementation::mdarray>(view, input_type)) {}
+
+#if 0
+    mdarray(int ndims, vector<int> dims, void *data,
+            mkldnn_memory_format_t mm_fmt, data_type_t type=FLOAT32)
+        : py_handle(std::make_shared<implementation::mdarray>(ndims, dims, data, mm_fmt, type)) {}
+#endif
+
+    static PyObject *mdarray_shape_get(mdarray *arg) {
+        implementation::mdarray *self = arg->get();
+        int ndim = self->ndims();
+        PyObject *intTuple = PyTuple_New(ndim);
+        auto data = self->desc().data;
+
+        if (!intTuple)
+            goto fail;
+
+        for (int i = 0; i < ndim; i++) {
+            PyObject *o = PyLong_FromLong(data.dims[i]);
+
+            if (!o) {
+                Py_DECREF(intTuple);
+                intTuple = NULL;
+                goto fail;
+            }
+
+            PyTuple_SET_ITEM(intTuple, i, o);
+        }
+
+    fail:
+        return intTuple;
+    }
+
+    static PyObject *mdarray_dtype_get(mdarray *arg) {
+        implementation::mdarray *m = arg->get();
+        PyArray_Descr *pd;
+        // Translate our data_type to a numpy one
+        switch (static_cast<mkldnn::memory::data_type>(m->desc().data.data_type)) {
+            case mkldnn::memory::f32:
+                pd = PyArray_DescrFromType(NPY_FLOAT);
+                break;
+            case mkldnn::memory::s32:
+                pd = PyArray_DescrFromType(NPY_INT);
+                break;
+            case mkldnn::memory::s16:
+                pd = PyArray_DescrFromType(NPY_INT16);
+                break;
+            case mkldnn::memory::s8:
+                pd = PyArray_DescrFromType(NPY_INT8);
+                break;
+            case mkldnn::memory::u8:
+                pd = PyArray_DescrFromType(NPY_UINT8);
+                break;
+            default:
+                PyErr_SetString(PyExc_ValueError, "Bad mdarray data_type");
+                return nullptr;
+        }
+
+        return reinterpret_cast<PyObject *>(pd);
+    }
+
+    static long mdarray_size_get(mdarray *self) {
+        return self->get()->size();
+    }
+
+    static long mdarray_ndim_get(mdarray *self) {
+        return self->get()->desc().data.ndims;
+    }
+
+    static bool mdarray_is_mdarray_get(mdarray *self) {
+        return true;
+    }
+};
+
+using reorder_buffer = implementation::mdarray::Reorder_buffer;
+
+#endif // _MDARRAY_H_
diff --git a/python/ideep4py/py/mm/mdarray.i b/python/ideep4py/py/mm/mdarray.i
new file mode 100644
index 00000000..890ca5ed
--- /dev/null
+++ b/python/ideep4py/py/mm/mdarray.i
@@ -0,0 +1,351 @@
+/*
+ *Copyright (c) 2018 Intel Corporation.
+ *
+ *Permission is hereby granted, free of charge, to any person obtaining a copy
+ *of this software and associated documentation files (the "Software"), to deal
+ *in the Software without restriction, including without limitation the rights
+ *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *copies of the Software, and to permit persons to whom the Software is
+ *furnished to do so, subject to the following conditions:
+ *
+ *The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
+ * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include + #include + #include + #include + #include + #include + #include +#define SWIG_INLINE + #include "mdarray.h" +%} + +%include exception.i +%include pep_3118.i +%include getattro.i +%include asnumber.i +%include asmap.i +%include attribute.i +%include tp.i +%include std_vector.i + +%template(mdarrayVector) std::vector; +%template(intVector) std::vector; + +%tp_protocol(mdarray) +%buffer_protocol_producer(mdarray) +%buffer_protocol_typemap(Py_buffer *view) +%getattr_wrapper(mdarray) +%number_protocol(mdarray) +%map_protocol(mdarray) + +%define %codegen(Class, ret_type, attrib, getter) +%{ + inline ret_type %mangle(Class) ##_## attrib ## _get(Class *self_) { + return (ret_type) Class::getter(self_); + } +%} +%enddef + +%define %extend_ro_attr(Class, ret_type, attrib, getter) + %immutable Class::attrib; + %extend Class { + ret_type attrib; + } + %codegen(Class, ret_type, attrib, getter) +%enddef + +%define %extend_ro_attr_and_own(Class, ret_type, attrib, getter) + %immutable Class::attrib; + %newobject Class::attrib; + + %extend Class { + ret_type attrib; + } + + %codegen(Class, ret_type *, attrib, getter) +%enddef + +%extend_ro_attr(mdarray, PyObject *, dtype, mdarray_dtype_get) +%extend_ro_attr(mdarray, PyObject *, shape, mdarray_shape_get) +%extend_ro_attr(mdarray, long, size, mdarray_size_get) +%extend_ro_attr(mdarray, long, ndim, mdarray_ndim_get) +%extend_ro_attr(mdarray, bool, is_mdarray, mdarray_is_mdarray_get) + +%extend mdarray { + PyObject *axpby(double a, double b, PyObject *y) { + return (*$self)->axpby(a, b, y); + } + + PyObject *inplace_axpby(double a, double b, PyObject *y) { + /// Second param y is a harmless dummy + return (*$self)->inplace_axpby(a, y, b, y); + } + + PyObject *flat() { + return (*self)->flat(); + } +} + +/* mdarray::reshape */ +%extend mdarray { + %typemap(in) (...)(vector args) { + int i; + int argc; + argc = PySequence_Size(varargs); + if (argc > 4) { + PyErr_SetString(PyExc_ValueError,"Too many arguments"); + return NULL; + } + if (argc == 1) { + Py_ssize_t size = 0; + PyObject *o = PySequence_GetItem(varargs,0); + if (PyNumber_Check(o)) { + goto numpy_surrogate; + } else if (!PySequence_Check(o)) { + PyErr_SetString(PyExc_ValueError,"Expected a sequence"); + return NULL; + } + size = PySequence_Size(o); + if (size != 4 && size != 2) { + numpy_surrogate: + PyObject *surrogate = PyArray_FromAny($self, nullptr, 0, 0 + , NPY_ARRAY_ELEMENTSTRIDES, nullptr); + + if (surrogate == nullptr) { + PyErr_SetString(PyExc_ValueError,"Unexpected array"); + return nullptr; + } + PyObject *res = PyArray_Reshape((PyArrayObject *)surrogate, o); + + Py_DECREF(surrogate); + return res; + } + for (i = 0; i < PySequence_Size(o); i++) { + PyObject *obj = PySequence_GetItem(o, i); + if (!PyInt_Check(obj) && !PyLong_Check(obj)) { + PyErr_SetString(PyExc_ValueError,"Expected a int or long in sequence"); + return NULL; + } + args.push_back(PyInt_AsLong(obj)); + } + } else { + Py_ssize_t size = argc; + if (size != 4 && size != 2) { + PyObject 
*surrogate = PyArray_FromAny($self, nullptr, 0, 0 + , NPY_ARRAY_ELEMENTSTRIDES, nullptr); + + if (surrogate == nullptr) { + PyErr_SetString(PyExc_ValueError,"Unexpected array"); + return nullptr; + } + PyObject *res = PyArray_Reshape((PyArrayObject *)surrogate, varargs); + + Py_DECREF(surrogate); + return res; + } + for (i = 0; i < argc; i++) { + PyObject *o = PySequence_GetItem(varargs,i); + if (!PyInt_Check(o) && !PyLong_Check(o)) { + PyErr_SetString(PyExc_ValueError,"Expected a int"); + return NULL; + } + //args[i] = PyInt_AsLong(o); + args.push_back(PyInt_AsLong(o)); + } + } + $1 = &args; + } + + PyObject *reshape(...) { + va_list vl; + va_start(vl, self); + vector *dims = va_arg(vl, vector*); + va_end(vl); + return (*self)->reshape(self, *dims); + } +} + +/* mdarray::sum */ +%extend mdarray { + %feature ("kwargs") sum; + %typemap(in) vector axis { + $1.clear(); + if (PyTuple_Check(obj1)) { + for (int i = 0; i < PyTuple_Size(obj1); i++) { + PyObject *item = PyTuple_GetItem(obj1, i); +#if PY_VERSION_HEX > 0x03000000 + if (!PyLong_Check(item)) { +#else + if (!PyInt_Check(item)) { +#endif + SWIG_exception_fail(SWIG_ValueError, + "in method '" "mdarray_sum" "', argument " "2"" of type '" "tuple (int, int, ...)""'"); + SWIG_fail; + } + + $1.push_back(PyLong_AsLong(item)); + } +#if PY_VERSION_HEX > 0x03000000 + } else if (PyLong_Check(obj1)) { +#else + } else if (PyInt_Check(obj1)) { +#endif + $1.push_back(PyLong_AsLong(obj1)); + } else { + void *_obj1; + if (!SWIG_IsOK(SWIG_ConvertPtr(obj1, &_obj1, nullptr, 0))) { + PyErr_SetString(PyExc_ValueError, "Wrong object in sum wrapper"); + SWIG_fail; + } + + if (!_obj1) { + $1.clear(); + } else { + SWIG_exception_fail(SWIG_ValueError, + "in method '" "mdarray_sum" "', argument " "2"" of type '" "tuple or int""'"); + SWIG_fail; + } + } + } + + %typemap(argout) (vector axis) { + if (!$result) { + auto *surrogate = reinterpret_cast(PyArray_FromAny( + $self, nullptr, 0, 0, NPY_ARRAY_ELEMENTSTRIDES, nullptr)); + if (surrogate == nullptr) + return nullptr; + + if (!$1.size()) { + for (int i = 0; i < PyArray_NDIM(surrogate); i++) + $1.push_back(i); + } + + vector expected_shape; + long *shape = PyArray_DIMS(surrogate); + if (arg5) { + for (int v = 0; v < PyArray_NDIM(surrogate); v++) + expected_shape.push_back(shape[v]); + + for (unsigned a = 0; a < $1.size(); a++) + expected_shape[$1[a]] = 1; + } + + auto *res = surrogate; + for (auto i = 0; i < static_cast($1.size()); i++) { + auto *tmp = reinterpret_cast(PyArray_Sum( + res, $1[i], PyArray_TYPE(res), nullptr)); + for (unsigned j = i + 1; j < $1.size(); j++) { + if ($1[i] < $1[j]) + $1[j] -= 1; + } + + // if (i < axis.size() - 1) + // Py_DECREF(res); + + Py_DECREF(res); + res = tmp; + } + + if (arg5) { + PyObject *new_shape = PyTuple_New(expected_shape.size()); + for (unsigned v = 0; v < expected_shape.size(); v++) +#if PY_VERSION_HEX > 0x03000000 + PyTuple_SetItem(new_shape, v, PyLong_FromLong(expected_shape[v])); +#else + PyTuple_SetItem(new_shape, v, PyInt_FromLong(expected_shape[v])); +#endif + res = reinterpret_cast(PyArray_Reshape(res, new_shape)); + } + return reinterpret_cast(res); + } + } + + PyObject *sum(vector axis={0}, int dtype=0, + PyObject *out=nullptr, bool keepdims=false) { + return (*self)->sum(axis, keepdims); + } +} + +/* +%extend mdarray { + PyObject *__getstate__() { + return (*$self)->__getstate__(); + } + + //TODO + %typemap(default) (PyObject *state) { + PyObject *state; + + if (!PyArg_UnpackTuple(args, (char *)"mdarray___setstate__", 0, 1, &state)) SWIG_fail; + + if 
(!PyTuple_Check(state)) SWIG_fail; + + PyObject *py_dims = PyTuple_GetItem(state, 0); + PyObject *py_dtype = PyTuple_GetItem(state, 1); + PyObject *py_format = PyTuple_GetItem(state, 2); + PyObject *py_engine = PyTuple_GetItem(state, 3); + PyObject *py_rdata = PyTuple_GetItem(state, 4); + + void *rdata = PyLong_AsVoidPtr(py_rdata); + + mdarray *unpickled_mdarr = nullptr; //new mdarray(dims, dtype, format, engine); + (*unpickled_mdarr)->unpickled_data(rdata); + SwigPyObject *sobj = SWIG_Python_GetSwigThis(self); + if (sobj) { + sobj->ptr = reinterpret_cast(unpickled_mdarr); + sobj->ty = SWIGTYPE_p_mdarray; + sobj->own = 0; + sobj->next = 0; + } else { + SWIG_fail; + } + } + + void __setstate__(PyObject *state) { + (*$self)->__setstate__(state); + } +} +*/ + +class mdarray: public py_handle { +public: + // It is deliberately NOT matching prototypes! + // FIXME + // add default constructor so that native can pass vector to python + mdarray(); + mdarray(Py_buffer *view, char input_type = 'd'); + virtual ~mdarray(); +}; + +%typemap(in) (mdarray *in_mdarray) { + void *that; + int res1 = SWIG_ConvertPtr($input, &that, nullptr, 0); + if (!SWIG_IsOK(res1)) { + PyErr_SetString(PyExc_ValueError, "Can't convert mdarray pyobject"); + return nullptr; + } + $1 = (reinterpret_cast(that)); +}; + +class reorder_buffer { +public: + reorder_buffer(mdarray in); +}; + +%include "basic.i" diff --git a/python/ideep4py/py/primitives/bn.i b/python/ideep4py/py/primitives/bn.i new file mode 100644 index 00000000..855b7f1b --- /dev/null +++ b/python/ideep4py/py/primitives/bn.i @@ -0,0 +1,35 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "bn_py.h" + #include "op_param.h" +%} + +%include "param.i" +%include "std_vector.i" +%include "bn_py.h" + +%template(batchNormalization) batch_normalization_py; diff --git a/python/ideep4py/py/primitives/bn_py.h b/python/ideep4py/py/primitives/bn_py.h new file mode 100644 index 00000000..ed9e234f --- /dev/null +++ b/python/ideep4py/py/primitives/bn_py.h @@ -0,0 +1,72 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _BN_PY_H_ +#define _BN_PY_H_ + +#include +#include +#include "op_param.h" +#include "mdarray.h" +#include "bn.h" + +template +class batch_normalization_py { +public: + static std::vector Forward(mdarray *src, + mdarray *w, mdarray *mean, mdarray *var, float eps) { + + std::vector outs; + auto tensors = batch_normalization::Forward( + (src->get()->tensor()), + (w ? w->get()->tensor() : nullptr), + (mean ? mean->get()->tensor() : nullptr), + (var ? var->get()->tensor() : nullptr), eps); + + for (int i = 0; i < tensors.size(); i++) + outs.push_back(mdarray(tensors[i])); + + return outs; + } + + static std::vector Backward(mdarray *src, mdarray *diff_dst, + mdarray *mean, mdarray *var, mdarray *w, float eps) { + + std::vector outs; + auto tensors = batch_normalization::Backward( + (src->get()->tensor()), + (diff_dst->get()->tensor()), + (mean->get()->tensor()), + (var->get()->tensor()), + (w ? w->get()->tensor() : nullptr), + eps); + + for (int i = 0; i < tensors.size(); i++) + outs.push_back(mdarray(tensors[i])); + + return outs; + } +}; + +#endif // _BN_PY_H_ diff --git a/python/ideep4py/py/primitives/concat.i b/python/ideep4py/py/primitives/concat.i new file mode 100644 index 00000000..7fbaefa9 --- /dev/null +++ b/python/ideep4py/py/primitives/concat.i @@ -0,0 +1,44 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. 
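For orientation, here is a rough Python driver for the batch normalization wrapper above; it is a sketch only. The `batchNormalization` name comes from the `%template` line in `bn.i`, but the gamma/beta packing of the weight array and the ordering of the returned vector are assumptions not confirmed by this diff:

    import numpy
    import ideep4py

    x = ideep4py.mdarray(
        numpy.random.rand(2, 16, 4, 4).astype(numpy.float32))
    w = ideep4py.mdarray(numpy.ones((2, 16), numpy.float32))  # assumed gamma/beta packing
    mean = ideep4py.mdarray(numpy.zeros(16, numpy.float32))
    var = ideep4py.mdarray(numpy.ones(16, numpy.float32))

    # Forward returns a vector of mdarrays; which slot holds y versus the
    # saved statistics is an assumption here.
    outs = ideep4py.batchNormalization.Forward(x, w, mean, var, 2e-5)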
+ * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "concat_py.h" +%} + +%include "std_vector.i" +%include "concat_py.h" + +%template(concat) Concat_Py; + +// +// Python API for Concat +// +// mdarray Concat_Py::Forward( +// std::vector src, +// int axis); +// std::vector Concat_Py::Backward( +// mdarray *diff_dst, +// std::vector offsets, +// int axis); diff --git a/python/ideep4py/py/primitives/concat_py.h b/python/ideep4py/py/primitives/concat_py.h new file mode 100644 index 00000000..c528e907 --- /dev/null +++ b/python/ideep4py/py/primitives/concat_py.h @@ -0,0 +1,82 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _CONCAT_PY_H_ +#define _CONCAT_PY_H_ + +#include +#include +#include "mdarray.h" +#include "concat.h" + +template +class Concat_Py +{ +public: + /* + * Python Concat Forward + * params: + * src: input, xs + * axis + */ + static mdarray Forward(std::vector src, int axis) { + std::vector src_tensor; + + for (int i = 0; i < src.size(); i++) { + src_tensor.push_back(src[i].get()->tensor()); + } + + Tensor *dst_tensor = Concat::Forward(src_tensor, axis); + + mdarray dst_mdarray = mdarray(dst_tensor); + return dst_mdarray; + } + + /* + * Python Concat Backward + */ + static std::vector Backward(mdarray *diff_dst, + std::vector offsets, + int axis) { + std::vector gxs; + + std::vector gxs_tensor = Concat::Backward( + (diff_dst->get()->tensor()), + offsets, + axis); + + // + for (int i = 0; i < gxs_tensor.size(); i++){ + gxs.push_back(mdarray(gxs_tensor[i])); + } + + return gxs; + } + +}; + +#endif // _CONCAT_PY_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/py/primitives/conv.i b/python/ideep4py/py/primitives/conv.i new file mode 100644 index 00000000..5d96d58c --- /dev/null +++ b/python/ideep4py/py/primitives/conv.i @@ -0,0 +1,48 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
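A minimal usage sketch for the concat wrapper declared above. The `concat`, `mdarrayVector`, and `intVector` names come from the `%template` lines in `concat.i` and `mdarray.i`; the exact offset convention that `Backward` expects is an assumption:

    import numpy
    import ideep4py

    xs = [ideep4py.mdarray(numpy.random.rand(2, c, 4, 4).astype(numpy.float32))
          for c in (8, 24)]
    y = ideep4py.concat.Forward(ideep4py.mdarrayVector(xs), 1)  # join along channels

    # Backward splits a gradient back at the recorded channel offsets
    # (offset convention assumed, not documented in this diff).
    gy = ideep4py.mdarray(numpy.random.rand(2, 32, 4, 4).astype(numpy.float32))
    gxs = ideep4py.concat.Backward(gy, ideep4py.intVector([0, 8]), 1)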
+ * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "conv_py.h" + #include "op_param.h" +%} + +%include "param.i" +%include "conv_py.h" + +%template(convolution2D) Convolution2D_Py; + +// +// Python API for Convolution2D +// +// mdarray Convolution2D_Py::Forward( +// mdarray *src, mdarray *weights, +// mdarray *dst, mdarray *bias, +// conv_param_t *cp); +// std::vector Convolution2D_Py::BackwardWeights( +// mdarray *src, mdarray *diff_dst, +// con_prarm_t *cp); +// mdarray Convolution2D_Py::BackwardData( +// mdarray *weights, mdarray *diff_dst, +// conv_param_t *cp); diff --git a/python/ideep4py/py/primitives/conv_py.h b/python/ideep4py/py/primitives/conv_py.h new file mode 100644 index 00000000..d11ba34a --- /dev/null +++ b/python/ideep4py/py/primitives/conv_py.h @@ -0,0 +1,126 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _CONV_PY_H_ +#define _CONV_PY_H_ + +#include +#include +#include "op_param.h" +#include "mdarray.h" +#include "conv.h" + +template +class Convolution2D_Py +{ +public: + /* + * Python Convolution Forward + * Y = W*X + b + * params: + * src: input, x + * weight: weights, w + * dst: output, y + * bias: bias, b + * cp: convolution parameters + */ + static mdarray Forward(mdarray *src, + mdarray *weights, + mdarray *bias, + conv_param_t *cp) { + auto tensor = Convolution2D::Forward( + src->get()->tensor(), + weights->get()->tensor(), + bias ? 
bias->get()->tensor() : nullptr, cp); + + auto out = mdarray(tensor); + return out; + } + + /* + * Python Convolution backward weights + * gW = gy*x + * params: + * src: input, x + * diff_dst: diff dst, gy + * cp: convolution parameters + */ + static mdarray BackwardWeights(mdarray *src, + mdarray *diff_dst, + conv_param_t *cp) { + auto tensor = Convolution2D::BackwardWeights( + (src->get()->tensor()), + (diff_dst->get()->tensor()), cp); + + auto out = mdarray(tensor); + return out; + } + + /* + * Python Convolution backward weights & bias + * gW = gy*x + * params: + * src: input, x + * diff_dst: diff dst, gy + * cp: convolution parameters + */ + static std::vector BackwardWeightsBias(mdarray *src, + mdarray *diff_dst, + conv_param_t *cp) { + std::vector outs; + auto tensors = Convolution2D::BackwardWeightsBias( + (src->get()->tensor()), + (diff_dst->get()->tensor()), cp); + + for (int i = 0; i < tensors.size(); i++) + outs.push_back(mdarray(tensors[i])); + + return outs; + } + + /* + * Python Convolution backward data + * gx = gy*w + * param: + * weights: weights, w + * diff_dst: diff dst, gy + * cp: convolution parameters + */ + static mdarray BackwardData(mdarray *weights, + mdarray *diff_dst, + conv_param_t *cp) { + auto tensor = Convolution2D::BackwardData( + (weights->get()->tensor()), + (diff_dst->get()->tensor()), cp); + + auto out = mdarray(tensor); + return out; + } + +}; + +#endif // _CONV_PY_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/py/primitives/dropout.i b/python/ideep4py/py/primitives/dropout.i new file mode 100644 index 00000000..901e9216 --- /dev/null +++ b/python/ideep4py/py/primitives/dropout.i @@ -0,0 +1,35 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "dropout_py.h" + #include "op_param.h" +%} + +%include "param.i" +%include "std_vector.i" +%include "dropout_py.h" + +%template(dropout) Dropout_py; diff --git a/python/ideep4py/py/primitives/dropout_py.h b/python/ideep4py/py/primitives/dropout_py.h new file mode 100644 index 00000000..a3e80450 --- /dev/null +++ b/python/ideep4py/py/primitives/dropout_py.h @@ -0,0 +1,53 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
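Stepping back to the convolution wrapper completed in `conv_py.h` above, a hypothetical Python call sequence might look as follows; the field names follow the `conv_param_t` struct exposed as `convolution2DParam` in `param.i`, while the way `out_dims` is assigned from Python is an assumption:

    import numpy
    import ideep4py

    x = ideep4py.mdarray(numpy.random.rand(2, 16, 8, 8).astype(numpy.float32))
    w = ideep4py.mdarray(numpy.random.rand(32, 16, 3, 3).astype(numpy.float32))

    cp = ideep4py.convolution2DParam()
    cp.kh = cp.kw = 3                                  # kernel size
    cp.sy = cp.sx = 1                                  # stride
    cp.pad_lh = cp.pad_lw = cp.pad_rh = cp.pad_rw = 1  # "same" padding
    cp.out_dims = ideep4py.intVector([2, 32, 8, 8])    # assumed assignment style

    y = ideep4py.convolution2D.Forward(x, w, None, cp)  # bias pointer may be null
    gy = ideep4py.mdarray(numpy.ones((2, 32, 8, 8), numpy.float32))
    gx = ideep4py.convolution2D.BackwardData(w, gy, cp)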
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _DROPOUT_PY_H_ +#define _DROPOUT_PY_H_ + +#include +#include +#include "op_param.h" +#include "mdarray.h" +#include "dropout.h" + +template +class Dropout_py { +public: + static std::vector Forward(mdarray* x, float ratio) { + auto tensors = Dropout::Forward(x->get()->tensor(), ratio); + + std::vector outs; + for (const auto& tensor : tensors) { + outs.push_back(mdarray(tensor)); + } + + return outs; // [0]: mask, [1]: y + } + + static mdarray Backward(mdarray* mask, mdarray* gy) { + return mdarray(Dropout::Backward(mask->get()->tensor(), gy->get()->tensor())); + } +}; + +#endif // _DROPOUT_PY_H_ diff --git a/python/ideep4py/py/primitives/eltwise.i b/python/ideep4py/py/primitives/eltwise.i new file mode 100644 index 00000000..aea74cff --- /dev/null +++ b/python/ideep4py/py/primitives/eltwise.i @@ -0,0 +1,34 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "eltwise_py.h" +%} + +%include "std_vector.i" +%include "eltwise_py.h" + +%template(relu) Relu_Py; +%template(tanh) Tanh_Py; diff --git a/python/ideep4py/py/primitives/eltwise_py.h b/python/ideep4py/py/primitives/eltwise_py.h new file mode 100644 index 00000000..b583c512 --- /dev/null +++ b/python/ideep4py/py/primitives/eltwise_py.h @@ -0,0 +1,92 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
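The dropout header above documents its Forward outputs as [0]: mask, [1]: y, which the following sketch relies on; the module-level `dropout` name comes from the `%template` line in `dropout.i`:

    import numpy
    import ideep4py

    x = ideep4py.mdarray(numpy.random.rand(2, 16, 4, 4).astype(numpy.float32))
    mask, y = ideep4py.dropout.Forward(x, 0.5)  # [0]: mask, [1]: y per the header

    gy = ideep4py.mdarray(numpy.ones((2, 16, 4, 4), numpy.float32))
    gx = ideep4py.dropout.Backward(mask, gy)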
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#pragma once + +#include +#include +#include "mdarray.h" +#include "eltwise.h" + +template +class Relu_Py +{ +public: + static mdarray Forward(mdarray &src) { + // Shoule be removed in future???? + implementation::mdarray *src_internal = src.get(); + Tensor *dst_tensor = Eltwise::Forward( + src_internal->tensor(), ELTWISE_RELU, 0.0 , 0.0); + + mdarray dst_mdarray = mdarray(dst_tensor); + return dst_mdarray; + } + + static mdarray Backward(mdarray& src, mdarray& diff_dst) { + //FIXME + //Should be removed in future + Tensor *src_tensor = src.get()->tensor(); + Tensor *diff_dst_tensor = diff_dst.get()->tensor(); + + Tensor *diff_src_tensor = Eltwise::Backward(src_tensor, diff_dst_tensor, ELTWISE_RELU, 0.0, 0.0); + + // FIXME + // In future, mdarray will have a Tensor member, no need to create a new one + mdarray diff_src_mdarray = mdarray(diff_src_tensor); + return diff_src_mdarray; + } + +}; + +template +class Tanh_Py +{ +public: + static mdarray Forward(mdarray &src) { + // Shoule be removed in future???? + implementation::mdarray *src_internal = src.get(); + Tensor *dst_tensor = Eltwise::Forward( + src_internal->tensor(), ELTWISE_TANH, 0.0 , 0.0); + + mdarray dst_mdarray = mdarray(dst_tensor); + return dst_mdarray; + } + + static mdarray Backward(mdarray& src, mdarray& diff_dst) { + //FIXME + //Should be removed in future + Tensor *src_tensor = src.get()->tensor(); + Tensor *diff_dst_tensor = diff_dst.get()->tensor(); + + Tensor *diff_src_tensor = Eltwise::Backward(src_tensor, diff_dst_tensor, ELTWISE_TANH, 0.0, 0.0); + + // FIXME + // In future, mdarray will have a Tensor member, no need to create a new one + mdarray diff_src_mdarray = mdarray(diff_src_tensor); + return diff_src_mdarray; + } + +}; + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/py/primitives/linear.i b/python/ideep4py/py/primitives/linear.i new file mode 100644 index 00000000..51d44825 --- /dev/null +++ b/python/ideep4py/py/primitives/linear.i @@ -0,0 +1,48 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
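The eltwise wrappers above reduce to two static calls per activation; a sketch, assuming the `relu` and `tanh` names from the `%template` lines in `eltwise.i` are exposed at the top level of `ideep4py`:

    import numpy
    import ideep4py

    x = ideep4py.mdarray(
        numpy.random.uniform(-1, 1, (2, 16, 4, 4)).astype(numpy.float32))
    y = ideep4py.relu.Forward(x)

    gy = ideep4py.mdarray(numpy.ones((2, 16, 4, 4), numpy.float32))
    gx = ideep4py.relu.Backward(x, gy)  # tanh.Forward/Backward mirror this shape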
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "linear_py.h" + #include "op_param.h" +%} + +%include "param.i" +%include "std_vector.i" +%include "linear_py.h" + +%template(linear) Linear_Py; +// +// API for Linear +// mdarray Linear_F32::Forward( +// mdarray& src, mdarray& weights, +// mdarray& dst, mdarray& bias, +// linear_param_t& lp); +// std::vector Linear_F32::BackwardWeights( +// mdarray& src, mdarray& diff_dst, +// linear_param_t& lp); +// mdarray Linear_F32::BackwardData( +// mdarray& weights, mdarray& diff_dst, +// linear_param_t* lp); + diff --git a/python/ideep4py/py/primitives/linear_py.h b/python/ideep4py/py/primitives/linear_py.h new file mode 100644 index 00000000..8352cd46 --- /dev/null +++ b/python/ideep4py/py/primitives/linear_py.h @@ -0,0 +1,84 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _LINEAR_PY_H_ +#define _LINEAR_PY_H_ + +#include +#include +#include "op_param.h" +#include "mdarray.h" +#include "linear.h" + +template +class Linear_Py +{ +public: + static mdarray Forward(mdarray *src, + mdarray *weights, + mdarray *bias) { + auto tensor = Linear::Forward( + src->get()->tensor(), + weights->get()->tensor(), + bias ? 
bias->get()->tensor(): nullptr); + + auto out = mdarray(tensor); + return out; + } + + static mdarray BackwardWeights(mdarray* src, + mdarray* diff_dst) { + auto tensors = Linear::BackwardWeights( + src->get()->tensor(), + diff_dst->get()->tensor(), false); + + auto out = mdarray(tensors[0]); + return out; + } + + static std::vector BackwardWeightsBias(mdarray* src, + mdarray* diff_dst) { + std::vector outs; + auto tensors = Linear::BackwardWeights( + src->get()->tensor(), + diff_dst->get()->tensor(), true); + + for (int i = 0; i < tensors.size(); i++) + outs.push_back(mdarray(tensors[i])); + + return outs; + } + + static mdarray BackwardData(mdarray* weights, + mdarray* diff_dst) { + auto tensor = Linear::BackwardData( + weights->get()->tensor(), + diff_dst->get()->tensor()); + + auto out = mdarray(tensor); + return out; + } +}; + +#endif //_LINEAR_PY_H diff --git a/python/ideep4py/py/primitives/lrn.i b/python/ideep4py/py/primitives/lrn.i new file mode 100755 index 00000000..b08f220a --- /dev/null +++ b/python/ideep4py/py/primitives/lrn.i @@ -0,0 +1,47 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "lrn_py.h" + #include "op_param.h" +%} + +%include "param.i" +%include "std_vector.i" +%include "lrn_py.h" + +%template(localResponseNormalization) LocalResponseNormalization_Py; + +// +// Python API for LocalResponseNormalization +// +// std::vector LocalResponseNormalization_Py::Forward( +// mdarray *src, +// lrn_prarm_t *pp); +// mdarray* LocalResponseNormalization_Py::Backward( +// mdarray *src, +// mdarray *diff_dst, +// mdarray *ws, +// lrn_param_t *pp); diff --git a/python/ideep4py/py/primitives/lrn_py.h b/python/ideep4py/py/primitives/lrn_py.h new file mode 100755 index 00000000..bc7ce387 --- /dev/null +++ b/python/ideep4py/py/primitives/lrn_py.h @@ -0,0 +1,95 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
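A usage sketch for the linear (inner product) wrapper above, following the four entry points it defines; the `linear` name comes from the `%template` line in `linear.i`, and the weight layout is assumed:

    import numpy
    import ideep4py

    x = ideep4py.mdarray(numpy.random.rand(8, 32).astype(numpy.float32))
    w = ideep4py.mdarray(numpy.random.rand(16, 32).astype(numpy.float32))

    y = ideep4py.linear.Forward(x, w, None)       # bias pointer may be null
    gy = ideep4py.mdarray(numpy.ones((8, 16), numpy.float32))
    gw = ideep4py.linear.BackwardWeights(x, gy)   # weights only
    gw2, gb = ideep4py.linear.BackwardWeightsBias(x, gy)
    gx = ideep4py.linear.BackwardData(w, gy)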
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _LRN_PY_H_ +#define _LRN_PY_H_ + +#include +#include +#include "op_param.h" +#include "mdarray.h" +#include "lrn.h" + +template +class LocalResponseNormalization_Py +{ +public: + /* + * Python Lrn Forward + * params: + * src: input, x + * pp: lrn parameters + */ + static std::vector Forward(mdarray *src, + lrn_param_t *pp) { + std::vector outputs; + + // Shoule be removed in future???? + implementation::mdarray *src_internal = src->get(); + + std::vector outputs_tensor = LocalResponseNormalization::Forward( + (src_internal->tensor()), + pp); + //FIXME + for (int i = 0; i < outputs_tensor.size(); i++) { + outputs.push_back( mdarray(outputs_tensor[i]) ); + } + + return outputs; + } + + /* + * Python Lrn backward + * param: + * src: x + * diff_dst: diff dst, gy + * ws: workspace + * pp: lrn parameters + */ + static mdarray Backward(mdarray *src, mdarray *diff_dst, mdarray *ws, lrn_param_t *pp) { + //FIXME + //Should be removed in future + implementation::mdarray *diff_dst_internal = diff_dst->get(); + implementation::mdarray *src_internal = src->get(); + implementation::mdarray *ws_internal = ws->get(); + + Tensor *diff_src_tensor = LocalResponseNormalization::Backward( + (src_internal->tensor()), + (diff_dst_internal->tensor()), + (ws_internal->tensor()), + pp); + + + // FIXME + // In future, mdarray will have a Tensor member, no need to create a new one + mdarray diff_src_mdarray = mdarray(diff_src_tensor); + return diff_src_mdarray; + } + +}; + +#endif // _LRN_PY_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/py/primitives/param.i b/python/ideep4py/py/primitives/param.i new file mode 100644 index 00000000..45ad0911 --- /dev/null +++ b/python/ideep4py/py/primitives/param.i @@ -0,0 +1,60 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
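To make the LRN flow above concrete, a hedged sketch using the `lrn_param_t` fields defined in `param.i` below; whether Forward's output vector is ordered (y, workspace) is an assumption:

    import numpy
    import ideep4py

    pp = ideep4py.localResponseNormalizationParam()
    pp.n = 5          # local size
    pp.k = 2.0
    pp.alpha = 1e-4
    pp.beta = 0.75
    pp.algo_kind = ideep4py.localResponseNormalizationParam.lrn_across_channels

    x = ideep4py.mdarray(numpy.random.rand(2, 16, 4, 4).astype(numpy.float32))
    y, ws = ideep4py.localResponseNormalization.Forward(x, pp)  # assumed order
    gy = ideep4py.mdarray(numpy.ones((2, 16, 4, 4), numpy.float32))
    gx = ideep4py.localResponseNormalization.Backward(x, gy, ws, pp)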
+ * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%rename (convolution2DParam) conv_param_t; +struct conv_param_t { + std::vector out_dims; + int kh, kw; // kernel size + int dilate_y = 0, dilate_x = 0; // in MKL-DNN, common conv is treated as 0 dilate + int sy, sx; // stride + int pad_lh, pad_lw, pad_rh, pad_rw; //padding +}; + +%rename (pooling2DParam) pooling_param_t; +struct pooling_param_t { + std::vector out_dims; + int kh, kw; // kernel size + int sy, sx; // stride + int pad_lh, pad_lw, pad_rh, pad_rw; //padding + + enum algorithm { + pooling_max, + pooling_avg, + pooling_avg_include_padding, + pooling_avg_exclude_padding, + } algo_kind; +}; + +%rename (localResponseNormalizationParam) lrn_param_t; +struct lrn_param_t { + int n; // local size + double k; + double alpha; + double beta; + + enum algorithm { + lrn_across_channels, + lrn_within_channel, + } algo_kind; +}; diff --git a/python/ideep4py/py/primitives/pooling.i b/python/ideep4py/py/primitives/pooling.i new file mode 100644 index 00000000..16c9105d --- /dev/null +++ b/python/ideep4py/py/primitives/pooling.i @@ -0,0 +1,46 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + #define SWIG_FILE_WITH_INIT + #include "pooling_py.h" + #include "op_param.h" +%} + +%include "param.i" +%include "std_vector.i" +%include "pooling_py.h" + +%template(pooling2D) Pooling2D_Py; + +// +// Python API for Pooling2D +// +// std::vector Pooling2D_Py::Forward( +// mdarray *src, +// pooling_prarm_t *pp); +// mdarray* Pooling2D_Py::Backward( +// mdarray *diff_dst, +// mdarray *ws, +// conv_param_t *pp); diff --git a/python/ideep4py/py/primitives/pooling_py.h b/python/ideep4py/py/primitives/pooling_py.h new file mode 100644 index 00000000..fe1bbd68 --- /dev/null +++ b/python/ideep4py/py/primitives/pooling_py.h @@ -0,0 +1,104 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
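As a concrete illustration of the parameter structs above, here is a hypothetical construction of a max-pooling descriptor from Python; whether SWIG exposes the nested enum and the `out_dims` vector exactly as shown is an assumption:

    import ideep4py

    pp = ideep4py.pooling2DParam()
    pp.kh = pp.kw = 2                                  # kernel size
    pp.sy = pp.sx = 2                                  # stride
    pp.pad_lh = pp.pad_lw = pp.pad_rh = pp.pad_rw = 0  # no padding
    pp.algo_kind = ideep4py.pooling2DParam.pooling_max
    pp.out_dims = ideep4py.intVector([2, 16, 2, 2])    # assumed assignment style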
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +#ifndef _POOLING_PY_H_ +#define _POOLING_PY_H_ + +#include +#include +#include "op_param.h" +#include "mdarray.h" +#include "pooling.h" + +template +class Pooling2D_Py +{ +public: + /* + * Python Pooling Forward + * params: + * src: input, x + * pp: pooling parameters + */ + static std::vector Forward(mdarray *src, + pooling_param_t *pp) { + std::vector outputs; + + // Shoule be removed in future???? + implementation::mdarray *src_internal = src->get(); + + std::vector outputs_tensor = Pooling2D::Forward( + (src_internal->tensor()), + pp); + // FIXME + //FIXME + for (int i = 0; i < outputs_tensor.size(); i++) { + outputs.push_back( mdarray(outputs_tensor[i]) ); + } + + return outputs; + } + + /* + * Python Pooling backward + * param: + * diff_dst: diff dst, gy + * ws: workspace + * pp: pooling parameters + */ + static mdarray Backward(mdarray *diff_dst, + mdarray *ws, + pooling_param_t *pp) { + //FIXME + //Should be removed in future + implementation::mdarray *diff_dst_internal = diff_dst->get(); + implementation::mdarray *ws_internal; + if ( pp->algo_kind == pooling_param_t::algorithm::pooling_max) + ws_internal = ws->get(); + + Tensor *diff_src_tensor; + if ( pp->algo_kind == pooling_param_t::algorithm::pooling_max) { + diff_src_tensor = Pooling2D::Backward( + (diff_dst_internal->tensor()), + (ws_internal->tensor()), + pp); + } else { + diff_src_tensor = Pooling2D::Backward( + (diff_dst_internal->tensor()), + NULL, + pp); + } + + // FIXME + // In future, mdarray will have a Tensor member, no need to create a new one + mdarray diff_src_mdarray = mdarray(diff_src_tensor); + return diff_src_mdarray; + } + +}; + +#endif // _POOLING_PY_H_ + + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s diff --git a/python/ideep4py/py/swig_utils/asmap.i b/python/ideep4py/py/swig_utils/asmap.i new file mode 100644 index 00000000..e6ff7dab --- /dev/null +++ b/python/ideep4py/py/swig_utils/asmap.i @@ -0,0 +1,74 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
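Driving the pooling wrapper above from Python might then look like this sketch; the (y, workspace) output order is an assumption, and as the code above shows, the workspace is only consulted for max pooling:

    import numpy
    import ideep4py

    pp = ideep4py.pooling2DParam()
    pp.kh = pp.kw = pp.sy = pp.sx = 2
    pp.pad_lh = pp.pad_lw = pp.pad_rh = pp.pad_rw = 0
    pp.algo_kind = ideep4py.pooling2DParam.pooling_max

    x = ideep4py.mdarray(numpy.random.rand(2, 16, 4, 4).astype(numpy.float32))
    y, ws = ideep4py.pooling2D.Forward(x, pp)      # assumed output order
    gy = ideep4py.mdarray(numpy.ones((2, 16, 2, 2), numpy.float32))
    gx = ideep4py.pooling2D.Backward(gy, ws, pp)   # ws ignored for average pooling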
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + template + struct map_traits { + static Py_ssize_t mp_length(PyObject *self) { + void *that; + + int res1 = SWIG_ConvertPtr(self, &that, nullptr, 0); + if (!SWIG_IsOK(res1)) { + PyErr_SetString(PyExc_ValueError, "Wrong self object in mp_length"); + return 0; + } + + return (*reinterpret_cast(that))->mp_length(self); + } + + static PyObject *mp_subscript(PyObject *self, PyObject *op) { + void *that; + + int res1 = SWIG_ConvertPtr(self, &that, nullptr, 0); + if (!SWIG_IsOK(res1)) { + PyErr_SetString(PyExc_ValueError, "Wrong self object in mp_subscript"); + return nullptr; + } + + return (*reinterpret_cast(that))->mp_subscript(self, op); + } + + static int mp_ass_subscript(PyObject *self, PyObject *ind, PyObject *op) { + void *that; + + int res1 = SWIG_ConvertPtr(self, &that, nullptr, 0); + if (!SWIG_IsOK(res1)) { + PyErr_SetString(PyExc_ValueError, "Wrong self object in mp_subscript"); + return -1; + } + + return (*reinterpret_cast(that))->mp_ass_subscript(self, ind, op); + } + }; +%} + +%define %map_slot(name, type) + %feature("python:mp_" %str(name)) type "map_traits<" %str(type) ">::mp_" %str(name); +%enddef + +%define %map_protocol(type...) + %map_slot(length, type) + %map_slot(subscript, type) + %map_slot(ass_subscript, type) +%enddef diff --git a/python/ideep4py/py/swig_utils/asnumber.i b/python/ideep4py/py/swig_utils/asnumber.i new file mode 100644 index 00000000..325f075d --- /dev/null +++ b/python/ideep4py/py/swig_utils/asnumber.i @@ -0,0 +1,138 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
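These traits route Python's mapping slots to the C++ mdarray, so standard indexing should just work; a sketch, assuming `ideep4py.mdarray` wraps a numpy array as in the tests later in this patch:

    import numpy
    import ideep4py

    mx = ideep4py.mdarray(numpy.arange(12, dtype=numpy.float32).reshape(3, 4))
    n = len(mx)   # routed through map_traits<mdarray>::mp_length
    row = mx[0]   # mp_subscript
    mx[0] = row   # mp_ass_subscript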
IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + // TODO: Support both raw or smart pointer type + #define nb_unary(op, m) \ + static PyObject * nb_ ## op (PyObject *self) { \ + void *that; \ + int res1 = SWIG_ConvertPtr(self, &that, nullptr, 0); \ + if (!SWIG_IsOK(res1)) { \ + PyErr_SetString(PyExc_ValueError, "Wrong self object in nb_unary wrapper"); \ + return nullptr; \ + } \ + return (*reinterpret_cast(that))->m_ ## m(self); \ + } + + #define nb_binary(op, m) \ + static PyObject * nb_ ## op (PyObject *left, PyObject *right) { \ + void *that; \ + int res1 = SWIG_ConvertPtr(left, &that, nullptr, 0); \ + if (SWIG_IsOK(res1)) { \ + return (*reinterpret_cast(that))->m_ ## m(left, right); \ + } else { \ + res1 = SWIG_ConvertPtr(right, &that, nullptr, 0); \ + if (!SWIG_IsOK(res1)) { \ + PyErr_SetString(PyExc_ValueError, "Wrong self object in nb_binary wrapper"); \ + return nullptr; \ + } \ + return (*reinterpret_cast(that))->m_ ## m(left, right); \ + } \ + } + + #define nb_ternary(op, m) \ + static PyObject * nb_ ## op (PyObject *self, PyObject *o1, PyObject *o2) { \ + void *that; \ + int res1 = SWIG_ConvertPtr(self, &that, nullptr, 0); \ + if (!SWIG_IsOK(res1)) { \ + PyErr_SetString(PyExc_ValueError, "Wrong self object in nb_ternary wrapper"); \ + return nullptr; \ + } \ + return (*reinterpret_cast(that))->m_ ## m(self, o1, o2); \ + } + + + template + struct number_traits { + nb_binary(add, Add); + nb_binary(subtract, Subtract); + nb_binary(multiply, Multiply); + nb_binary(divide, Divide); + nb_binary(remainder, Remainder); + nb_binary(divmod, Divmod); + nb_ternary(power, Power); + nb_unary(negative, Negative); + nb_unary(positive, Positive); + nb_unary(absolute, Absolute); + nb_unary(invert, Invert); + nb_binary(lshift, Lshift); + nb_binary(rshift, Rshift); + nb_binary(and, And); + nb_binary(xor, Xor); + nb_binary(or, Or); + nb_binary(inplace_add, InPlaceAdd); + nb_binary(inplace_subtract, InPlaceSubtract); + nb_binary(inplace_multiply, InPlaceMultiply); + nb_binary(inplace_divide, InPlaceDivide); + nb_binary(inplace_remainder, InPlaceRemainder); + nb_ternary(inplace_power, InPlacePower); + nb_binary(inplace_lshift, InPlaceLshift); + nb_binary(inplace_rshift, InPlaceRshift); + nb_binary(inplace_and, InPlaceAnd); + nb_binary(inplace_xor, InPlaceXor); + nb_binary(inplace_or, InPlaceOr); + nb_binary(floor_divide, FloorDivide); + nb_binary(true_divide, TrueDivide); + nb_binary(inplace_floor_divide, InPlaceFloorDivide); + nb_binary(inplace_true_divide, InPlaceTrueDivide); + nb_binary(matrix_multiply, MatrixMultiply); + nb_binary(inplace_matrix_multiply, InPlaceMatrixMultiply); + }; +%} + +%define %nb_slot(name, type) + %feature("python:nb_" %str(name)) type "number_traits<" %str(type) ">::nb_" %str(name); +%enddef + +%define %number_protocol(type...) 
+ %nb_slot(add, type); + %nb_slot(subtract, type); + %nb_slot(multiply, type); + %nb_slot(divide, type) + %nb_slot(divmod, type); + %nb_slot(negative, type); + %nb_slot(positive, type); + %nb_slot(absolute, type); + %nb_slot(invert, type); + %nb_slot(lshift, type); + %nb_slot(rshift, type); + %nb_slot(and, type); + %nb_slot(xor, type); + %nb_slot(or, type); + %nb_slot(inplace_add, type); + %nb_slot(inplace_subtract, type); + %nb_slot(inplace_multiply, type); + %nb_slot(inplace_divide, type) + %nb_slot(inplace_remainder, type); + %nb_slot(inplace_power, type); + %nb_slot(inplace_lshift, type); + %nb_slot(inplace_rshift, type); + %nb_slot(inplace_and, type); + %nb_slot(inplace_xor, type); + %nb_slot(inplace_or, type); + %nb_slot(floor_divide, type); + %nb_slot(inplace_floor_divide, type); + %nb_slot(matrix_multiply, type); + %nb_slot(inplace_matrix_multiply, type); +%enddef diff --git a/python/ideep4py/py/swig_utils/getattro.i b/python/ideep4py/py/swig_utils/getattro.i new file mode 100644 index 00000000..2b945970 --- /dev/null +++ b/python/ideep4py/py/swig_utils/getattro.i @@ -0,0 +1,57 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + template + struct getattr_traits { + static PyObject *getattro_hook(PyObject *self, PyObject *name) { + + // Call python default first. + PyObject *res = PyObject_GenericGetAttr(self, name); + + // notify our hook if we find nothing from outside. + if (res == nullptr && PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + + void *that; + int res1 = SWIG_ConvertPtr(self, &that, nullptr, 0); + + if (!SWIG_IsOK(res1)) { + PyErr_SetString(PyExc_ValueError, "Wrong self object in getattro wrapper"); + res = nullptr; + } + + // XXX: should we bump up reference counter? + // TODO: Support both raw and smart pointer + res = reinterpret_cast(that)->get()->getattro(self, name); + } + + return res; + } + }; +%} + +%define %getattr_wrapper(type...) + %feature("python:tp_getattro") type "getattr_traits<" %str(type) ">::getattro_hook"; +%enddef diff --git a/python/ideep4py/py/swig_utils/pep_3118.i b/python/ideep4py/py/swig_utils/pep_3118.i new file mode 100755 index 00000000..7a573469 --- /dev/null +++ b/python/ideep4py/py/swig_utils/pep_3118.i @@ -0,0 +1,74 @@ +/* + *Copyright (c) 2018 Intel Corporation. 
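With the number-protocol slots registered above, mdarray should participate in ordinary arithmetic expressions; a minimal sketch:

    import numpy
    import ideep4py

    mx = ideep4py.mdarray(numpy.ones((2, 2), numpy.float32))
    y = mx + mx   # nb_add dispatches to m_Add
    mx += 1.0     # in-place variants map to the InPlace* handlers
    z = mx / 2.0  # the divide slot doubles as true_divide on Python 3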
+ * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + template + struct buffer_traits { + #define GET_SELF_OBJ(self, that) \ + do { \ + int res1 = SWIG_ConvertPtr(self, &that, nullptr, 0); \ + if (!SWIG_IsOK(res1)) { \ + PyErr_SetString(PyExc_ValueError, "Wrong self object in getbuffer wrapper"); \ + return -1; \ + } \ + } while (0) + + static int getbuffer(PyObject *self, Py_buffer *view, int flags) { + void *that; + + GET_SELF_OBJ(self, that); + + // TODO: support smart pointer and raw at same time + return (*reinterpret_cast(that))->getbuffer(self, view, flags); + } + }; +%} + +%define %buffer_protocol_producer(type...) + %feature("python:bf_getbuffer") type "buffer_traits<" %str(type) ">::getbuffer"; + +#if defined(NEWBUFFER_ON) + %feature("python:tp_flags") type "Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER"; +#endif + +%enddef + +%define %buffer_protocol_typemap(VIEW) +%typemap(typecheck) (VIEW) { + $1 = PyObject_CheckBuffer($input); +} + +%typemap(in) (VIEW) (int res, Py_buffer view + , int flags = PyBUF_C_CONTIGUOUS | PyBUF_RECORDS) { + /* view = new Py_buffer;*/ + res = PyObject_GetBuffer($input, &view, flags); + if (res != 0) { + $1 = NULL; + goto fail; + } else { + $1 = ($1_ltype) &view; + } + // TODO: IF WE CONFRONT A F_CONTINGUOUS ONE??? +} +%enddef diff --git a/python/ideep4py/py/swig_utils/seq_typemap.i b/python/ideep4py/py/swig_utils/seq_typemap.i new file mode 100644 index 00000000..9728a4cd --- /dev/null +++ b/python/ideep4py/py/swig_utils/seq_typemap.i @@ -0,0 +1,69 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
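Because `bf_getbuffer` is wired to the PEP 3118 producer above, numpy can consume an mdarray directly; this mirrors the `numpy.asarray` calls in the tests later in this patch:

    import numpy
    import ideep4py

    mx = ideep4py.mdarray(numpy.ones((4, 4), numpy.float32))
    a = numpy.asarray(mx)  # numpy reads the mdarray through bf_getbuffer
    assert a.shape == (4, 4)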
diff --git a/python/ideep4py/py/swig_utils/seq_typemap.i b/python/ideep4py/py/swig_utils/seq_typemap.i new file mode 100644 index 00000000..9728a4cd --- /dev/null +++ b/python/ideep4py/py/swig_utils/seq_typemap.i @@ -0,0 +1,69 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%define %int_sequence_typemap(integer_sequence_compatible_type) + +%typemap(typecheck) (integer_sequence_compatible_type) { + $1 = PySequence_Check($input); +} + +%typemap(in) (integer_sequence_compatible_type) (int count) { + count = PySequence_Size($input); + + for (int i = 0; i < count; i++) { + PyObject *o = PySequence_GetItem($input, i); + $1.push_back(PyLong_AsLong(o)); + } +} +%enddef + +%define %at_sequence_typemap(at_sequence_compatible_type) + +%typemap(typecheck) (at_sequence_compatible_type) { + $1 = PySequence_Check($input); +} + +%typemap(in) (at_sequence_compatible_type) (int count, + at_sequence_compatible_type ins) { + count = PySequence_Size($input); + for (int i = 0; i < count; i++) { + PyObject *o = PySequence_GetItem($input, i); + mkldnn::primitive::at *tmp; + int res1 = SWIG_ConvertPtr(o, reinterpret_cast<void **>(&tmp) + , $descriptor(mkldnn::primitive::at *), 0); + + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1) + , "typemap 'mkldnn::primitive::at' sequence type failed"); + } + if (tmp == nullptr) { + SWIG_exception_fail(SWIG_ArgError(res1) + , "Input is not a sequence of 'mkldnn::primitive::at'"); + } + ins.emplace_back(*tmp); + } + + $1 = std::move(ins); +} +%enddef
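+ +// Illustrative effect of the integer-sequence typemap (the concrete +// parameter type is an assumption): once attached to, e.g., a +// mkldnn::memory::dims argument, Python callers may pass any integer +// sequence, such as op((1, 32, 224, 224)) or op([2, 2]).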
diff --git a/python/ideep4py/py/swig_utils/tp.i b/python/ideep4py/py/swig_utils/tp.i new file mode 100644 index 00000000..fd5bff74 --- /dev/null +++ b/python/ideep4py/py/swig_utils/tp.i @@ -0,0 +1,59 @@ +/* + *Copyright (c) 2018 Intel Corporation. + * + *Permission is hereby granted, free of charge, to any person obtaining a copy + *of this software and associated documentation files (the "Software"), to deal + *in the Software without restriction, including without limitation the rights + *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the Software is + *furnished to do so, subject to the following conditions: + * + *The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + *THE SOFTWARE. + * + */ + + +%{ + template <typename T> + struct tp_traits { + static PyObject *tp_richcompare(PyObject *self, PyObject *other, int cmp_op) { + PyObject *surrogate = PyArray_FromAny(self, nullptr, 0, 0 \ + , NPY_ARRAY_ELEMENTSTRIDES, nullptr); + if (surrogate == nullptr) + return nullptr; + + PyObject *res = PyObject_RichCompare(surrogate, other, cmp_op); + Py_DECREF(surrogate); + return res; + } + + static PyObject *tp_iter(PyObject *self) { + PyObject *surrogate = PyArray_FromAny(self, nullptr, 0, 0 \ + , NPY_ARRAY_ELEMENTSTRIDES, nullptr); + if (surrogate == nullptr) + return nullptr; + + PyObject *res = PyObject_GetIter(surrogate); + Py_DECREF(surrogate); + return res; + } + }; +%} + +%define %tp_slot(name, type) + %feature("python:tp_" %str(name)) type "tp_traits<" %str(type) ">::tp_" %str(name); +%enddef + +%define %tp_protocol(type...) + %tp_slot(richcompare, type) + %tp_slot(iter, type) +%enddef diff --git a/python/ideep4py/tests/mm/test_acc_sum.py b/python/ideep4py/tests/mm/test_acc_sum.py new file mode 100644 index 00000000..784fc5f1 --- /dev/null +++ b/python/ideep4py/tests/mm/test_acc_sum.py @@ -0,0 +1,18 @@ +import numpy +import ideep4py + +x1 = numpy.random.uniform(-1, 1, (3, 16, 2, 4)).astype(numpy.float32) +x2 = numpy.random.uniform(-1, 1, (3, 16, 2, 4)).astype(numpy.float32) +x3 = numpy.random.uniform(-1, 1, (3, 16, 2, 4)).astype(numpy.float32) +x4 = numpy.random.uniform(-1, 1, (3, 16, 2, 4)).astype(numpy.float32) +mx1 = ideep4py.mdarray(x1) +mx2 = ideep4py.mdarray(x2) +mx3 = ideep4py.mdarray(x3) +mx4 = ideep4py.mdarray(x4) + +x = x1 + x2 + x3 + x4 +mx = ideep4py.basic_acc_sum((mx1, mx2, mx3, mx4)) +# mx = numpy.asarray(mx) +res = numpy.allclose(mx, x, 1e-5, 1e-4, True) +if not res: + print("basic_acc_sum mismatch!") diff --git a/python/ideep4py/tests/mm/test_copyto.py b/python/ideep4py/tests/mm/test_copyto.py new file mode 100755 index 00000000..fc080a83 --- /dev/null +++ b/python/ideep4py/tests/mm/test_copyto.py @@ -0,0 +1,22 @@ +import numpy +# from chainer import testing +# from chainer import utils +import ideep4py + +x1 = numpy.ndarray(shape=(2, 16, 2, 2), dtype=numpy.float32, order='C') +x2 = numpy.ndarray(shape=(2, 16, 2, 2), dtype=numpy.float32, order='C') +mx1 = ideep4py.mdarray(x1) +mx2 = ideep4py.mdarray(x2) +numpy.copyto(x2, x1) +ideep4py.basic_copyto(mx2, mx1) +t = numpy.asarray(mx2) +assert numpy.allclose(t, x2, 1e-5, 1e-4, True) + + +x1 = numpy.ndarray(shape=(2, 16, 2, 2), dtype=numpy.float32, order='C') +x2 = numpy.ndarray(shape=(2, 16, 2, 2), dtype=numpy.float32, order='C') +mx2 = ideep4py.mdarray(x2) +numpy.copyto(x2, x1) +ideep4py.basic_copyto(mx2, x1) +t = numpy.asarray(mx2) +assert numpy.allclose(t, x2, 1e-5, 1e-4, True) diff --git a/python/ideep4py/tests/mm/test_dlcp.py b/python/ideep4py/tests/mm/test_dlcp.py new file mode 100644 index 00000000..67b6966a --- /dev/null +++ b/python/ideep4py/tests/mm/test_dlcp.py @@ -0,0 +1,18 @@ +import ideep4py +from ideep4py import dlCompression + +import numpy + +a = numpy.arange(9, dtype=numpy.float32) +a = a.reshape((3, 3)) +am = ideep4py.array(a) + +ret = dlCompression.Compress(am, am, None, 4, dlCompression.dl_comp_dfp) +assert(ret == dlCompression.dl_comp_ok) + +ret = dlCompression.Decompress(am, am) +assert(ret == dlCompression.dl_comp_ok) + +_a = numpy.array(am) + +numpy.testing.assert_allclose(a, _a, atol=0.1, rtol=0.01, verbose=True)
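+ +# Editorial note: dl_comp_dfp is a lossy (dynamic fixed point) codec, +# which is presumably why this round-trip check uses loose tolerances +# (atol=0.1, rtol=0.01) rather than exact equality.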
diff --git a/python/ideep4py/tests/mm/test_mdarray.py b/python/ideep4py/tests/mm/test_mdarray.py new file mode 100755 index 00000000..60441c31 --- /dev/null +++ b/python/ideep4py/tests/mm/test_mdarray.py @@ -0,0 +1,40 @@ +import numpy +from chainer import testing +from chainer import utils # NOQA +import ideep4py + +x1 = numpy.ndarray(shape=(2, 2, 2, 2), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +x = x + 1 +testing.assert_allclose(x1 + 1, x) + +x = ideep4py.mdarray(x1) + +print(x) +print("ndim=", x.ndim) +print("shape=", x.shape) +print("size=", x.size) +print("dtype=", x.dtype) +print("is_mdarray=", x.is_mdarray) + +x1 += x +x += x +x2 = numpy.array(x) +testing.assert_allclose(x1, x2) + + +x1 = numpy.ones(shape=(2, 2, 2, 2), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +y = x + x1 +y2 = numpy.array(y) +testing.assert_allclose(y2, x1 + x1) + +y = x * x1 +y2 = numpy.array(y) +testing.assert_allclose(y2, x1 * x1) + +x1 = numpy.random.uniform(-1, 1, (3, 4)).astype(numpy.float32) +x = ideep4py.mdarray(x1) +z1 = (x1 > 0).astype(x1.dtype) +z = (x > 0).astype(x1.dtype) +testing.assert_allclose(z, z1) diff --git a/python/ideep4py/tests/mm/test_mdarray3.py b/python/ideep4py/tests/mm/test_mdarray3.py new file mode 100755 index 00000000..3dc11460 --- /dev/null +++ b/python/ideep4py/tests/mm/test_mdarray3.py @@ -0,0 +1,37 @@ +import numpy +from chainer import testing # NOQA +from chainer import utils # NOQA +import ideep4py + +x1 = numpy.ndarray(shape=(2, 2), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +print(x1) +y = x1 > 0 +print(y) +x *= y + + +# test divide +x1 = numpy.ndarray(shape=(2, 2), dtype=numpy.float32, order='C') +x1.fill(2.) +x = ideep4py.mdarray(x1) +testing.assert_allclose(1 / x1, 1 / x) +testing.assert_allclose(2 * x1, 2 * x) +testing.assert_allclose(1 - x1, 1 - x) +testing.assert_allclose(1 + x1, 1 + x) + +x1 /= 3 +x /= 3 +testing.assert_allclose(x1, x) + +x1 *= 2 +x *= 2 +testing.assert_allclose(x1, x) + +x1 += 3 +x += 3 +testing.assert_allclose(x1, x) + +x1 -= 5 +x -= 5 +testing.assert_allclose(x1, x) diff --git a/python/ideep4py/tests/mm/test_mdarray_iter.py b/python/ideep4py/tests/mm/test_mdarray_iter.py new file mode 100644 index 00000000..85029cee --- /dev/null +++ b/python/ideep4py/tests/mm/test_mdarray_iter.py @@ -0,0 +1,59 @@ +import ideep4py # NOQA +import numpy +import six +from chainer import testing +from ideep4py import relu, mdarray + +# enumerate test +x = numpy.random.uniform(-1, 1, (256, 512, 13, 13)).astype(numpy.float32) +mx = mdarray(x) + +a = [] +b = [] +for p, xi in enumerate(x): + a.append(xi) +for p, mxi in enumerate(mx): + b.append(mxi) + +testing.assert_allclose(numpy.asarray(a), numpy.asarray(b)) + + +# zip test +x1 = numpy.random.uniform(-1, 1, (256, 512, 13, 13)).astype(numpy.float32) +x2 = numpy.random.uniform(-1, 1, (256, 512, 13, 13)).astype(numpy.float32) + +mx1 = mdarray(x1) +mx2 = mdarray(x2) + +a1 = [] +a2 = [] +b1 = [] +b2 = [] + +for x, y in six.moves.zip(x1, x2): + a1.append(x) + a2.append(y) + +for mx, my in six.moves.zip(mx1, mx2): + b1.append(mx) + b2.append(my) + +testing.assert_allclose(numpy.asarray(a1), numpy.asarray(b1)) +testing.assert_allclose(numpy.asarray(a2), numpy.asarray(b2)) + + +# mkl-dnn format test +x = numpy.random.uniform(-1, 1, (256, 512, 13, 13)).astype(numpy.float32) +y = numpy.maximum(x, 0, dtype=x.dtype) +mx = mdarray(x) +my = relu.Forward(mx) +testing.assert_allclose(y, my) + +a = [] +b = [] +for p, xi in enumerate(y): + a.append(xi) +for p, mxi in enumerate(my): + b.append(mxi) + +testing.assert_allclose(numpy.asarray(a), numpy.asarray(b))
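+ +# Editorial note: the loops above work because mdarray's tp_iter slot +# (see swig_utils/tp.i) materialises a temporary NumPy surrogate of the +# buffer; a rough pure-Python equivalent of that fallback is: +# +#   it = iter(numpy.asarray(mx))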
diff --git a/python/ideep4py/tests/mm/test_mdarray_reshape.py b/python/ideep4py/tests/mm/test_mdarray_reshape.py new file mode 100755 index 00000000..ab15f289 --- /dev/null +++ b/python/ideep4py/tests/mm/test_mdarray_reshape.py @@ -0,0 +1,45 @@ +import numpy +import ideep4py + +# list case +x1 = numpy.ndarray(shape=(2, 2, 2, 2), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +y1 = x1.reshape([4, 4]) +y = x.reshape([4, 4]) +res = numpy.allclose(y, y1, 1e-5, 1e-4) +if not res: + print("reshape (list) mismatch!") + +# single number case +x1 = numpy.ndarray(shape=(2, 2, 2, 2), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +y1 = x1.reshape(16) +y = x.reshape(16) +res = numpy.allclose(y, y1, 1e-5, 1e-4) +if not res: + print("reshape (scalar) mismatch!") + +# value change +x1 = numpy.ndarray(shape=(2, 2, 2, 2), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +print(type(x)) +y = x.reshape(len(x), -1) +x[0, 0, 0, 0] = 3.333 +assert(x[0, 0, 0, 0] == y[0, 0]) + +y = x.reshape((len(x), -1)) +x[0, 0, 0, 0] = 4.4444 +assert(x[0, 0, 0, 0] == y[0, 0]) + +# -1 case +x1 = numpy.ndarray(shape=(2, 2, 2, 2), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +y = x.reshape((2, 2, -1)) +y1 = x1.reshape((2, 2, -1)) +res = numpy.allclose(y, y1, 1e-5, 1e-4) +if not res: + print("reshape (-1) mismatch!") +y = x.reshape(2, 2, -1) +res = numpy.allclose(y, y1, 1e-5, 1e-4) +if not res: + print("reshape (-1) mismatch!") diff --git a/python/ideep4py/tests/mm/test_mdarray_sum.py b/python/ideep4py/tests/mm/test_mdarray_sum.py new file mode 100644 index 00000000..db9c5ebb --- /dev/null +++ b/python/ideep4py/tests/mm/test_mdarray_sum.py @@ -0,0 +1,133 @@ +import ideep4py # NOQA +import numpy +from chainer import testing +from ideep4py import relu, mdarray + +print('mdarray sum [large shape routine]') +print('shape (256, 384, 13, 13) along (0, 2, 3)') +x = numpy.ndarray((256, 384, 13, 13), dtype=numpy.float32) +y = numpy.maximum(x, 0, dtype=x.dtype) + +mx = mdarray(x) +my = relu.Forward(mx) + +testing.assert_allclose(my.sum((0, 2, 3)), y.sum((0, 2, 3))) +print('pass ...\n') + + +print('mdarray sum [small shape routine]') +print('shape (39, 32, 13, 13) along (0, 2, 3)') +x = numpy.ndarray((39, 32, 13, 13), dtype=numpy.float32) +y = numpy.maximum(x, 0, dtype=x.dtype) + +mx = mdarray(x) +my = relu.Forward(mx) + +testing.assert_allclose(my.sum((0, 2, 3)), y.sum((0, 2, 3))) +print('pass ...\n') + + +print('mdarray sum [mkldnn format keepdims routine]') +print('shape (39, 32, 13, 13) along (0, 2, 3)') +x = numpy.ndarray((39, 32, 13, 13), dtype=numpy.float32) +y = numpy.maximum(x, 0, dtype=x.dtype) + +mx = mdarray(x) +my = relu.Forward(mx) + +testing.assert_allclose(my.sum((0, 2, 3), keepdims=True), + y.sum((0, 2, 3), keepdims=True)) +print('pass ...\n') + + +print('mdarray sum [common format small shape routine]') +print('shape (2, 2, 3, 3) along (0, 2, 3)') +x = numpy.ndarray((2, 2, 3, 3), dtype=numpy.float32) + +x.fill(2.3232) +x[0].fill(3.1212) +mx = mdarray(x) + +testing.assert_allclose(mx.sum((0, 2, 3)), x.sum((0, 2, 3))) +print('pass ...\n') + + +print('mdarray sum [common format small shape routine]') +print('shape (2, 2, 3, 3) along (1, 3)') +x = numpy.ndarray((2, 2, 3, 3), dtype=numpy.float32) + +x.fill(2.3232) +x[0].fill(3.1212) +mx = mdarray(x) + +testing.assert_allclose(mx.sum((1, 3)), x.sum((1, 3))) +print('pass ...\n') + + +print('mdarray sum [common format routine keepdims]') +print('shape (2, 2, 3, 3) along (0, 2, 3)') +x = numpy.ndarray((2, 2, 3, 3), dtype=numpy.float32) + +x.fill(2.3232) +x[0].fill(3.1212) +mx = mdarray(x) + +ms = mx.sum((0, 2, 3), keepdims=True) +ns = x.sum((0, 2, 3), keepdims=True) +testing.assert_allclose(ms, ns) +print('pass 
...\n') + + +print('mdarray sum [common format routine]') +print('shape (2, 15, 3, 3) along (0, 2, 3)') +x = numpy.ndarray((2, 15, 3, 3), dtype=numpy.float32) + +x.fill(1) +x[0].fill(3.1212) +mx = mdarray(x) + +ms = mx.sum((0, 2, 3)) +ns = x.sum((0, 2, 3)) +testing.assert_allclose(ms, ns) +print('pass ...\n') + + +print('mdarray sum [common format big shape routine]') +print('shape (256, 385, 13, 13) along (0, 2, 3)') +x = numpy.ndarray((256, 385, 13, 13), dtype=numpy.float32) + +x.fill(1) +x[0].fill(3.1212) +mx = mdarray(x) + +ms = mx.sum((0, 2, 3)) +ns = x.sum((0, 2, 3)) +testing.assert_allclose(ms, ns) +print('pass ...\n') + + +print('mdarray sum [common format big shape routine]') +print('shape (256, 1000) along (0)') +x = numpy.ndarray((256, 1000), dtype=numpy.float32) + +x.fill(1) +x[0].fill(3.1212) +mx = mdarray(x) + +ms = mx.sum((0)) +ns = x.sum((0)) +testing.assert_allclose(ms, ns) +print('pass ...\n') + +print('mdarray sum [common format big shape routine]') +print('shape (256, 1000) along (1)') +x = numpy.ndarray((256, 1000), dtype=numpy.float32) + +x.fill(1) +x[0].fill(3.1212) +mx = mdarray(x) + +ms = mx.sum((1)) +ns = x.sum((1)) +testing.assert_allclose(ms, ns) +print('pass ...\n') diff --git a/python/ideep4py/tests/mm/test_memcpy.py b/python/ideep4py/tests/mm/test_memcpy.py new file mode 100644 index 00000000..552aab00 --- /dev/null +++ b/python/ideep4py/tests/mm/test_memcpy.py @@ -0,0 +1,7 @@ +import numpy +import ideep4py +x1 = numpy.ndarray(shape=(1, 2, 3, 4), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x1) +x2 = numpy.array(x) +print("x = ", x1) +print("x2 = ", x2) diff --git a/python/ideep4py/tests/mm/test_tanh.py b/python/ideep4py/tests/mm/test_tanh.py new file mode 100644 index 00000000..5e724465 --- /dev/null +++ b/python/ideep4py/tests/mm/test_tanh.py @@ -0,0 +1,30 @@ +import numpy +from chainer import testing +import ideep4py + +# x = numpy.ndarray(shape=(1,32,224,224), dtype=numpy.float32, order='C') +x = numpy.random.uniform(-1, 1, (1, 32, 2, 224)).astype(numpy.float32) +y = numpy.tanh(x) + +mx = ideep4py.mdarray(x) +x2 = numpy.array(mx) +testing.assert_allclose(x, x2) + +print("tanh fwd") +my = ideep4py._ideep4py.tanh.Forward(mx) +y2 = numpy.array(my) +testing.assert_allclose(y, y2) + +# Test backward +print("tanh bwd") +x = numpy.random.uniform(-1, 1, (1, 32, 224, 224)).astype(numpy.float32) +gy = numpy.random.uniform(-1, 1, (1, 32, 224, 224)).astype(numpy.float32) +gx = gy * (1 - numpy.tanh(x) ** 2) + + +mx = ideep4py.mdarray(x) +mgy = ideep4py.mdarray(gy) +mgx = ideep4py._ideep4py.tanh.Backward(mx, mgy) + +gx1 = numpy.array(mgx) +testing.assert_allclose(gx1, gx) diff --git a/python/ideep4py/tests/primitives/test_bn.py b/python/ideep4py/tests/primitives/test_bn.py new file mode 100644 index 00000000..bfc4ea1b --- /dev/null +++ b/python/ideep4py/tests/primitives/test_bn.py @@ -0,0 +1,68 @@ +import numpy +import ideep4py + +from ideep4py import batchNormalization + + +def run(): + src = numpy.arange(3 * 2 * 2 * 2, dtype=numpy.float32) + src = src.reshape((3, 2, 2, 2)) + src = ideep4py.mdarray(src) + + gamma = numpy.ones(2, dtype=numpy.float32) + beta = numpy.zeros(2, dtype=numpy.float32) + w = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1)) + w = ideep4py.mdarray(w) + + eps = 2e-5 + + print("FWD *****************************") + y = batchNormalization.Forward(src, w, None, None, eps) + print(y) + print(-y[0]) + print(-y[1]) + print(-y[2]) + print("==============") + y = batchNormalization.Forward(src, w, None, None, eps) + print(y) + 
print(-y[0]) + print(-y[1]) + print(-y[2]) + print("==============") + mean = y[1] + var = y[2] + y = batchNormalization.Forward(src, w, mean, var, eps) + print(y) + print(-y[0]) + print("==============") + + print("BWD *****************************") + diff_dst = numpy.ones(src.shape, dtype=numpy.float32) + diff_dst = ideep4py.mdarray(diff_dst) + y = batchNormalization.Backward(src, diff_dst, mean, var, w, eps) + print(y) + print(-y[0]) + print(-y[1]) + print("==============") + y = batchNormalization.Backward(src, diff_dst, mean, var, w, eps) + print(y) + print(-y[0]) + print(-y[1]) + print("==============") + src = numpy.arange(3 * 2 * 3 * 3, dtype=numpy.float32) + src = src.reshape((3, 2, 3, 3)) + src = ideep4py.mdarray(src) + diff_dst = numpy.ones(src.shape, dtype=numpy.float32) + diff_dst = ideep4py.mdarray(diff_dst) + y = batchNormalization.Backward(src, diff_dst, mean, var, w, eps) + print(y) + print(-y[0]) + print(-y[1]) + print("==============") + y = batchNormalization.Backward(src, diff_dst, mean, var, None, eps) + print(y) + print(-y[0]) + print("==============") + + +run() diff --git a/python/ideep4py/tests/primitives/test_concat.py b/python/ideep4py/tests/primitives/test_concat.py new file mode 100755 index 00000000..ee61cbe4 --- /dev/null +++ b/python/ideep4py/tests/primitives/test_concat.py @@ -0,0 +1,45 @@ +import numpy +import ideep4py + +# from dnn._dnn import convolution2DParam, conv_test +from ideep4py import intVector, mdarrayVector, concat + +x1 = numpy.ndarray(shape=(1, 16, 224, 224), dtype=numpy.float32, order='C') +x2 = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x3 = numpy.ndarray(shape=(1, 64, 224, 224), dtype=numpy.float32, order='C') +inputs = (x1, x2, x3) +sizes = numpy.array( + [v.shape[1] for v in inputs[:-1]] +).cumsum() +print("sizes=", sizes) +print("type=", type(sizes)) + +x1 = ideep4py.mdarray(x1) +x2 = ideep4py.mdarray(x2) +x3 = ideep4py.mdarray(x3) + +xs = mdarrayVector() +xs.push_back(x1) +xs.push_back(x2) +xs.push_back(x3) + +print("fwd") +y = concat.Forward(xs, 1) +print("==============") +y = concat.Forward(xs, 1) +print("y.shape=", y.shape) + +print("backward") + +int_sizes = intVector() + +for i in sizes: + print("i=", i) + int_sizes.push_back(i) + +gxs = concat.Backward(y, int_sizes, 1) + +for gx in gxs: + print("gx.type=", type(gx)) + print("gx.shape=", gx.shape) +print("after backward") diff --git a/python/ideep4py/tests/primitives/test_conv.py b/python/ideep4py/tests/primitives/test_conv.py new file mode 100755 index 00000000..dfbc8431 --- /dev/null +++ b/python/ideep4py/tests/primitives/test_conv.py @@ -0,0 +1,106 @@ +import numpy +import ideep4py + +# from ideep4py import convolution2DParam, conv_test +from ideep4py import intVector, convolution2DParam, convolution2D + +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) + +w = numpy.ndarray(shape=(32, 32, 3, 3), dtype=numpy.float32, order='C') +w = ideep4py.mdarray(w) + +b = numpy.ndarray(shape=(32,), dtype=numpy.float32, order='C') +b = ideep4py.mdarray(b) + +cp = convolution2DParam() +cp.out_dims = intVector() +cp.out_dims.push_back(1) +cp.out_dims.push_back(32) +cp.out_dims.push_back(224) +cp.out_dims.push_back(224) +cp.sy = cp.sx = 1 +cp.pad_lh = cp.pad_lw = cp.pad_rh = cp.pad_rw = 1 + +print("fwd with bias") +y = convolution2D.Forward(x, w, b, cp) +print("==============") +y = convolution2D.Forward(x, w, b, cp) +print("==============") +y = convolution2D.Forward(y, w, b, cp) + +print("fwd without bias") +y 
= convolution2D.Forward(x, w, None, cp) +print("==============") +y = convolution2D.Forward(x, w, None, cp) +print("==============") +y = convolution2D.Forward(y, w, None, cp) + +print("bwd data") +x = convolution2D.BackwardData(w, y, cp) +print("==============") +x = convolution2D.BackwardData(w, y, cp) +print("==============") +x = convolution2D.BackwardData(w, y, cp) + +cp = convolution2DParam() +cp.out_dims = intVector() +cp.out_dims.push_back(32) +cp.out_dims.push_back(32) +cp.out_dims.push_back(3) +cp.out_dims.push_back(3) +cp.sy = cp.sx = 1 +cp.pad_lh = cp.pad_lw = cp.pad_rh = cp.pad_rw = 1 + +print("bwd weights with bias") +weights = convolution2D.BackwardWeightsBias(x, y, cp) +print("weights=", type(weights)) +print("len=", len(weights)) +print("gw.shape=", weights[0].shape) +print("gb.shape=", weights[1].shape) +print("==============") +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = convolution2D.BackwardWeightsBias(x, y, cp) +print("weights=", type(weights)) +print("len=", len(weights)) +print("gw.shape=", weights[0].shape) +print("gb.shape=", weights[1].shape) +print("==============") +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = convolution2D.BackwardWeightsBias(x, y, cp) +print("weights=", type(weights)) +print("len=", len(weights)) +print("gw.shape=", weights[0].shape) +print("gb.shape=", weights[1].shape) +print("==============") + +print("bwd weights without bias") +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = convolution2D.BackwardWeights(x, y, cp) +print("weights=", type(weights)) +print("gw.shape=", weights.shape) +print("==============") +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = convolution2D.BackwardWeights(x, y, cp) +print("weights=", type(weights)) +print("gw.shape=", weights.shape) +print("==============") +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = convolution2D.BackwardWeights(x, y, cp) +print("weights=", type(weights)) +print("gw.shape=", weights.shape) +print("==============") +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = convolution2D.BackwardWeights(x, y, cp) + +# print("type=", type(x)) +# print("shape=", y.shape) +# print("size=", y.size) +# print("ndim=", y.ndim) +# print("dtype=", y.dtype) diff --git a/python/ideep4py/tests/primitives/test_dropout.py b/python/ideep4py/tests/primitives/test_dropout.py new file mode 100644 index 00000000..cc2e36dc --- /dev/null +++ b/python/ideep4py/tests/primitives/test_dropout.py @@ -0,0 +1,21 @@ +import numpy +from chainer import ideepy + + +dropout_ratio = 0.8 + +# Forward +x = numpy.random.rand(128, 3, 224, 224).astype(numpy.float32) +x_md, = ideepy.to_mdarray((x, )) +mask, y = ideepy.dropout.Forward(x_md, dropout_ratio) +y = numpy.array(y, dtype=numpy.float32) +y_expect = x * mask +numpy.testing.assert_allclose(y, y_expect) + +# Backward +gy = numpy.random.rand(128, 3, 224, 224).astype(numpy.float32) +gy_md, = ideepy.to_mdarray((gy, )) +gx = ideepy.dropout.Backward(mask, gy_md) +gx = numpy.array(gx, dtype=numpy.float32) +gx_expect = gy * mask +numpy.testing.assert_allclose(gx, gx_expect) diff --git a/python/ideep4py/tests/primitives/test_linear.py b/python/ideep4py/tests/primitives/test_linear.py new file mode 100644 index 
00000000..17af7cf2 --- /dev/null +++ b/python/ideep4py/tests/primitives/test_linear.py @@ -0,0 +1,100 @@ +import numpy +import ideep4py +# from ideep4py import linearParam, linear_test +from ideep4py import linear + +x = numpy.ndarray(shape=(1, 32), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) + +w = numpy.ndarray(shape=(32, 32), dtype=numpy.float32, order='C') +print("ndarray w", w.shape) +w = ideep4py.mdarray(w) +print("w.dim", w.shape) +b = numpy.ndarray(shape=(32,), dtype=numpy.float32, order='C') +b = ideep4py.mdarray(b) + +print("===============2 dims============") + +print("fwd") +y = linear.Forward(x, w, b) +print("================") +y = linear.Forward(x, w, b) +print("================") +y = linear.Forward(x, w, b) + +print("bwd data") +x = linear.BackwardData(w, y) +print("================") +x = linear.BackwardData(w, y) +print("================") +x = linear.BackwardData(w, y) +print("================") + +print("bwd weight bias") +weights = linear.BackwardWeightsBias(x, y) +print("weights= ", type(weights)) +print("len", len(weights)) +print("gw.shape", weights[0].shape) +print("gb.shape = ", weights[1].shape) +print("================") + +x = numpy.ndarray(shape=(1, 32), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = linear.BackwardWeightsBias(x, y) +print("weights= ", type(weights)) +print("len", len(weights)) +print("gw.shape", weights[0].shape) +print("gb.shape = ", weights[1].shape) +print("================") + +x = numpy.ndarray(shape=(1, 32), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = linear.BackwardWeightsBias(x, y) +print("weights= ", type(weights)) +print("len", len(weights)) +print("gw.shape", weights[0].shape) +print("gb.shape = ", weights[1].shape) +print("================") + +print("bwd weight") +weights = linear.BackwardWeights(x, y) +print("weights= ", type(weights)) +print("gw.shape", weights.shape) +print("================") + +x = numpy.ndarray(shape=(1, 32), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = linear.BackwardWeights(x, y) +print("weights= ", type(weights)) +print("gw.shape", weights.shape) +print("================") + +x = numpy.ndarray(shape=(1, 32), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +weights = linear.BackwardWeights(x, y) +print("weights= ", type(weights)) +print("gw.shape", weights.shape) +print("================") + +# print("==========4 dims=================") +# +# x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +# x = ideep4py.mdarray(x) +# +# w = numpy.ndarray(shape=(32, 32, 224, 224), dtype=numpy.float32, order='C') +# print("ndarray w", w.shape) +# w = ideep4py.mdarray(w) +# print("w.dim", w.shape) +# b = numpy.ndarray(shape=(32,), dtype=numpy.float32, order='C') +# b = ideep4py.mdarray(b) +# +# print("fwd") +# y = linear.Forward(x, w, b) +# print("================") +# y = linear.Forward(x, w, b) +# print("================") +# y = linear.Forward(x, w, b) +# +# print("================") +# print("bwd data") +# x = linear.BackwardData(w, y) diff --git a/python/ideep4py/tests/primitives/test_lrn.py b/python/ideep4py/tests/primitives/test_lrn.py new file mode 100755 index 00000000..200d8661 --- /dev/null +++ b/python/ideep4py/tests/primitives/test_lrn.py @@ -0,0 +1,33 @@ +import numpy +import ideep4py + +from ideep4py import localResponseNormalizationParam +from ideep4py import localResponseNormalization + +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) + 
+pp = localResponseNormalizationParam() +pp.n = 5 +pp.k = 2 +pp.alpha = 1e-4 +pp.beta = .75 +pp.algo_kind = ideep4py.localResponseNormalizationParam.lrn_across_channels + +print("fwd") +(y, ws) = localResponseNormalization.Forward(x, pp) +print("==============") +(y, ws) = localResponseNormalization.Forward(x, pp) + +# print ("y =", y) +print("y.shape=", y.shape) +print("ws.shape=", ws.shape) +print("ws.dtype=", ws.dtype) + +print("==============") +print("bwd") +gx = localResponseNormalization.Backward(x, y, ws, pp) +print("==============") +gx = localResponseNormalization.Backward(x, y, ws, pp) +print("gx.shape=", gx.shape) +print("===== Finish backward=========") diff --git a/python/ideep4py/tests/primitives/test_pooling.py b/python/ideep4py/tests/primitives/test_pooling.py new file mode 100755 index 00000000..c05943da --- /dev/null +++ b/python/ideep4py/tests/primitives/test_pooling.py @@ -0,0 +1,56 @@ +import numpy +import ideep4py + +from ideep4py import pooling2DParam +from ideep4py import pooling2D + +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) + +pp = pooling2DParam() +pp.src_d1 = 1 +pp.src_d2 = 32 +pp.src_d3 = 224 +pp.src_d4 = 224 +pp.dst_d1 = 1 +pp.dst_d2 = 32 +pp.dst_d3 = 224 +pp.dst_d4 = 224 +pp.kh = pp.kw = 3 +pp.sy = pp.sx = 1 +pp.pad_lh = pp.pad_lw = pp.pad_rh = pp.pad_rw = 1 +pp.algo_kind = ideep4py.pooling2DParam.pooling_avg + +print("fwd") +y = pooling2D.Forward(x, pp) +print("==============") +y = pooling2D.Forward(x, pp) +print("==============") + +pp.algo_kind = ideep4py.pooling2DParam.pooling_max +(y, ws) = pooling2D.Forward(x, pp) +print("==============") +(y, ws) = pooling2D.Forward(x, pp) + +print("y.shape=", y.shape) +print("ws.shape=", ws.shape) +print("ws.dtype=", ws.dtype) + +print("==============") +print("bwd") +x = pooling2D.Backward(y, ws, pp) +print("==============") +x = pooling2D.Backward(y, ws, pp) +print("===== Finish max pooling backward=========") + +pp.algo_kind = ideep4py.pooling2DParam.pooling_avg +x = pooling2D.Backward(y, ws, pp) +print("==============") +x = pooling2D.Backward(y, ws, pp) +print("==============") +x = numpy.ndarray(shape=(1, 32, 224, 224), dtype=numpy.float32, order='C') +x = ideep4py.mdarray(x) +x = pooling2D.Backward(x, ws, pp) +print("===== Finish avg pooling backward =========") +print("x.shape=", x.shape) +print("==============") diff --git a/python/ideep4py/tests/primitives/test_relu.py b/python/ideep4py/tests/primitives/test_relu.py new file mode 100755 index 00000000..d97174c9 --- /dev/null +++ b/python/ideep4py/tests/primitives/test_relu.py @@ -0,0 +1,36 @@ +import numpy +from chainer import testing +import ideep4py +from ideep4py import relu + +# x = numpy.ndarray(shape=(1,32,224,224), dtype=numpy.float32, order='C') +x = numpy.random.uniform(-1, 1, (1, 32, 224, 224)).astype(numpy.float32) +y = numpy.maximum(x, 0, dtype=x.dtype) + +mx = ideep4py.mdarray(x) +x2 = numpy.array(mx) +testing.assert_allclose(x, x2) + +print("Relu fwd") +my = relu.Forward(mx) +y2 = numpy.array(my) +testing.assert_allclose(y, y2) +my = relu.Forward(my) +y2 = numpy.array(my) +testing.assert_allclose(y, y2) + + +# Test backward +print("Relu bwd") +x = numpy.random.uniform(-1, 1, (1, 32, 224, 224)).astype(numpy.float32) +gy = numpy.random.uniform(-1, 1, (1, 32, 224, 224)).astype(numpy.float32) +gx = (x > 0) * gy + + +mx = ideep4py.mdarray(x) +mgy = ideep4py.mdarray(gy) +mgx = relu.Backward(mx, mgy) + + +gx1 = numpy.array(mgx) +testing.assert_allclose(gx1, gx)
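+ +# Editorial sketch: relu.Backward masks the incoming gradient with the +# sign of the forward input, so a zero input must yield a zero gradient +# regardless of gy (kept commented out to leave the test behaviour intact): +# assert not numpy.count_nonzero(numpy.array( +#     relu.Backward(ideep4py.mdarray(numpy.zeros_like(x)), mgy)))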
diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 00000000..36695aca --- /dev/null +++ b/python/setup.py @@ -0,0 +1,145 @@ +from setuptools.extension import Extension +from numpy import get_include +from platform import system + +import os +import sys +import external + +from setuptools.command.build_py import build_py +from setuptools.command.install import install +from setuptools import setup + +subdir = 'mkldnn' + +# Specify the prefix under which you installed ipl_mkldnn +# prefix = '/usr/local' +mkldnn_root = external.mkldnn.root() +mkldnn_version = 'ae00102be506ed0fe2099c6557df2aa88ad57ec1' +dlcp_root = os.getcwd() + '/external/dlcp' + + +def prepare_mkldnn(): + external.mkldnn.prepare(mkldnn_version) + + +class _build_py(build_py): + def run(self): + prepare_mkldnn() + self.run_command('build_ext') + build_py.run(self) + + +class _install(install): + def run(self): + prepare_mkldnn() + self.run_command('build_ext') + install.run(self) + + +swig_opts = ['-c++', '-builtin', '-modern', '-modernargs', + '-Iideep4py/py/mm', + '-Iideep4py/py/primitives', + '-Iideep4py/py/swig_utils', + '-Iideep4py/py/dlcp', + '-Iideep4py/include/primitives/', + '-Iideep4py/include/mm/', + '-I' + dlcp_root + '/include'] + +if sys.version_info.major < 3: + swig_opts += ['-DNEWBUFFER_ON'] + +ccxx_opts = ['-std=c++11', '-Wno-unknown-pragmas'] +link_opts = ['-Wl,-z,now', '-Wl,-z,noexecstack', + '-Wl,-rpath,' + mkldnn_root + '/lib', '-L' + mkldnn_root + '/lib', + '-Wl,-rpath,' + dlcp_root + '/lib', '-L' + dlcp_root + '/lib'] + +includes = [get_include(), + 'ideep4py/include', + 'ideep4py/include/mkl', + 'ideep4py/common', + 'ideep4py/include/mm', + 'ideep4py/py/mm', + 'ideep4py/py/primitives', + 'ideep4py/py/dlcp', + 'ideep4py/include/primitives', + 'ideep4py/include/blas', + 'ideep4py/include/primitives/ops', + 'ideep4py/include/primitives/prim_mgr', + mkldnn_root + '/include', + dlcp_root + '/include'] + +libraries = ['mkldnn', 'mklml_intel', 'dlcomp'] + +if system() == 'Linux': + ccxx_opts += ['-fopenmp', '-DOPENMP_AFFINITY'] + libraries += ['boost_system', 'glog', 'm'] + src = ['ideep4py/py/ideep4py.i', + 'ideep4py/py/dlcp/dlcp_py.cc', + 'ideep4py/mm/mem.cc', + 'ideep4py/mm/tensor.cc', + 'ideep4py/py/mm/mdarray.cc', + 'ideep4py/common/cpu_info.cc', + 'ideep4py/common/utils.cc', + 'ideep4py/common/common.cc', + 'ideep4py/blas/sum.cc', + 'ideep4py/py/mm/basic.cc', + 'ideep4py/primitives/ops/eltwise_fwd.cc', + 'ideep4py/primitives/ops/eltwise_bwd.cc', + 'ideep4py/primitives/eltwise.cc', + 'ideep4py/primitives/ops/conv_fwd.cc', + 'ideep4py/primitives/ops/conv_bwd_weights.cc', + 'ideep4py/primitives/ops/conv_bwd_data.cc', + 'ideep4py/primitives/ops/reorder_op.cc', + 'ideep4py/primitives/conv.cc', + 'ideep4py/primitives/ops/pooling_fwd.cc', + 'ideep4py/primitives/ops/pooling_bwd.cc', + 'ideep4py/primitives/pooling.cc', + 'ideep4py/primitives/ops/linear_fwd.cc', + 'ideep4py/primitives/ops/linear_bwd_weights.cc', + 'ideep4py/primitives/ops/linear_bwd_data.cc', + 'ideep4py/primitives/linear.cc', + 'ideep4py/primitives/bn.cc', + 'ideep4py/primitives/ops/bn_fwd.cc', + 'ideep4py/primitives/ops/bn_bwd.cc', + 'ideep4py/primitives/ops/concat_fwd.cc', + 'ideep4py/primitives/ops/concat_bwd.cc', + 'ideep4py/primitives/concat.cc', + 'ideep4py/primitives/ops/lrn_fwd.cc', + 'ideep4py/primitives/ops/lrn_bwd.cc', + 'ideep4py/primitives/lrn.cc', + 'ideep4py/primitives/dropout.cc', + ] +else: + # TODO + src = ['mkldnn/mdarray.i', 'mkldnn/mdarray.cc'] + +ext_modules = [] + +ext = Extension( + 'ideep4py._ideep4py', sources=src, +
swig_opts=swig_opts, + extra_compile_args=ccxx_opts, extra_link_args=link_opts, + include_dirs=includes, libraries=libraries) + +ext_modules.append(ext) + +packages = ['ideep4py', 'ideep4py.cosim'] + +setup( + name='ideep4py', + version='0.0', + description='', + author='Intel', + author_email='', + url='', + license='MIT License', + packages=packages, + ext_modules=ext_modules, + cmdclass={'install': _install, 'build_py': _build_py}, + zip_safe=False, + # setup_requires=setup_requires, + # install_requires=install_requires, + # tests_require=['mock', + # 'pytest'], +) diff --git a/tests/ideep4py_tests/test_batch_normalization.py b/tests/ideep4py_tests/test_batch_normalization.py new file mode 100644 index 00000000..1fe764e6 --- /dev/null +++ b/tests/ideep4py_tests/test_batch_normalization.py @@ -0,0 +1,136 @@ +import sys +import unittest + +import numpy +import six +import ideep4py +from ideep4py import batchNormalization + +try: + import testing + from testing import condition +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +def _x_hat(x, mean, inv_std): + x_mu = x - mean + x_mu *= inv_std + return x_mu + + +def _batch_normalization(expander, gamma, beta, x, mean, var): + mean = mean[expander] + std = numpy.sqrt(var)[expander] + y_expect = (gamma[expander] * (x - mean) / std + beta[expander]) + return y_expect + + +@testing.parameterize(*(testing.product({ + 'param_shape': [(3, ), ], + 'ndim': [2, ], + 'dtype': [numpy.float32], +}))) +class TestBatchNormalizationF32(unittest.TestCase): + + def setUp(self): + self.eps = 2e-5 + self.expander = (None, Ellipsis) + (None,) * self.ndim + self.gamma = numpy.random.uniform(.5, 1, + self.param_shape).astype(self.dtype) + self.beta = numpy.random.uniform(-1, 1, + self.param_shape).astype(self.dtype) + self.head_ndim = self.gamma.ndim + 1 + shape = (5,) + self.param_shape + (2,) * self.ndim + self.x = numpy.random.uniform(-1, 1, shape).astype(self.dtype) + self.gy = numpy.random.uniform(-1, 1, shape).astype(self.dtype) + + self.args = [self.x, self.gamma, self.beta] + self.aggr_axes = (0,) + tuple( + six.moves.range(self.head_ndim, self.x.ndim)) + self.mean = self.x.mean(axis=self.aggr_axes) + self.var = self.x.var(axis=self.aggr_axes) + self.eps + self.check_forward_options = {'atol': 1e-4, 'rtol': 1e-3} + self.check_backward_options = {'atol': 1e-4, 'rtol': 1e-3} + + def check_forward(self, args): + x, gamma, beta = args + expander = (None, Ellipsis) + (None,) * (x.ndim - self.head_ndim) + self.expander = expander + self.axis = (0,) + tuple(range(self.head_ndim, x.ndim)) + expand_dim = False + if x.ndim == 2: + expand_dim = True + x = x[:, :, None, None] + + gamma = gamma[expander] + beta = beta[expander] + W = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1)) + + y_act, self.mean, self.var, inv_std = batchNormalization.Forward( + ideep4py.mdarray(x), + ideep4py.mdarray(W), + None, + None, + self.eps + ) + + if expand_dim: + y_act = numpy.squeeze(y_act, axis=(2, 3)) + y_act = numpy.array(y_act, dtype=self.dtype) + + y_expect = _batch_normalization( + self.expander, self.gamma, self.beta, self.x, self.mean, self.var) + + numpy.testing.assert_allclose( + y_expect, y_act, **self.check_forward_options) + + @condition.retry(3) + def test_forward_cpu(self): + self.check_forward(self.args) + + def check_backward(self, args, y_grad): + x, gamma, beta = args + gy = y_grad + expander = self.expander + inv_m = gamma.dtype.type(1. 
/ (x.size // gamma.size)) + + expand_dim = False + if x.ndim == 2: + expand_dim = True + x = x[:, :, None, None] + gy = gy[:, :, None, None] + + gamma = gamma[self.expander] + beta = numpy.zeros_like(gamma) + W = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1)) + + gx_act, gW = batchNormalization.Backward( + ideep4py.mdarray(x), + ideep4py.mdarray(gy), + ideep4py.mdarray(self.mean), + ideep4py.mdarray(self.var), + ideep4py.mdarray(W), + self.eps + ) + if expand_dim: + gx_act = numpy.squeeze(gx_act, axis=(2, 3)) + gx_act = numpy.array(gx_act, dtype=self.dtype) + + self.inv_std = self.var ** (-0.5) + + gbeta = y_grad.sum(axis=self.aggr_axes) + x_hat = _x_hat(x, self.mean[expander], self.inv_std[expander]) + ggamma = (y_grad * x_hat).sum(axis=self.aggr_axes) + gx_expect = (self.gamma * self.inv_std)[expander] * ( + y_grad - (x_hat * ggamma[expander] + gbeta[expander]) * inv_m) + + numpy.testing.assert_allclose( + gx_expect, gx_act, **self.check_backward_options) + + def test_backward_cpu(self): + self.check_backward(self.args, self.gy) + + +testing.run_module(__name__, __file__) diff --git a/tests/ideep4py_tests/test_concat_py.py b/tests/ideep4py_tests/test_concat_py.py new file mode 100644 index 00000000..5eeeaf10 --- /dev/null +++ b/tests/ideep4py_tests/test_concat_py.py @@ -0,0 +1,102 @@ +import sys +import unittest + +import numpy +import ideep4py +from ideep4py import intVector, mdarrayVector, concat + +try: + import testing +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +@testing.parameterize(*testing.product_dict( + [ + {'shape': (7, 2, 3, 5), 'axis': 0, 'section': [2, 5], + 'slices': [[slice(None, 2)], [slice(2, 5)], + [slice(5, None)]]}, + {'shape': (2, 7, 3, 5), 'axis': 1, 'section': [2, 5], + 'slices': [[slice(None), slice(None, 2)], [slice(None), slice(2, 5)], + [slice(None), slice(5, None)]]}, + {'shape': (2, 3, 7, 5), 'axis': 2, 'section': [2, 5], + 'slices': [[slice(None), slice(None), slice(None, 2)], + [slice(None), slice(None), slice(2, 5)], + [slice(None), slice(None), slice(5, None)]]}, + {'shape': (2, 3, 5, 7), 'axis': 3, 'section': [2, 5], + 'slices': [[slice(None), slice(None), slice(None), slice(None, 2)], + [slice(None), slice(None), slice(None), slice(2, 5)], + [slice(None), slice(None), slice(None), slice(5, None)]]}, + {'shape': (60, 33, 3, 3), 'axis': 0, 'section': [12, 48], + 'slices': [[slice(None, 12)], + [slice(12, 48)], + [slice(48, None)]]}, + {'shape': (33, 60, 3, 3), 'axis': 1, 'section': [12, 48], + 'slices': [[slice(None), slice(None, 12)], + [slice(None), slice(12, 48)], + [slice(None), slice(48, None)]]}, + {'shape': (33, 3, 60, 3), 'axis': 2, 'section': [12, 48], + 'slices': [[slice(None), slice(None), slice(None, 12)], + [slice(None), slice(None), slice(12, 48)], + [slice(None), slice(None), slice(48, None)]]}, + {'shape': (33, 3, 3, 60), 'axis': 3, 'section': [12, 48], + 'slices': [[slice(None), slice(None), slice(None), slice(None, 12)], + [slice(None), slice(None), slice(None), slice(12, 48)], + [slice(None), slice(None), slice(None), slice(48, None)]]}, + ], + [ + {'dtype': numpy.float32}, + ], +)) +class TestConcatPyF32(unittest.TestCase): + + def setUp(self): + self.y = numpy.arange( + numpy.prod(self.shape), dtype=self.dtype).reshape(self.shape) + self.xs = [self.y[s] for s in self.slices] + + def check_forward(self, xs_data, y_data, axis): + xs = tuple(x_data for x_data in xs_data) + xs_mdarray = mdarrayVector() + for yi in xs: + if isinstance(yi, numpy.ndarray): + if 
yi.flags.contiguous is False: + yi = numpy.ascontiguousarray(yi) + yi = ideep4py.mdarray(numpy.ascontiguousarray(yi)) + xs_mdarray.push_back(yi) + y_act = concat.Forward(xs_mdarray, self.axis) + y_act = numpy.array(y_act, dtype=self.dtype) + + numpy.testing.assert_allclose(y_data, y_act, atol=0, rtol=0) + + def test_forward_cpu(self): + self.check_forward(self.xs, self.y, axis=self.axis) + + def check_backward(self, xs_data, y_data, axis): + xs = tuple(x_data for x_data in xs_data) + xs_mdarray = mdarrayVector() + for yi in xs: + if isinstance(yi, numpy.ndarray): + if yi.flags.contiguous is False: + yi = numpy.ascontiguousarray(yi) + yi = ideep4py.mdarray(numpy.ascontiguousarray(yi)) + xs_mdarray.push_back(yi) + y_data = ideep4py.mdarray(y_data) + offsets = intVector() + # FIXME + for i in self.section: + offsets.push_back(i) + x_act_mdarray = concat.Backward(y_data, offsets, self.axis) + i = 0 + for x in xs: + x_act = numpy.array(x_act_mdarray[i], dtype=self.dtype) + numpy.testing.assert_allclose( + x, x_act, atol=0, rtol=0) + i = i + 1 + + def test_backward_cpu(self): + self.check_backward(self.xs, self.y, axis=self.axis) + + +testing.run_module(__name__, __file__) diff --git a/tests/ideep4py_tests/test_convolution2d_py.py b/tests/ideep4py_tests/test_convolution2d_py.py new file mode 100644 index 00000000..54b8b93c --- /dev/null +++ b/tests/ideep4py_tests/test_convolution2d_py.py @@ -0,0 +1,175 @@ +import sys +import unittest +import numpy +import ideep4py +from ideep4py import convolution2DParam +from ideep4py import convolution2D + +try: + import testing + from testing import condition + from testing.conv import im2col_cpu, col2im_cpu, get_conv_outsize +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +def _set_cover_all(self, x, W): + in_h, in_w = x.shape[2:] + kh, kw = W.shape[2:] + self.cover_all = ( + in_h != get_conv_outsize(self.outh, kh, self.sy, + self.ph, d=self.dy) or + in_w != get_conv_outsize(self.outw, kw, self.sx, + self.pw, d=self.dx)) + + +@testing.parameterize(*testing.product({ + 'dtype': [numpy.float32, ], + 'cover_all': [False, True], + 'channel': [1, 2, 4, 8, 10, ], + 'bs': [1, 2, 4, 8, 10, 16, 32, 64, ], + 'with_bias': [True, ], +})) +@testing.fix_random() +class TestConvolution2DPyF32(unittest.TestCase): + + def setUp(self): + self.x_shape = (self.bs, self.channel, 224, 224) + self.w_shape = (self.channel, self.channel, 3, 3) + self.b_shape = self.channel + + self.x = numpy.random.uniform(-1, 1, self.x_shape).astype(self.dtype) + self.x = ideep4py.mdarray(self.x) + self.w = numpy.random.uniform(-1, 1, self.w_shape).astype(self.dtype) + self.w = ideep4py.mdarray(self.w) + self.b = numpy.random.uniform(-1, 1, self.b_shape).astype(self.dtype) + self.b = ideep4py.mdarray(self.b) + + self.cp = convolution2DParam(self.x_shape, + 1, 1, + 1, 1, + 1, 1, + 1, 1) + + stride = 1 + pad = 1 + dilate = 1 + self.sy, self.sx = stride, stride + self.ph, self.pw = pad, pad + self.n = self.x_shape[0] + self.outc = self.w_shape[0] + self.outh = self.x_shape[2] + self.outw = self.x_shape[3] + self.cover_all = self.cover_all + self.dy, self.dx = dilate, dilate + + self.gy = numpy.random.uniform( + -1, 1, + (self.n, self.outc, self.outh, self.outw)).astype(self.dtype) + self.gy = ideep4py.mdarray(self.gy) + + self.check_forward_options = {'atol': 1e-3, 'rtol': 1e-2} + self.check_backward_options = {'atol': 1e-3, 'rtol': 1e-2} + + def check_forward(self, x, w, b, cp): + if self.with_bias: + y_act = convolution2D.Forward(x, w, b, cp) + else: 
+ y_act = convolution2D.Forward(x, w, None, cp) + y_act = numpy.array(y_act, dtype=self.dtype) + + x = numpy.array(x, dtype=self.dtype) + w = numpy.array(w, dtype=self.dtype) + b = numpy.array(b, dtype=self.dtype) + kh, kw = w.shape[2:] + col = im2col_cpu( + x, kh, kw, self.sy, self.sx, self.ph, self.pw, + cover_all=self.cover_all, dy=self.dy, dx=self.dx) + y = numpy.tensordot( + col, w, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False) + if b is not None: + y += b + y_expect = numpy.rollaxis(y, 3, 1) + numpy.testing.assert_allclose( + y_act, y_expect, **self.check_forward_options) + + def test_forward_cpu(self): + self.check_forward(self.x, self.w, self.b, self.cp) + + def check_backward_weights(self, x, w, b, cp, gy): + gW_act, gB_act = convolution2D.BackwardWeightsBias(x, gy, cp) + gW_act = numpy.array(gW_act, dtype=self.dtype) + + x = numpy.array(x, dtype=self.dtype) + w = numpy.array(w, dtype=self.dtype) + b = numpy.array(b, dtype=self.dtype) + gy = numpy.array(gy, dtype=self.dtype) + kh, kw = w.shape[2:] + col = im2col_cpu( + x, kh, kw, self.sy, self.sx, self.ph, self.pw, + cover_all=self.cover_all, dy=self.dy, dx=self.dx) + + gW_expect = numpy.tensordot( + gy, col, ((0, 2, 3), (0, 4, 5))).astype(self.dtype, copy=False) + numpy.testing.assert_allclose( + gW_act, gW_expect, **self.check_backward_options) + + @condition.retry(3) + def test_backward_cpu_weights(self): + print("test_backward_cpu_weights") + cp = convolution2DParam(self.w_shape, + 1, 1, + 1, 1, + 1, 1, + 1, 1) + + self.check_backward_weights(self.x, self.w, self.b, cp, self.gy) + + def check_backward_data(self, x, w, b, cp): + out_c, in_c, kh, kw = w.shape + n, out_c, in_h, in_w = x.shape + self.pd = self.sy * (in_h - 1) + ( + kh + (kh - 1) * (self.dy - 1)) - self.outh - self.ph + self.pr = self.sx * (in_w - 1) + ( + kw + (kw - 1) * (self.dx - 1)) - self.outw - self.pw + + _set_cover_all(self, x, w) + # create conv parameter + # for IA specific + param = convolution2DParam(x.shape, + self.dy, self.dx, + self.sy, self.sx, + self.ph, self.pw, + self.pd, self.pr) + y_act = convolution2D.BackwardData(w, x, param) + if b is not None: + y_act += b.reshape(1, b.size, 1, 1) + y_act = numpy.array(y_act, dtype=self.dtype) + + x = numpy.array(x, dtype=self.dtype) + w = numpy.array(w, dtype=self.dtype) + + gcol = numpy.tensordot(w, x, (0, 1)).astype(x.dtype, copy=False) + # - k, m, n: shape of out_channel + # - b: number of inputs + # - h, w: height and width of kernels + # k, m, n, b, h, w -> b, k, m, n, h, w + gcol = numpy.rollaxis(gcol, 3) + y_expect = col2im_cpu( + gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw, + dy=self.dy, dx=self.dx) + # b, k, h, w + if b is not None: + y_expect += b.reshape(1, b.size, 1, 1) + + numpy.testing.assert_allclose( + y_act, y_expect, **self.check_backward_options) + + @condition.retry(3) + def test_backward_cpu_data(self): + print("test_backward_cpu_data") + self.check_backward_data(self.x, self.w, self.b, self.cp) + + +testing.run_module(__name__, __file__) diff --git a/tests/ideep4py_tests/test_dropout.py b/tests/ideep4py_tests/test_dropout.py new file mode 100644 index 00000000..75d2c7bc --- /dev/null +++ b/tests/ideep4py_tests/test_dropout.py @@ -0,0 +1,52 @@ +import sys +import unittest + +import numpy +import ideep4py +from ideep4py import dropout + +try: + import testing +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +def _dropout(x, creator): + return x * creator.mask + + +@testing.parameterize(*testing.product({ + 
'dropout_ratio': [0.0, 0.1, 0.3, 0.5, 0.8], + 'dtype': [numpy.float32, ], +})) +@testing.fix_random() +class TestDropoutF32(unittest.TestCase): + + def setUp(self): + self.x = numpy.random.rand(128, 3, 224, 224).astype(self.dtype) + self.x_md = ideep4py.mdarray(self.x) + self.gy = numpy.random.rand(128, 3, 224, 224).astype(self.dtype) + + def check_forward(self, x, x_md): + mask, y = dropout.Forward(x_md, self.dropout_ratio) + y = numpy.array(y, dtype=self.dtype) + y_expect = x * mask + numpy.testing.assert_allclose(y, y_expect) + + def check_backward(self, x_md, gy): + mask, y = dropout.Forward(x_md, self.dropout_ratio) + gy_md = ideep4py.mdarray(gy) + gx = dropout.Backward(mask, gy_md) + gx = numpy.array(gx, dtype=self.dtype) + gx_expect = gy * mask + numpy.testing.assert_allclose(gx, gx_expect) + + def test_forward_cpu(self): + self.check_forward(self.x, self.x_md) + + def test_backward_cpu(self): + self.check_backward(self.x_md, self.gy) + + +testing.run_module(__name__, __file__) diff --git a/tests/ideep4py_tests/test_linear_py.py b/tests/ideep4py_tests/test_linear_py.py new file mode 100644 index 00000000..182111dd --- /dev/null +++ b/tests/ideep4py_tests/test_linear_py.py @@ -0,0 +1,107 @@ +import sys +import unittest +import numpy +import ideep4py +from ideep4py import linear + +try: + import testing + from testing import condition +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +@testing.parameterize(*testing.product({ + 'x_dtype': [numpy.float32], + 'W_dtype': [numpy.float32], +})) +class TestLinearPyF32(unittest.TestCase): + + def setUp(self): + self.W = numpy.random.uniform( + -1, 1, (2, 3)).astype(self.W_dtype) + self.b = numpy.random.uniform( + -1, 1, 2).astype(self.x_dtype) + + self.x = numpy.random.uniform(-1, 1, (4, 3)).astype(self.x_dtype) + self.gy = numpy.random.uniform(-1, 1, (4, 2)).astype(self.x_dtype) + + self.check_forward_options = {'atol': 5e-4, 'rtol': 5e-3} + self.check_backward_options = {'atol': 5e-4, 'rtol': 5e-3} + + def check_forward(self, x, W, b, y_expect): + with_bias = b is not None + + x = ideep4py.mdarray(x) + W = ideep4py.mdarray(W) + if with_bias: + b = ideep4py.mdarray(b) + y_act = linear.Forward(x, W, b) + else: + y_act = linear.Forward(x, W, None) + + y_act = numpy.array(y_act, dtype=self.x_dtype) + numpy.testing.assert_allclose( + y_expect, y_act, **self.check_forward_options) + + @condition.retry(3) + def test_forward_cpu(self): + self.check_forward(self.x, self.W, self.b, + self.x.dot(self.W.T) + self.b) + + @condition.retry(3) + def test_forward_cpu_nobias(self): + self.check_forward(self.x, self.W, None, self.x.dot(self.W.T)) + + def check_backward_data(self, x, W, gy): + gx_expect = gy.dot(W).astype(gy.dtype, copy=False) + + W = ideep4py.mdarray(W) + gy = ideep4py.mdarray(gy) + gx_act = linear.BackwardData(W, gy) + gx_act = numpy.array(gx_act, dtype=self.W_dtype) + numpy.testing.assert_allclose( + gx_expect, gx_act, **self.check_backward_options) + + @condition.retry(3) + def test_backward_cpu_data(self): + self.check_backward_data(self.x, self.W, self.gy) + + def check_backward_weights(self, x, gy): + gW_expect = gy.T.dot(x).astype(self.W_dtype, copy=False) + + x = ideep4py.mdarray(x) + gy = ideep4py.mdarray(gy) + gW_act = linear.BackwardWeights(x, gy) + gW_act = numpy.array(gW_act, dtype=self.W_dtype) + + numpy.testing.assert_allclose( + gW_expect, gW_act, **self.check_backward_options) + + @condition.retry(3) + def
test_backward_cpu_weights(self): + self.check_backward_weights(self.x, self.gy) + + def check_backward_weights_bias(self, x, gy): + gW_expect = gy.T.dot(x).astype(self.W_dtype, copy=False) + gb_expect = gy.sum((0)) + + x = ideep4py.mdarray(x) + gy = ideep4py.mdarray(gy) + (gW_act, gb_act) = linear.BackwardWeightsBias(x, gy) + gW_act = numpy.array(gW_act, dtype=self.W_dtype) + gb_act = numpy.array(gb_act, dtype=self.W_dtype) + + numpy.testing.assert_allclose( + gW_expect, gW_act, **self.check_backward_options) + numpy.testing.assert_allclose( + gb_expect, gb_act, **self.check_backward_options) + + @condition.retry(3) + def test_backward_cpu_weights_bias(self): + self.check_backward_weights_bias(self.x, self.gy) + + +testing.run_module(__name__, __file__) diff --git a/tests/ideep4py_tests/test_local_response_normalization_py.py b/tests/ideep4py_tests/test_local_response_normalization_py.py new file mode 100644 index 00000000..cb77740a --- /dev/null +++ b/tests/ideep4py_tests/test_local_response_normalization_py.py @@ -0,0 +1,86 @@ +import sys +import unittest + +import numpy +import six +import ideep4py +from ideep4py import localResponseNormalizationParam +from ideep4py import localResponseNormalization + +try: + import testing +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +@testing.parameterize(*testing.product({ + 'dtype': [numpy.float32], + 'shape': [(2, 7, 1, 1), (2, 7, 3, 2), ], +})) +class TestLocalResponseNormalizationPyF32(unittest.TestCase): + + def setUp(self): + self.x = numpy.random.uniform( + -1, 1, self.shape).astype(self.dtype) + self.gy = numpy.random.uniform( + -1, 1, self.shape).astype(self.dtype) + self.pp = localResponseNormalizationParam( + 5, 2, 1e-4, .75, + ideep4py.localResponseNormalizationParam.lrn_across_channels + ) + self.check_forward_options = {'atol': 1e-4, 'rtol': 1e-3} + self.check_backward_options = {'atol': 1e-4, 'rtol': 1e-3} + + def check_forward(self, x, pp): + x_mdarray = ideep4py.mdarray(x) + (y_act, ws) = localResponseNormalization.Forward(x_mdarray, pp) + y_act = numpy.array(y_act, dtype=self.dtype) + + y_expect = numpy.zeros_like(self.x) + for n, c, h, w in numpy.ndindex(self.x.shape): + s = 0 + for i in six.moves.range(max(0, c - 2), min(7, c + 2)): + s += self.x[n, i, h, w] ** 2 + denom = (2 + 1e-4 * s) ** .75 + y_expect[n, c, h, w] = self.x[n, c, h, w] / denom + + numpy.testing.assert_allclose( + y_expect, y_act, **self.check_forward_options) + + def test_forward_cpu(self): + self.check_forward(self.x, self.pp) + + def check_backward(self, x, gy, pp): + x_mdarray = ideep4py.mdarray(x) + gy_mdarray = ideep4py.mdarray(gy) + (y_act, ws) = localResponseNormalization.Forward(x_mdarray, pp) + gx_act = localResponseNormalization.Backward( + x_mdarray, gy_mdarray, ws, pp) + gx_act = numpy.array(gx_act, dtype=self.dtype) + + half_n = self.pp.n // 2 + x2 = numpy.square(x) + sum_part = x2.copy() + for i in six.moves.range(1, half_n + 1): + sum_part[:, i:] += x2[:, :-i] + sum_part[:, :-i] += x2[:, i:] + self.unit_scale = pp.k + pp.alpha * sum_part + self.scale = self.unit_scale ** -pp.beta + self.y = x_mdarray * self.scale + + summand = self.y * gy / self.unit_scale + sum_p = summand.copy() + for i in six.moves.range(1, half_n + 1): + sum_p[:, i:] += summand[:, :-i] + sum_p[:, :-i] += summand[:, i:] + + gx_expect = gy * self.scale - 2 * pp.alpha * pp.beta * x * sum_p + numpy.testing.assert_allclose( + gx_expect, gx_act, **self.check_backward_options) + + def test_backward_cpu(self): + 
self.check_backward(self.x, self.gy, self.pp) + + +testing.run_module(__name__, __file__) diff --git a/tests/ideep4py_tests/test_pooling_2d_py.py b/tests/ideep4py_tests/test_pooling_2d_py.py new file mode 100644 index 00000000..1b17a086 --- /dev/null +++ b/tests/ideep4py_tests/test_pooling_2d_py.py @@ -0,0 +1,81 @@ +import sys +import unittest + +import numpy +import six + +import ideep4py +from ideep4py import pooling2DParam +from ideep4py import pooling2D + +try: + import testing + from testing import condition + from testing.conv import col2im_cpu +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +@testing.parameterize(*testing.product({ + 'dtype': [numpy.float32], + 'channel': [1, 2, 4, 8, 10, 16, 24, 32, 64], + 'bs': [0, 1, 2, 4, 6, 8, 10, 16, 24, 32, 64], + 'stride': [2, ], +})) +class TestPooling2DPyF32(unittest.TestCase): + + def setUp(self): + self.x = numpy.random.uniform( + -1, 1, (self.bs, self.channel, 4, 3)).astype(self.dtype) + self.gy = numpy.random.uniform( + -1, 1, (self.bs, self.channel, 2, 2)).astype(self.dtype) + + self.pp_fwd = pooling2DParam( + self.gy.shape, 3, 3, self.stride, self.stride, 1, 1, + 1, 1, pooling2DParam.pooling_avg_include_padding) + self.pp_bwd = pooling2DParam( + (self.bs, self.channel, 4, 3), 3, 3, self.stride, self.stride, + 1, 1, 1, 1, pooling2DParam.pooling_avg_include_padding) + + self.check_forward_options = {'atol': 1e-5, 'rtol': 1e-4} + self.check_backward_options = {'atol': 1e-5, 'rtol': 1e-4} + + def check_forward(self, x, pp): + x_mdarray = ideep4py.mdarray(x) + (y_act,) = pooling2D.Forward(x_mdarray, pp) + y_act = numpy.array(y_act, dtype=self.dtype) + + for k in six.moves.range(self.bs): + for c in six.moves.range(self.channel): + x = self.x[k, c] + expect = numpy.array([ + [x[0:2, 0:2].sum(), x[0:2, 1:3].sum()], + [x[1:4, 0:2].sum(), x[1:4, 1:3].sum()]]) / 9 + numpy.testing.assert_allclose( + expect, y_act[k, c], **self.check_forward_options) + + @condition.retry(3) + def test_forward_cpu(self): + self.check_forward(self.x, self.pp_fwd) + + def check_backward(self, x, gy, pp): + # self.shape[2:] + h, w = 4, 3 + gcol = numpy.tile(gy[:, :, None, None], + (1, 1, 3, 3, 1, 1)) + gx_expect = col2im_cpu(gcol, 2, 2, 1, 1, h, w) + gx_expect /= 3 * 3 + gy_mdarray = ideep4py.mdarray(gy) + gx_act = pooling2D.Backward(gy_mdarray, None, pp) + gx_act = numpy.array(gx_act, dtype=self.dtype) + + numpy.testing.assert_allclose( + gx_expect, gx_act, **self.check_backward_options) + + @condition.retry(3) + def test_backward_cpu(self): + self.check_backward(self.x, self.gy, self.pp_bwd) + + +testing.run_module(__name__, __file__) diff --git a/tests/ideep4py_tests/test_relu_py.py b/tests/ideep4py_tests/test_relu_py.py new file mode 100644 index 00000000..97cf1dd0 --- /dev/null +++ b/tests/ideep4py_tests/test_relu_py.py @@ -0,0 +1,65 @@ +import sys +import unittest + +import numpy + +import ideep4py +from ideep4py import relu + +try: + import testing +except Exception as ex: + print('*** testing directory is missing: %s' % ex) + sys.exit(-1) + + +@testing.parameterize(*testing.product({ + 'shape': [(3, 2), (224, 224)], + 'dtype': [numpy.float32, ], +})) +@testing.fix_random() +class TestReluPyF32(unittest.TestCase): + + def setUp(self): + self.x = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype) + self.y = numpy.maximum(self.x, 0, dtype=(self.x).dtype) + self.gy = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype) + self.gx = (self.x > 0) * self.gy + + def check_forward(self, x, y): + mx = 
diff --git a/tests/ideep4py_tests/test_relu_py.py b/tests/ideep4py_tests/test_relu_py.py
new file mode 100644
index 00000000..97cf1dd0
--- /dev/null
+++ b/tests/ideep4py_tests/test_relu_py.py
@@ -0,0 +1,65 @@
+import sys
+import unittest
+
+import numpy
+
+import ideep4py
+from ideep4py import relu
+
+try:
+    import testing
+except Exception as ex:
+    print('*** testing directory is missing: %s' % ex)
+    sys.exit(-1)
+
+
+@testing.parameterize(*testing.product({
+    'shape': [(3, 2), (224, 224)],
+    'dtype': [numpy.float32, ],
+}))
+@testing.fix_random()
+class TestReluPyF32(unittest.TestCase):
+
+    def setUp(self):
+        self.x = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype)
+        self.y = numpy.maximum(self.x, 0, dtype=self.x.dtype)
+        self.gy = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype)
+        self.gx = (self.x > 0) * self.gy
+
+    def check_forward(self, x, y):
+        mx = ideep4py.mdarray(x)
+        x2 = numpy.array(mx)
+        numpy.testing.assert_allclose(x, x2)
+        my = relu.Forward(mx)
+        y2 = numpy.array(my)
+        numpy.testing.assert_allclose(y, y2)
+
+    def test_forward_cpu(self):
+        self.check_forward(self.x, self.y)
+
+    def check_double_forward(self, x, y):
+        mx = ideep4py.mdarray(x)
+        x2 = numpy.array(mx)
+        numpy.testing.assert_allclose(x, x2)
+        my = relu.Forward(mx)
+        y2 = numpy.array(my)
+        numpy.testing.assert_allclose(y, y2)
+        my = relu.Forward(my)
+        y2 = numpy.array(my)
+        numpy.testing.assert_allclose(y, y2)
+
+    def test_double_forward_cpu(self):
+        self.check_double_forward(self.x, self.y)
+
+    def check_backward(self, x, gy, gx):
+        mx = ideep4py.mdarray(x)
+        mgy = ideep4py.mdarray(gy)
+        mgx = relu.Backward(mx, mgy)
+        gx1 = numpy.array(mgx)
+        numpy.testing.assert_allclose(gx1, gx)
+
+    def test_backward_cpu(self):
+        self.check_backward(self.x, self.gy, self.gx)
+
+
+testing.run_module(__name__, __file__)
diff --git a/tests/ideep4py_tests/testing/__init__.py b/tests/ideep4py_tests/testing/__init__.py
new file mode 100644
index 00000000..5c8e6e5f
--- /dev/null
+++ b/tests/ideep4py_tests/testing/__init__.py
@@ -0,0 +1,18 @@
+from testing import parameterized  # NOQA
+from testing.parameterized import parameterize  # NOQA
+from testing.parameterized import product  # NOQA
+from testing.parameterized import product_dict  # NOQA
+from testing.random import fix_random  # NOQA
+
+
+def run_module(name, file):
+    """Run the test cases in the given file.
+
+    Args:
+        name: __name__ attribute of the file.
+        file: __file__ attribute of the file.
+    """
+
+    if name == '__main__':
+        import pytest
+        pytest.main([file, '-vvs', '-x', '--pdb'])
diff --git a/tests/ideep4py_tests/testing/condition.py b/tests/ideep4py_tests/testing/condition.py
new file mode 100644
index 00000000..cf7462d7
--- /dev/null
+++ b/tests/ideep4py_tests/testing/condition.py
@@ -0,0 +1,112 @@
+import functools
+import unittest
+
+import six
+
+
+class QuietTestRunner(object):
+
+    def run(self, suite):
+        result = unittest.TestResult()
+        suite(result)
+        return result
+
+
+def repeat_with_success_at_least(times, min_success):
+    """Decorator for multiple trials of a test case.
+
+    The decorated test case is launched multiple times and is judged as
+    passed when it succeeds in at least ``min_success`` trials.
+    Once the number of successful trials reaches ``min_success``,
+    the remaining trials are skipped.
+
+    Args:
+        times(int): The number of trials.
+        min_success(int): The number of successful trials required for
+            the decorated test case to be regarded as passed.
+
+    """
+
+    assert times >= min_success
+
+    def _repeat_with_success_at_least(f):
+        @functools.wraps(f)
+        def wrapper(*args, **kwargs):
+            assert len(args) > 0
+            instance = args[0]
+            assert isinstance(instance, unittest.TestCase)
+            success_counter = 0
+            failure_counter = 0
+            results = []
+
+            def fail():
+                msg = '\nFail: {0}, Success: {1}'.format(
+                    failure_counter, success_counter)
+                if len(results) > 0:
+                    first = results[0]
+                    errs = first.failures + first.errors
+                    if len(errs) > 0:
+                        err_msg = '\n'.join(fail[1] for fail in errs)
+                        msg += '\n\nThe first error message:\n' + err_msg
+                instance.fail(msg)
+
+            for _ in six.moves.range(times):
+                suite = unittest.TestSuite()
+                # Create new instance to call the setup and the teardown only
+                # once.
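+                # A fresh TestCase instance is created for every trial, and
+                # unittest.FunctionTestCase runs the wrapped method between
+                # that instance's setUp and tearDown, so no state leaks
+                # across trials.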
+                ins = type(instance)(instance._testMethodName)
+                suite.addTest(
+                    unittest.FunctionTestCase(
+                        lambda: f(ins, *args[1:], **kwargs),
+                        setUp=ins.setUp,
+                        tearDown=ins.tearDown))
+
+                result = QuietTestRunner().run(suite)
+                if result.wasSuccessful():
+                    success_counter += 1
+                else:
+                    results.append(result)
+                    failure_counter += 1
+                if success_counter >= min_success:
+                    instance.assertTrue(True)
+                    return
+                if failure_counter > times - min_success:
+                    fail()
+                    return
+            fail()
+        return wrapper
+    return _repeat_with_success_at_least
+
+
+def repeat(times):
+    """Decorator that requires the test to succeed multiple times in a row.
+
+    The decorated test case is launched multiple times and is regarded
+    as passed only if it is successful the specified number of times
+    in a row.
+
+    .. note::
+        In the current implementation, this decorator records the
+        failure information of each trial.
+
+    Args:
+        times(int): The number of trials.
+    """
+    return repeat_with_success_at_least(times, times)
+
+
+def retry(times):
+    """Decorator that requires the test to succeed at least once.
+
+    The decorated test case is launched multiple times and is regarded
+    as passed if it is successful at least once.
+
+    .. note::
+        In the current implementation, this decorator records the
+        failure information of each trial.
+
+    Args:
+        times(int): The number of trials.
+    """
+    return repeat_with_success_at_least(times, 1)
diff --git a/tests/ideep4py_tests/testing/conv.py b/tests/ideep4py_tests/testing/conv.py
new file mode 100644
index 00000000..87169691
--- /dev/null
+++ b/tests/ideep4py_tests/testing/conv.py
@@ -0,0 +1,72 @@
+import numpy
+import six
+
+
+def get_conv_outsize(size, k, s, p, cover_all=False, d=1):
+    """Calculates the output size of a convolution.
+
+    This function takes the size of the input feature map, kernel, stride,
+    and padding of one particular dimension, then calculates the output
+    feature map size of that dimension.
+
+    .. seealso:: :func:`~chainer.utils.get_deconv_outsize`
+
+    Args:
+        size (int): The size of the input feature map. It is usually the
+            length of a side of the feature map.
+        k (int): The size of the convolution kernel.
+        s (int): The size of the stride.
+        p (int): The size of the padding.
+        cover_all (bool): Use ``cover_all`` option or not.
+        d (int): The size of the dilation.
+
+    Returns:
+        int: The expected output size of the convolution operation.
+
+    """
+    dk = k + (k - 1) * (d - 1)
+    if cover_all:
+        return (size + p * 2 - dk + s - 1) // s + 1
+    else:
+        return (size + p * 2 - dk) // s + 1
+
+
+def im2col_cpu(
+        img, kh, kw, sy, sx, ph, pw, pval=0, cover_all=False, dy=1, dx=1,
+        out_h=None, out_w=None):
+    n, c, h, w = img.shape
+    if out_h is None:
+        out_h = get_conv_outsize(h, kh, sy, ph, cover_all, dy)
+    assert out_h > 0, 'Height in the output should be positive.'
+    if out_w is None:
+        out_w = get_conv_outsize(w, kw, sx, pw, cover_all, dx)
+    assert out_w > 0, 'Width in the output should be positive.'
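+
+    # The bottom/right padding is widened by an extra (stride - 1) pixels so
+    # that the strided slices below never run past the padded array edge,
+    # whatever out_h and out_w turn out to be.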
+    img = numpy.pad(img,
+                    ((0, 0), (0, 0), (ph, ph + sy - 1), (pw, pw + sx - 1)),
+                    mode='constant', constant_values=(pval,))
+    col = numpy.ndarray((n, c, kh, kw, out_h, out_w), dtype=img.dtype)
+
+    for j in six.moves.range(kh):
+        jdy = j * dy
+        j_lim = jdy + sy * out_h
+        for i in six.moves.range(kw):
+            idx = i * dx
+            i_lim = idx + sx * out_w
+            col[:, :, j, i, :, :] = img[:, :, jdy:j_lim:sy, idx:i_lim:sx]
+
+    return col
+
+
+def col2im_cpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
+    n, c, kh, kw, out_h, out_w = col.shape
+    img = numpy.zeros((n, c, h + 2 * ph + sy - 1, w + 2 * pw + sx - 1),
+                      dtype=col.dtype)
+    for j in six.moves.range(kh):
+        jdy = j * dy
+        j_lim = jdy + sy * out_h
+        for i in six.moves.range(kw):
+            idx = i * dx
+            i_lim = idx + sx * out_w
+            img[:, :, jdy:j_lim:sy, idx:i_lim:sx] += col[:, :, j, i]
+    return img[:, :, ph:h + ph, pw:w + pw]
diff --git a/tests/ideep4py_tests/testing/parameterized.py b/tests/ideep4py_tests/testing/parameterized.py
new file mode 100644
index 00000000..1865a0af
--- /dev/null
+++ b/tests/ideep4py_tests/testing/parameterized.py
@@ -0,0 +1,93 @@
+import functools
+import inspect
+import itertools
+import sys
+import types
+import unittest
+
+import six
+
+
+def _gen_case(base, module, i, param):
+    cls_name = '%s_param_%d' % (base.__name__, i)
+
+    # Add parameters as members
+
+    def __str__(self):
+        name = base.__str__(self)
+        return '%s parameter: %s' % (name, param)
+
+    mb = {'__str__': __str__}
+    for k, v in six.iteritems(param):
+        if isinstance(v, types.FunctionType):
+
+            def create_new_v():
+                f = v
+
+                def new_v(self, *args, **kwargs):
+                    return f(*args, **kwargs)
+                return new_v
+
+            mb[k] = create_new_v()
+        else:
+            mb[k] = v
+
+    cls = type(cls_name, (base,), mb)
+
+    # Wrap test methods to generate useful error message
+
+    def wrap_test_method(method):
+        @functools.wraps(method)
+        def wrap(*args, **kwargs):
+            try:
+                return method(*args, **kwargs)
+            except AssertionError as e:
+                s = six.StringIO()
+                s.write('Parameterized test failed.\n\n')
+                s.write('Base test method: {}.{}\n'.format(
+                    base.__name__, method.__name__))
+                s.write('Test parameters:\n')
+                for k, v in six.iteritems(param):
+                    s.write('  {}: {}\n'.format(k, v))
+                s.write('\n')
+                s.write('{}: {}\n'.format(type(e).__name__, e))
+                raise AssertionError(s.getvalue())
+        return wrap
+
+    # ismethod for Python 2 and isfunction for Python 3
+    members = inspect.getmembers(
+        cls, predicate=lambda _: inspect.ismethod(_) or inspect.isfunction(_))
+    for name, method in members:
+        if name.startswith('test_'):
+            setattr(cls, name, wrap_test_method(method))
+
+    # Add new test class to module
+    setattr(module, cls_name, cls)
+
+
+def _gen_cases(name, base, params):
+    module = sys.modules[name]
+    for i, param in enumerate(params):
+        _gen_case(base, module, i, param)
+
+
+def parameterize(*params):
+    def f(klass):
+        assert issubclass(klass, unittest.TestCase)
+        _gen_cases(klass.__module__, klass, params)
+        # Remove original base class
+        return None
+    return f
+
+
+def product(parameter):
+    keys = sorted(parameter)
+    values = [parameter[key] for key in keys]
+    values_product = itertools.product(*values)
+    return [dict(zip(keys, vals)) for vals in values_product]
+
+
+def product_dict(*parameters):
+    return [
+        {k: v for dic in dicts for k, v in six.iteritems(dic)}
+        for dicts in itertools.product(*parameters)]
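For reference, `product` expands a dict of value lists into the cross product of parameter dicts (keys in sorted order), while `product_dict` takes lists of pre-built dicts and merges one dict from each list per combination. A small illustration with made-up values:

```python
from testing.parameterized import product, product_dict

# Cross product over value lists, keys visited in sorted order:
print(product({'a': [1, 2], 'b': ['x']}))
# -> [{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'x'}]

# Cross product over lists of dicts, merged per combination:
print(product_dict([{'a': 1}], [{'b': 'x'}, {'b': 'y'}]))
# -> [{'a': 1, 'b': 'x'}, {'a': 1, 'b': 'y'}]
```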
diff --git a/tests/ideep4py_tests/testing/random.py b/tests/ideep4py_tests/testing/random.py
new file mode 100644
index 00000000..03d21f58
--- /dev/null
+++ b/tests/ideep4py_tests/testing/random.py
@@ -0,0 +1,132 @@
+from __future__ import absolute_import
+import atexit
+import functools
+import numpy
+import os
+import random
+import types
+import unittest
+
+_old_python_random_state = None
+_old_numpy_random_state = None
+
+
+def _numpy_do_setup(deterministic=True):
+    global _old_python_random_state
+    global _old_numpy_random_state
+    _old_python_random_state = random.getstate()
+    _old_numpy_random_state = numpy.random.get_state()
+    if not deterministic:
+        numpy.random.seed()
+    else:
+        numpy.random.seed(100)
+
+
+def _numpy_do_teardown():
+    global _old_python_random_state
+    global _old_numpy_random_state
+    random.setstate(_old_python_random_state)
+    numpy.random.set_state(_old_numpy_random_state)
+    _old_python_random_state = None
+    _old_numpy_random_state = None
+
+
+def do_setup(deterministic=True):
+    _numpy_do_setup(deterministic)
+
+
+def do_teardown():
+    _numpy_do_teardown()
+
+
+# In some tests (which utilize condition.repeat or condition.retry),
+# setUp/tearDown is nested. _setup_random() and _teardown_random() do their
+# work only in the outermost setUp/tearDown pair.
+_nest_count = 0
+
+
+@atexit.register
+def _check_teardown():
+    assert _nest_count == 0, ('_setup_random() and _teardown_random() '
+                              'must be called in pairs.')
+
+
+def _setup_random():
+    """Sets up the deterministic random state of ``numpy``.
+
+    """
+    global _nest_count
+    if _nest_count == 0:
+        nondeterministic = bool(int(os.environ.get(
+            'CHAINER_TEST_RANDOM_NONDETERMINISTIC', '0')))
+        do_setup(not nondeterministic)
+    _nest_count += 1
+
+
+def _teardown_random():
+    """Tears down the deterministic random state set up by ``_setup_random``.
+
+    """
+    global _nest_count
+    assert _nest_count > 0, '_setup_random has not been called'
+    _nest_count -= 1
+    if _nest_count == 0:
+        do_teardown()
+
+
+def generate_seed():
+    assert _nest_count > 0, 'random is not set up'
+    return numpy.random.randint(0xffffffff)
+
+
+def fix_random():
+    """Decorator that fixes random numbers in a test.
+
+    This decorator can be applied to either a test case class or a test
+    method. It should not be applied within ``condition.retry`` or
+    ``condition.repeat``.
+    """
+
+    # TODO(niboshi): Prevent this decorator from being applied within
+    # condition.repeat or condition.retry decorators. That would repeat
+    # tests with the same random seeds. It's okay to apply this outside
+    # these decorators.
+
+    def decorator(impl):
+        if (isinstance(impl, types.FunctionType) and
+                impl.__name__.startswith('test_')):
+            # Applied to test method
+            @functools.wraps(impl)
+            def test_func(self, *args, **kw):
+                _setup_random()
+                try:
+                    impl(self, *args, **kw)
+                finally:
+                    _teardown_random()
+            return test_func
+        elif isinstance(impl, type) and issubclass(impl, unittest.TestCase):
+            # Applied to test case class
+            klass = impl
+
+            setUp_ = klass.setUp
+            tearDown_ = klass.tearDown
+
+            @functools.wraps(setUp_)
+            def setUp(self):
+                _setup_random()
+                setUp_(self)
+
+            @functools.wraps(tearDown_)
+            def tearDown(self):
+                try:
+                    tearDown_(self)
+                finally:
+                    _teardown_random()
+
+            klass.setUp = setUp
+            klass.tearDown = tearDown
+            return klass
+        else:
+            raise ValueError('Can\'t apply fix_random to {}'.format(impl))
+
+    return decorator
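Putting the helpers together, every test module in this suite follows one pattern: a parameterized class, a fixed random seed, retries for flaky numerical checks, and a `run_module` footer. A minimal hypothetical example (the class and its assertion are ours, not part of the diff):

```python
import unittest

import numpy

import testing
from testing import condition


@testing.parameterize(*testing.product({
    'shape': [(2, 3), (4, 5)],
    'dtype': [numpy.float32],
}))
@testing.fix_random()
class TestExample(unittest.TestCase):

    @condition.retry(3)
    def test_sum_is_finite(self):
        # self.shape and self.dtype are injected by testing.parameterize.
        x = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype)
        self.assertTrue(numpy.isfinite(x.sum()))


testing.run_module(__name__, __file__)
```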