[xnnpack] update to 2024-05-06 (#194)
* [xnnpack] update to 2024-05-06
* [onnxruntime] Update to v1.17.3 (#192)
* [tensorflow-lite] Update to 2.16.1 (#196)
* [onnxruntime] Fix more CI failures (#197)
* update baseline
luncliff authored May 12, 2024
1 parent 4276773 commit 642018f
Showing 34 changed files with 3,374 additions and 1,018 deletions.
2 changes: 1 addition & 1 deletion .circleci/port-windows.txt
@@ -1,2 +1,2 @@
onnxruntime[training,directml,xnnpack]:x64-windows
onnxruntime[directml,xnnpack]:x64-windows
openssl3[tools]:x64-windows
4 changes: 4 additions & 0 deletions .github/workflows/build-macos.yml
@@ -27,6 +27,10 @@ jobs:
with:
brew: ninja autoconf automake libtool

# - uses: mobiledevops/[email protected]
# with:
# xcode-select-version: "15.2"

- name: "create cache folders"
run: |
mkdir -p ${VCPKG_DOWNLOADS}
10 changes: 10 additions & 0 deletions .github/workflows/build-windows-hosted.yml
@@ -45,6 +45,16 @@ jobs:
env:
VCPKG_DEFAULT_TRIPLET: "x64-windows"

- uses: lukka/[email protected]
with:
vcpkgDirectory: "C:/vcpkg"
vcpkgGitCommitId: "943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26
vcpkgJsonGlob: "test/vcpkg.json"
runVcpkgInstall: true
runVcpkgFormatString: '[`install`, `--keep-going`, `--clean-buildtrees-after-build`, `--clean-packages-after-build`, `--triplet`, `$[env.VCPKG_DEFAULT_TRIPLET]`]'
env:
VCPKG_DEFAULT_TRIPLET: "arm64-windows"

- uses: yumis-coconudge/[email protected]
with:
additional-path: "C:/vcpkg/installed"
4 changes: 4 additions & 0 deletions .github/workflows/build-windows.yml
@@ -26,6 +26,10 @@ jobs:
- uses: microsoft/setup-msbuild@v2
with:
msbuild-architecture: x64
# - uses: humbletim/[email protected]
# with:
# version: 1.3.204.1
# cache: true

- name: "Setup Environment"
run: |
260 changes: 184 additions & 76 deletions ports/onnxruntime/fix-cmake.patch

2,284 changes: 2,284 additions & 0 deletions ports/onnxruntime/fix-onnxruntime-pr-19966.patch

15 changes: 0 additions & 15 deletions ports/onnxruntime/fix-source-flatbuffers.patch

This file was deleted.

15 changes: 15 additions & 0 deletions ports/onnxruntime/fix-sources.patch
@@ -1,3 +1,18 @@
diff --git a/onnxruntime/core/framework/kernel_type_str_resolver_utils.h b/onnxruntime/core/framework/kernel_type_str_resolver_utils.h
index 3d06013..4121534 100644
--- a/onnxruntime/core/framework/kernel_type_str_resolver_utils.h
+++ b/onnxruntime/core/framework/kernel_type_str_resolver_utils.h
@@ -10,9 +10,7 @@
#include "core/framework/kernel_type_str_resolver.h"
#include "core/graph/op_identifier.h"

-namespace flatbuffers {
-class DetachedBuffer;
-}
+#include <flatbuffers/flatbuffers.h>

namespace onnxruntime::kernel_type_str_resolver_utils {

diff --git a/onnxruntime/core/optimizer/compute_optimizer/upstream_gather_actors.h b/onnxruntime/core/optimizer/compute_optimizer/upstream_gather_actors.h
index 0c21be1..3ee6061 100644
--- a/onnxruntime/core/optimizer/compute_optimizer/upstream_gather_actors.h
169 changes: 169 additions & 0 deletions ports/onnxruntime/fix-xnnpack.patch
@@ -0,0 +1,169 @@
diff --git a/onnxruntime/core/providers/xnnpack/math/softmax.cc b/onnxruntime/core/providers/xnnpack/math/softmax.cc
index 87440b7..81baef0 100644
--- a/onnxruntime/core/providers/xnnpack/math/softmax.cc
+++ b/onnxruntime/core/providers/xnnpack/math/softmax.cc
@@ -159,26 +159,20 @@ Softmax::Softmax(const OpKernelInfo& info) : XnnpackKernel{info} {
axis_ = gsl::narrow<int>(HandleNegativeAxis(axis_, int64_t(rank)));

auto input_shape = utils::GetTensorShapeFromTensorShapeProto(*x_shape);
- int64_t channels = opset_ < 13 ? input_shape.SizeFromDimension(axis_) : input_shape[axis_];
+ // int64_t channels = opset_ < 13 ? input_shape.SizeFromDimension(axis_) : input_shape[axis_];

xnn_status xstatus = xnn_status_invalid_state;
struct xnn_operator* p = nullptr;
if (op_type_ == OpComputeType::op_compute_type_qu8) {
// the order of input tensor, x,x_scale, x_zp, y_scale, y_zp
OpQuantParam quant_param = ParseQuantParamForOp(info, x_dtype, 1);
- xstatus = xnn_create_softmax_nc_qu8(channels,
- channels,
- channels,
- quant_param[0].first[0], // x_scale
+ xstatus = xnn_create_softmax_nc_qu8(quant_param[0].first[0], // x_scale
quant_param[1].second, // y_zp
quant_param[1].first[0], // y_scale
0, // flags,
&p);
} else if (op_type_ == OpComputeType::op_compute_type_fp32) {
- xstatus = xnn_create_softmax_nc_f32(channels,
- channels,
- channels,
- 0, // flags,
+ xstatus = xnn_create_softmax_nc_f32(0, // flags,
&p);
}

@@ -205,7 +199,10 @@ Status Softmax::Compute(OpKernelContext* ctx) const {

auto reshape_fn = op_type_ == OpComputeType::op_compute_type_qu8 ? xnn_reshape_softmax_nc_qu8
: xnn_reshape_softmax_nc_f32;
- status = reshape_fn(op0_.get(), N, threadpool);
+ size_t channels = 0; // todo
+ size_t input_stride = 0; // todo
+ size_t output_stride = 0; // todo
+ status = reshape_fn(op0_.get(), channels, input_stride, output_stride, N, threadpool);

if (status != xnn_status_success) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_softmax_nc_", OpTypeToString(op_type_),
diff --git a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc
index 58c209a..ccb0551 100644
--- a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc
+++ b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc
@@ -42,7 +42,6 @@ Status CreateXnnpackKernel(const PoolAttributes& pool_attrs,
input_padding_bottom, input_padding_left,
pooling_height, pooling_width,
stride_height, stride_width,
- C, C, C, // channels, input_pixel_stride, output_pixel_stride
foutput_min, foutput_max, flags, &p);
} else if (avgpool_type == OpComputeType::op_compute_type_qu8) {
const float output_scale = quant_param[1].first[0];
@@ -53,7 +52,6 @@ Status CreateXnnpackKernel(const PoolAttributes& pool_attrs,
input_padding_bottom, input_padding_left,
pooling_height, pooling_width,
stride_height, stride_width,
- C, C, C, // channels, input_pixel_stride, output_pixel_stride
quant_param[0].second,
quant_param[0].first[0],
quant_param[1].second,
@@ -247,7 +245,11 @@ Status AveragePool::Compute(OpKernelContext* context) const {
? xnn_reshape_average_pooling2d_nhwc_f32
: xnn_reshape_average_pooling2d_nhwc_qu8;

+ size_t channels = 0; // todo
+ size_t input_stride = 0; // todo
+ size_t output_stride = 0; // todo
auto status = reshape_fn(op0_.get(), N, H, W,
+ channels,input_stride,output_stride,
&workspace_size, &workspace_alignment,
/*output_height_out=*/nullptr, /*output_width_out=*/nullptr,
threadpool);
diff --git a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc
index 2ef9f97..0ad08f2 100644
--- a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc
+++ b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc
@@ -172,7 +172,6 @@ MaxPool::MaxPool(const OpKernelInfo& info)
pooling_height, pooling_width,
stride_height, stride_width,
dilation_height, dilation_width,
- C, C, C, // channels, input_pixel_stride, output_pixel_stride
foutput_min, foutput_max, flags, &p);
} else if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
maxpool_type_ = OpComputeType::op_compute_type_qu8;
@@ -183,7 +182,6 @@ MaxPool::MaxPool(const OpKernelInfo& info)
pooling_height, pooling_width,
stride_height, stride_width,
dilation_height, dilation_width,
- C, C, C, // channels, input_pixel_stride, output_pixel_stride
output_min, output_max, flags, &p);
} else if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_INT8) {
maxpool_type_ = OpComputeType::op_compute_type_qs8;
@@ -194,7 +192,6 @@ MaxPool::MaxPool(const OpKernelInfo& info)
pooling_height, pooling_width,
stride_height, stride_width,
dilation_height, dilation_width,
- C, C, C, // channels, input_pixel_stride, output_pixel_stride
output_min, output_max, flags, &p);
} else {
auto stype = DataTypeImpl::ToString(DataTypeImpl::TypeFromProto(*X_arg.TypeAsProto()));
@@ -232,8 +229,11 @@ Status MaxPool::Compute(OpKernelContext* context) const {
else if (maxpool_type_ == OpComputeType::op_compute_type_qs8) {
reshape_fn = xnn_reshape_max_pooling2d_nhwc_s8;
}
-
+ size_t channels = 0; // todo
+ size_t input_stride = 0; // todo
+ size_t output_stride = 0; // todo
auto status = reshape_fn(op0_.get(), N, H, W,
+ channels, input_stride, output_stride,
/*output_height_out=*/nullptr, /*output_width_out=*/nullptr,
threadpool);
if (status != xnn_status_success) {
diff --git a/onnxruntime/core/providers/xnnpack/tensor/resize.cc b/onnxruntime/core/providers/xnnpack/tensor/resize.cc
index 0c9e2e9..556956f 100644
--- a/onnxruntime/core/providers/xnnpack/tensor/resize.cc
+++ b/onnxruntime/core/providers/xnnpack/tensor/resize.cc
@@ -209,11 +209,11 @@ Resize::Resize(const OpKernelInfo& info) : UpsampleBase(info), XnnpackKernel{inf
xnn_status xstatus = xnn_status_invalid_state;
struct xnn_operator* p = nullptr;
if (op_type_ == OpComputeType::op_compute_type_fp32) {
- xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels, channels, flags, &p);
+ xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels, flags, &p);
} else if (op_type_ == OpComputeType::op_compute_type_qu8) {
- xstatus = xnn_create_resize_bilinear2d_nhwc_u8(channels, channels, channels, flags, &p);
+ xstatus = xnn_create_resize_bilinear2d_nhwc_u8(channels, channels, flags, &p);
} else {
- xstatus = xnn_create_resize_bilinear2d_nhwc_s8(channels, channels, channels, flags, &p);
+ xstatus = xnn_create_resize_bilinear2d_nhwc_s8(channels, channels, flags, &p);
}

ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_resize_bilinear2d_nhwc_", OpTypeToString(op_type_), " failed. Status:",
@@ -247,7 +247,8 @@ Status Resize::ComputeInternal(OpKernelContext* ctx, const Tensor* input,
reshape_fn = xnn_reshape_resize_bilinear2d_nhwc_s8;
}

- auto status = reshape_fn(op0_.get(), N, H, W, output_dims[1], output_dims[2],
+ size_t channels = 0; // todo
+ auto status = reshape_fn(op0_.get(), N, H, W, output_dims[1], output_dims[2], channels,
&workspace_size, &workspace_alignment, threadpool);
if (status != xnn_status_success) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_resize_bilinear2d_nhwc_", OpTypeToString(op_type_),
diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h b/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h
index 0978a88..1191e5b 100644
--- a/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h
+++ b/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h
@@ -5,6 +5,7 @@
#include "core/framework/op_kernel.h"
#include "core/providers/xnnpack/xnnpack_execution_provider.h"
#include "xnnpack.h"
+#include <type_traits>

struct pthreadpool;

@@ -57,7 +58,7 @@ class XnnpackKernel : public OpKernel {
}

// std::unique_ptr<xnn_code_cache, decltype(&xnn_release_code_cache)> auto_code_cache;
- std::unique_ptr<xnn_weights_cache, decltype(&xnn_delete_weights_cache)> auto_weights_cache;
+ std::unique_ptr<xnn_weights_cache_provider, decltype(&xnn_delete_weights_cache)> auto_weights_cache;

// private:
// #if defined(XNN_CACHE_ENABLE) && XNN_PLATFORM_JIT
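
Note on the patch above: the 2024-05-06 XNNPACK API moves the channel count and the input/output pixel strides from the xnn_create_* calls to the xnn_reshape_* calls, which is why fix-xnnpack.patch drops the "C, C, C" creation arguments and forwards channels/input_stride/output_stride (currently placeholder "// todo" values) at reshape time. Below is a minimal C++ sketch of the new softmax call order. The create and reshape signatures are taken from the patch itself; the setup/run/delete steps follow the usual XNNPACK operator pattern and are an assumption, not part of this diff.

// Sketch only: illustrates the create -> reshape argument move that fix-xnnpack.patch adapts to.
#include <xnnpack.h>      // XNNPACK operator API
#include <pthreadpool.h>  // pthreadpool_t
#include <cstddef>

xnn_status softmax_f32(size_t batch, size_t channels,
                       const float* input, float* output,
                       pthreadpool_t threadpool) {
  xnn_operator_t op = nullptr;
  // New API: no channels / input_pixel_stride / output_pixel_stride at creation time.
  xnn_status st = xnn_create_softmax_nc_f32(/*flags=*/0, &op);
  if (st != xnn_status_success) return st;
  // Channels and strides are supplied when the operator is reshaped, matching
  // reshape_fn(op, channels, input_stride, output_stride, N, threadpool) in the patch.
  st = xnn_reshape_softmax_nc_f32(op, channels,
                                  /*input_stride=*/channels,
                                  /*output_stride=*/channels,
                                  batch, threadpool);
  // Assumed follow-up (standard XNNPACK usage, not shown in the diff):
  if (st == xnn_status_success) st = xnn_setup_softmax_nc_f32(op, input, output);
  if (st == xnn_status_success) st = xnn_run_operator(op, threadpool);
  xnn_delete_operator(op);
  return st;
}

For onnxruntime itself the placeholder channel/stride values in the patch still need to be derived from the real tensor shapes, which appears to be what the "todo: check xnnpack updates & tests" note next to fix-xnnpack.patch in portfile.cmake refers to.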
13 changes: 4 additions & 9 deletions ports/onnxruntime/onnxruntime_vcpkg_deps.cmake
@@ -38,15 +38,15 @@ include(external/protobuf_function.cmake)
find_package(date CONFIG REQUIRED)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES date::date)

find_package(Boost REQUIRED)
# see Hints of FindBoost.cmake
find_path(BOOST_INCLUDEDIR "boost/mp11.hpp" REQUIRED)
find_package(Boost REQUIRED)
add_library(Boost::mp11 ALIAS Boost::headers)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES Boost::mp11)

find_package(nlohmann_json CONFIG REQUIRED)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES nlohmann_json::nlohmann_json)

#TODO: include clog first
if (onnxruntime_ENABLE_CPUINFO)
find_package(cpuinfo CONFIG REQUIRED)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo)
@@ -70,10 +70,7 @@ list(APPEND onnxruntime_EXTERNAL_LIBRARIES Eigen3::Eigen)
find_package(wil CONFIG REQUIRED)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES WIL::WIL)

add_library(safeint_interface INTERFACE)
find_path(SAFEINT_INCLUDE_DIRS "SafeInt.hpp" REQUIRED)
target_include_directories(safeint_interface INTERFACE ${SAFEINT_INCLUDE_DIRS})
list(APPEND onnxruntime_EXTERNAL_LIBRARIES safeint_interface)

# XNNPACK EP
if (onnxruntime_USE_XNNPACK)
@@ -83,8 +80,8 @@ if (onnxruntime_USE_XNNPACK)
endif()
find_package(cpuinfo CONFIG REQUIRED)
find_library(PTHREADPOOL_LIBRARY NAMES pthreadpool REQUIRED)
find_package(xnnpack CONFIG REQUIRED) # xnnpack
list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${PTHREADPOOL_LIBRARY} xnnpack)
find_library(XNNPACK_LIBRARY NAMES XNNPACK REQUIRED)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${PTHREADPOOL_LIBRARY} ${XNNPACK_LIBRARY})
endif()

if (onnxruntime_USE_MIMALLOC)
@@ -115,7 +112,5 @@ endif()

if (onnxruntime_USE_OPENVINO)
find_package(OpenVINO REQUIRED)
# deceive ENV{INTEL_OPENVINO_DIR} usages in CMakeLists.txt
set(ENV{INTEL_OPENVINO_DIR} "${OpenVINO_VERSION_MAJOR}.${OpenVINO_VERSION_MINOR}") # "2023.0"
# list(APPEND onnxruntime_EXTERNAL_LIBRARIES openvino::runtime)
endif()
43 changes: 8 additions & 35 deletions ports/onnxruntime/portfile.cmake
@@ -1,23 +1,16 @@
if(VCPKG_TARGET_IS_IOS)
vcpkg_check_linkage(ONLY_STATIC_LIBRARY)
elseif(VCPKG_TARGET_IS_WINDOWS OR VCPKG_TARGET_IS_LINUX OR VCPKG_TARGET_IS_ANDROID)
vcpkg_check_linkage(ONLY_DYNAMIC_LIBRARY)
endif()
if("framework" IN_LIST FEATURES)
vcpkg_check_linkage(ONLY_DYNAMIC_LIBRARY)
endif()
vcpkg_check_linkage(ONLY_DYNAMIC_LIBRARY)
string(COMPARE EQUAL "${VCPKG_LIBRARY_LINKAGE}" "dynamic" BUILD_SHARED)

# requires https://github.com/microsoft/onnxruntime/pull/18038 for later version of XNNPACK
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO microsoft/onnxruntime
REF v1.17.0
SHA512 63f1b8a8ede1d45d68c341c0df60ee360e689d513626ac2ad07b50930651321bd6cf661f628bd6768c10a0b3029ced51ad0df05060be028f0e820512ad4c5bc1
REF v1.17.3
SHA512 f24e333ad113e15733867fae237c3495f93e373b2998508deeebb061ce9a56c444bf68fc49ae251bcc45539d0695f3ae758d73dc3c42bc01bbd7cfaa8561c793
PATCHES
fix-onnxruntime-pr-19966.patch # https://github.com/microsoft/onnxruntime/pull/19966 for OpenVINO 2024.0+
fix-cmake.patch
fix-source-flatbuffers.patch
fix-sources.patch
fix-xnnpack.patch # todo: check xnnpack updates & tests
fix-clang-cl-simd-compile.patch
fix-llvm-rc-unicode.patch
)
@@ -76,22 +69,10 @@ vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
cuda onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION
)


if("training" IN_LIST FEATURES)
# check cmake/deps.txt
vcpkg_from_github(
OUT_SOURCE_PATH TENSORBOARD_SOURCE_PATH
REPO tensorflow/tensorboard
REF 373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81
SHA512 7f76af0ee40eba93aca58178315a2e6bb7b85eefe8721567ed77aeeece13190e28202fc067e9f84f84fab21d8ac7dfcbd00c75e6e0771ed9992ff6ac6bba67c7
)
list(APPEND FEATURE_OPTIONS "-Dtensorboard_SOURCE_DIR:PATH=${TENSORBOARD_SOURCE_PATH}")
endif()

if(VCPKG_TARGET_IS_WINDOWS OR VCPKG_TARGET_IS_UWP)
# For some reason CUDA compiler detection is not working in WINDOWS_USE_MSBUILD
if(NOT ("cuda" IN_LIST FEATURES))
# set(GENERATOR_OPTIONS WINDOWS_USE_MSBUILD)
set(GENERATOR_OPTIONS WINDOWS_USE_MSBUILD)
endif()
elseif(VCPKG_TARGET_IS_OSX OR VCPKG_TARGET_IS_IOS)
set(GENERATOR_OPTIONS GENERATOR Xcode)
@@ -172,8 +153,8 @@ vcpkg_cmake_configure(
-Donnxruntime_USE_NEURAL_SPEED=OFF
-DUSE_NEURAL_SPEED=OFF
# for ORT_BUILD_INFO
-DORT_GIT_COMMIT:STRING="5f0b62cde54f59bdeac7978c9f9c12d0a4bc56db"
-DORT_GIT_BRANCH:STRING="v1.17.0"
-DORT_GIT_COMMIT:STRING="56b660f36940a919295e6f1e18ad3a9a93a10bf7"
-DORT_GIT_BRANCH:STRING="v1.17.3"
OPTIONS_DEBUG
-Donnxruntime_ENABLE_MEMLEAK_CHECKER=OFF
-Donnxruntime_ENABLE_MEMORY_PROFILE=OFF
@@ -192,14 +173,6 @@ vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/onnxruntime PACKAGE_NAME onnxrunt
vcpkg_copy_pdbs()
vcpkg_fixup_pkgconfig() # pkg_check_modules(libonnxruntime)

if("framework" IN_LIST FEATURES)
foreach(FRAMEWORK_NAME "onnxruntime.framework" "onnxruntime_objc.framework")
file(RENAME "${CURRENT_PACKAGES_DIR}/debug/bin/${FRAMEWORK_NAME}" "${CURRENT_PACKAGES_DIR}/debug/lib/${FRAMEWORK_NAME}")
file(RENAME "${CURRENT_PACKAGES_DIR}/bin/${FRAMEWORK_NAME}" "${CURRENT_PACKAGES_DIR}/lib/${FRAMEWORK_NAME}")
endforeach()
file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/bin" "${CURRENT_PACKAGES_DIR}/bin")
endif()

file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include")
if(VCPKG_LIBRARY_LINKAGE STREQUAL "static")
file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/bin" "${CURRENT_PACKAGES_DIR}/bin")