diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml index 424a04c15a5..8d3031d8377 100644 --- a/.github/workflows/android.yml +++ b/.github/workflows/android.yml @@ -117,8 +117,10 @@ jobs: env: DEBIAN_FRONTEND: noninteractive run: | - sudo apt-get update - sudo apt-get install -y libncurses5 + pushd /usr/lib/x86_64-linux-gnu/ + sudo ln -s libncurses.so.6 libncurses.so.5 + sudo ln -s libtinfo.so.6 libtinfo.so.5 + popd wget -q https://dl.google.com/android/repository/android-ndk-r16b-linux-x86_64.zip -O $GITHUB_WORKSPACE/android-ndk-r16b-linux-x86_64.zip cd $GITHUB_WORKSPACE && unzip -q android-ndk-r16b-linux-x86_64.zip diff --git a/.github/workflows/pnnx.yml b/.github/workflows/pnnx.yml new file mode 100644 index 00000000000..3e4b354c3c2 --- /dev/null +++ b/.github/workflows/pnnx.yml @@ -0,0 +1,56 @@ +name: pnnx +on: + push: + branches: [master] + paths: + - '.github/workflows/pnnx.yml' + - 'tools/pnnx/**' + - '!tools/pnnx/README.md' + pull_request: + branches: [master] + paths: + - '.github/workflows/pnnx.yml' + - 'tools/pnnx/**' + - '!tools/pnnx/README.md' +concurrency: + group: pnnx-${{ github.ref }} + cancel-in-progress: true +permissions: + contents: read + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + + env: + PYTHONUSERBASE: ${{ github.workspace }}/torch + UseMultiToolTask: true + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: setup-pytorch + run: | + python3 -m pip config set global.break-system-packages true + pip3 install --user torch --index-url https://download.pytorch.org/whl/cpu + pip3 install --user numpy packaging + + - name: build-pnnx + run: | + cd tools/pnnx + mkdir build && cd build + cmake -DCMAKE_BUILD_TYPE=Release .. + cmake --build . --config Release -j 4 + + - name: quick-test + if: matrix.os != 'windows-latest' + run: | + cd tools/pnnx + cd build && ctest -C Release --output-on-failure -R test_nn_Conv diff --git a/.github/workflows/web-assembly.yml b/.github/workflows/web-assembly.yml index 1b5e8915a86..a650d6f62c4 100644 --- a/.github/workflows/web-assembly.yml +++ b/.github/workflows/web-assembly.yml @@ -62,7 +62,7 @@ jobs: - name: test-simd run: | cd build-simd - TESTS_EXECUTABLE_LOADER=node TESTS_EXECUTABLE_LOADER_ARGUMENTS="--experimental-wasm-simd" ctest --output-on-failure -j $(nproc) + TESTS_EXECUTABLE_LOADER=node ctest --output-on-failure -j $(nproc) - name: build-simd-omp run: | source emsdk/emsdk_env.sh @@ -73,4 +73,4 @@ jobs: - name: test-simd-omp run: | cd build-simd-omp - TESTS_EXECUTABLE_LOADER=node TESTS_EXECUTABLE_LOADER_ARGUMENTS="--experimental-wasm-simd;--experimental-wasm-threads" ctest --output-on-failure -j $(nproc) + TESTS_EXECUTABLE_LOADER=node ctest --output-on-failure -j $(nproc) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5851552b2a5..2b532d7c245 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,6 +139,7 @@ endif() ############################################## include(CheckCXXCompilerFlag) +set(CMAKE_TRY_COMPILE_CONFIGURATION release) # gnu inline assembly in clang msvc does not work actually if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))) @@ -523,7 +524,7 @@ else() check_cxx_source_compiles("#include \nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI) set(CMAKE_REQUIRED_FLAGS "/arch:AVX512") - check_cxx_source_compiles("#include \nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) + check_cxx_source_compiles("#include \nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) set(CMAKE_REQUIRED_FLAGS "/arch:AVX512") check_cxx_source_compiles("#include \nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16) @@ -560,7 +561,7 @@ else() check_cxx_source_compiles("#include \nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI) set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16") - check_cxx_source_compiles("#include \nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) + check_cxx_source_compiles("#include \nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16") check_cxx_source_compiles("#include \nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16) @@ -595,7 +596,7 @@ else() check_cxx_source_compiles("#include \nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI) set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16") - check_cxx_source_compiles("#include \nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) + check_cxx_source_compiles("#include \nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16") check_cxx_source_compiles("#include \nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16) diff --git a/src/layer/x86/gemm_int8.h b/src/layer/x86/gemm_int8.h index f9e0050fd55..132cf9d8cb9 100644 --- a/src/layer/x86/gemm_int8.h +++ b/src/layer/x86/gemm_int8.h @@ -2014,7 +2014,7 @@ static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int max_ii, i __m256i _pp = combine4x2_epi32(_pp0, _pp1); #if !__AVXVNNIINT8__ - _w_shift = _mm256_dpbusd_epi32(_w_shift, _v127, _pp); + _w_shift = _mm256_comp_dpbusd_epi32(_w_shift, _v127, _pp); #endif // !__AVXVNNIINT8__ _mm256_storeu_si256((__m256i*)pp, _pp); @@ -2108,7 +2108,7 @@ static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int max_ii, i __m256i _pp = combine4x2_epi32(_pp0, _pp1); #if !__AVXVNNIINT8__ - _w_shift = _mm256_dpbusd_epi32(_w_shift, _v127, _pp); + _w_shift = _mm256_comp_dpbusd_epi32(_w_shift, _v127, _pp); #endif // !__AVXVNNIINT8__ _mm256_storeu_si256((__m256i*)pp, _pp); diff --git a/src/layer/x86/x86_usability.h b/src/layer/x86/x86_usability.h index f25b06745e8..8628249e76a 100644 --- a/src/layer/x86/x86_usability.h +++ b/src/layer/x86/x86_usability.h @@ -1490,9 +1490,9 @@ static NCNN_FORCEINLINE __m256i float2bfloat_avx512(const __m512& v0) static NCNN_FORCEINLINE __m512i float2bfloat_avx512(const __m512& v0, const __m512& v1) { #if __AVX512BF16__ - __m256bh _v0 = _mm512_cvtneps_pbh(v0); - __m256bh _v1 = _mm512_cvtneps_pbh(v1); - __m512i _v = _mm512_inserti32x8(_mm512_castsi256_si512((__m256i)_v0), (__m256i)_v1, 1); + __m256i _v0 = (__m256i)_mm512_cvtneps_pbh(v0); + __m256i _v1 = (__m256i)_mm512_cvtneps_pbh(v1); + __m512i _v = _mm512_inserti32x8(_mm512_castsi256_si512(_v0), _v1, 1); #else __m512i _a = _mm512_castps_si512(v0); __m512i _b = _mm512_castps_si512(v1); diff --git a/tools/pnnx/CMakeLists.txt b/tools/pnnx/CMakeLists.txt index 65b15a3cf52..b09f4758ead 100644 --- a/tools/pnnx/CMakeLists.txt +++ b/tools/pnnx/CMakeLists.txt @@ -19,6 +19,11 @@ if(MSVC AND NOT CMAKE_VERSION VERSION_LESS "3.15") endif() endif() +if(POLICY CMP0094) + cmake_policy(SET CMP0094 NEW) # FindPython should return the first matching Python +endif() +set(Python_FIND_REGISTRY "LAST") +set(Python_FIND_FRAMEWORK "LAST") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(PNNXPyTorch) diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt index d56784646b5..b1ac6f5c024 100644 --- a/tools/pnnx/src/CMakeLists.txt +++ b/tools/pnnx/src/CMakeLists.txt @@ -625,6 +625,10 @@ if(PROTOBUF_FOUND) set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_BINARY_DIR}) set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_LINK_LIBRARIES protobuf::libprotobuf) endif() + + if(APPLE) + set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-framework CoreFoundation") + endif() endif() endif() diff --git a/tools/pnnx/src/pass_ncnn/torch_istft.cpp b/tools/pnnx/src/pass_ncnn/torch_istft.cpp index 3acbe654009..9d894aba7ab 100644 --- a/tools/pnnx/src/pass_ncnn/torch_istft.cpp +++ b/tools/pnnx/src/pass_ncnn/torch_istft.cpp @@ -106,10 +106,10 @@ static int detect_window_type(const std::vector& window_data) if (!NearlyEqual(window_data[i], 1.f, 0.001)) is_one = false; - if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001)) + if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001)) is_hann = false; - if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001)) + if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001)) is_hamming = false; } diff --git a/tools/pnnx/src/pass_ncnn/torch_stft.cpp b/tools/pnnx/src/pass_ncnn/torch_stft.cpp index 2b2296ccbc2..8993afeb12f 100644 --- a/tools/pnnx/src/pass_ncnn/torch_stft.cpp +++ b/tools/pnnx/src/pass_ncnn/torch_stft.cpp @@ -93,10 +93,10 @@ static int detect_window_type(const std::vector& window_data) if (!NearlyEqual(window_data[i], 1.f, 0.001)) is_one = false; - if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001)) + if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001)) is_hann = false; - if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001)) + if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001)) is_hamming = false; } diff --git a/tools/pnnx/src/pass_ncnn/torchaudio_F_inverse_spectrogram.cpp b/tools/pnnx/src/pass_ncnn/torchaudio_F_inverse_spectrogram.cpp index d712fcc2990..0c964790fb4 100644 --- a/tools/pnnx/src/pass_ncnn/torchaudio_F_inverse_spectrogram.cpp +++ b/tools/pnnx/src/pass_ncnn/torchaudio_F_inverse_spectrogram.cpp @@ -43,10 +43,10 @@ static int detect_window_type(const std::vector& window_data) if (!NearlyEqual(window_data[i], 1.f, 0.001)) is_one = false; - if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001)) + if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001)) is_hann = false; - if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001)) + if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001)) is_hamming = false; } diff --git a/tools/pnnx/src/pass_ncnn/torchaudio_F_spectrogram.cpp b/tools/pnnx/src/pass_ncnn/torchaudio_F_spectrogram.cpp index 04084ad0ba6..225fab7060e 100644 --- a/tools/pnnx/src/pass_ncnn/torchaudio_F_spectrogram.cpp +++ b/tools/pnnx/src/pass_ncnn/torchaudio_F_spectrogram.cpp @@ -43,10 +43,10 @@ static int detect_window_type(const std::vector& window_data) if (!NearlyEqual(window_data[i], 1.f, 0.001)) is_one = false; - if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001)) + if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001)) is_hann = false; - if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001)) + if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001)) is_hamming = false; }