diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml
deleted file mode 100644
index 8626f9d609ed0e..00000000000000
--- a/.ci/azure/linux.yml
+++ /dev/null
@@ -1,590 +0,0 @@
-trigger:
-  branches:
-    include:
-    - 'master'
-    - 'releases/*'
-  paths:
-    exclude:
-    - '*/docs/*'
-    - 'docs/*'
-    - '*/*.md'
-    - '*.md'
-    - '*/layer_tests_summary/*'
-    - '*/conformance/*'
-
-pr:
-  branches:
-    include:
-    - 'master'
-    - 'releases/*'
-  paths:
-    exclude:
-    - '*/docs/*'
-    - 'docs/*'
-    - '*/*.md'
-    - '*.md'
-    - '*/layer_tests_summary/*'
-    - '*/conformance/*'
-
-resources:
-  repositories:
-  - repository: openvino_contrib
-    type: github
-    endpoint: openvinotoolkit
-    name: openvinotoolkit/openvino_contrib
-    ref: master
-
-variables:
-  - group: github
-
-jobs:
-- job: Lin
-  strategy:
-    matrix:
-      # Dynamic:
-      #   CMAKE_BUILD_SHARED_LIBS: 'ON'
-      #   PYTHON_STATIC_ARGS:
-      #   CMAKE_CPACK_GENERATOR:
-      #   SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/samples
-      #   PYTHON_SAMPLES_INSTALL_DIR: $(SAMPLES_INSTALL_DIR)/python
-      #   RUN_PREFIX: . $(SETUPVARS) -pyver 3.8 &&
-      # Debian:
-      #   CMAKE_BUILD_SHARED_LIBS: 'ON'
-      #   PYTHON_STATIC_ARGS:
-      #   CMAKE_CPACK_GENERATOR: 'DEB'
-      #   SAMPLES_INSTALL_DIR: /usr/share/openvino/samples
-      #   PYTHON_SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/share/openvino/samples/python
-      #   RUN_PREFIX: LD_LIBRARY_PATH=$(INSTALL_TEST_DIR):$(INSTALL_DIR)/opencv/lib:$LD_LIBRARY_PATH
-      Static:
-        CMAKE_BUILD_SHARED_LIBS: 'OFF'
-        PYTHON_STATIC_ARGS: -m "not dynamic_library"
-        CMAKE_CPACK_GENERATOR: "TGZ"
-        SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/samples
-        PYTHON_SAMPLES_INSTALL_DIR: $(SAMPLES_INSTALL_DIR)/python
-        RUN_PREFIX: . $(SETUPVARS) &&
-    maxParallel: '2'
-
-  # About 150% of total time
-  timeoutInMinutes: '180'
-
-  pool:
-    name: LIN_VMSS_VENV_F16S_U20_WU2
-
-  variables:
-    system.debug: true
-    VSTS_HTTP_RETRY: 5
-    VSTS_HTTP_TIMEOUT: 200
-    BUILD_TYPE: Release
-    REPO_DIR: $(Build.Repository.LocalPath)
-    OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib
-    WORK_DIR: $(Pipeline.Workspace)/_w
-    BUILD_DIR: $(WORK_DIR)/build
-    BUILD_SAMPLES_DIR: $(WORK_DIR)/build_samples
-    BUILD_LAYER_TESTS_DIR: $(WORK_DIR)/build_layer_tests
-    BUILD_SAMPLES_TESTS_DIR: $(WORK_DIR)/build_samples_tests
-    INSTALL_DIR: $(WORK_DIR)/install_pkg
-    INSTALL_TEST_DIR: $(INSTALL_DIR)/tests
-    LAYER_TESTS_DIR: $(INSTALL_TEST_DIR)/layer_tests
-    SETUPVARS: $(INSTALL_DIR)/setupvars.sh
-    TMP_DIR: /mnt/tmp
-    SHARE_DIR: /mount/cinfsshare/onnxtestdata
-    CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux
-    CMAKE_VERSION: 3.24.0
-    BUILD_PYTHON: $(WORK_DIR)/build_python
-    INSTALL_PYTHON: $(INSTALL_OPENVINO)/extras/python
-    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
-    OV_PYTHON_VERSION: 3.11.2 # Full version of Python its required for LD_LIBRARY_PATH. More details https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache
-
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only major & minor version will download latest release from GH repo example 3.10 will be 3.10.10.
-      addToPath: true
-      disableDownloadFromRegistry: false
-      architecture: 'x64'
-      githubToken: $(auth_token)
-    displayName: Setup Python 3.11
-    name: setupPython
-  - bash: |
-      #!/bin/bash
-      python -V
-
-  - script: |
-      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
-      whoami
-      uname -a
-      echo Python3 info ; which python3 ; python3 --version
-      echo Java info ; which java ; java -version
-      echo gcc info ; which gcc ; gcc --version
-      echo cmake info ; which cmake ; cmake --version
-      lsb_release
-      env
-      cat /proc/cpuinfo
-      cat /proc/meminfo
-      cat /etc/fstab
-      vmstat -s
-      df
-      lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd"
-      free -h
-      echo TargetBranch: $(System.PullRequest.TargetBranch)
-      echo SourceBranch: $(Build.SourceBranch)
-    displayName: 'System info'
-
-  - script: |
-      set -e
-      rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
-      rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
-      rm -rf $(BUILD_SAMPLES_DIR) ; mkdir $(BUILD_SAMPLES_DIR)
-      sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
-      sudo mkdir -p $(SHARE_DIR)
-      sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
-      sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(SHARE_DIR) -o vers=4,minorversion=1,sec=sys
-      mkdir -p $(CCACHE_DIR)
-    displayName: 'Make dir'
-
-  - checkout: self
-    clean: 'true'
-    submodules: 'true'
-    path: openvino
-
-  - checkout: openvino_contrib
-    clean: 'true'
-    submodules: 'true'
-    path: openvino_contrib
-
-  - script: |
-      set -e
-      sudo -E $(REPO_DIR)/install_build_dependencies.sh
-      # Move jdk into contrib
-      # 'clang' compiler is used as a default compiler
-      sudo apt --assume-yes install openjdk-11-jdk libbz2-dev clang
-      # For Python API
-      python3 -m pip install --upgrade pip
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/requirements.txt
-      # For running Python API tests
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/src/compatibility/openvino/requirements-dev.txt
-      # For running Paddle frontend unit tests
-      # TODO Reenable PDPD after paddlepaddle==2.5.0 with compliant protobuf is released (ticket 95904)
-      #python3 -m pip install -r $(REPO_DIR)/src/frontends/paddle/tests/requirements.txt
-      # For running ONNX frontend unit tests
-      python3 -m pip install -r $(REPO_DIR)/src/frontends/onnx/tests/requirements.txt
-      # For running TensorFlow frontend unit tests
-      python3 -m pip install -r $(REPO_DIR)/src/frontends/tensorflow/tests/requirements.txt
-      # For running torchvision -> OpenVINO preprocess converter
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt
-      # For MO unit tests
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_mxnet.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_caffe.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_kaldi.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_onnx.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_tf2.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_dev.txt
-      # Speed up build
-      sudo apt -y --no-install-recommends install unzip
-      wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip
-      unzip ninja-linux.zip
-      sudo cp -v ninja /usr/local/bin/
-    displayName: 'Install dependencies'
-
-  - script: |
-      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
-      whoami
-      uname -a
-      echo Python3 info ; which python3 ; python3 --version
-      echo Python info ; which python ; python --version
-      echo Java info ; which java ; java -version
-      echo gcc info ; which gcc ; gcc --version
-      echo cmake info ; which cmake ; cmake --version
-      lsb_release
-      env
-      cat /proc/cpuinfo
-      cat /proc/meminfo
-      cat /etc/fstab
-      vmstat -s
-      df
-      lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd"
-      free -h
-      echo TargetBranch: $(System.PullRequest.TargetBranch)
-      echo SourceBranch: $(Build.SourceBranch)
-    displayName: 'System info'
-
-  - task: CMake@1
-    inputs:
-      # CMake must get Python 3.x version by default
-      cmakeArgs: >
-        -GNinja
-        -DCMAKE_VERBOSE_MAKEFILE=ON
-        -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
-        -DCMAKE_COMPILE_WARNING_AS_ERROR=ON
-        -DENABLE_PYTHON=ON
-        -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS)
-        -DENABLE_ONEDNN_FOR_GPU=$(CMAKE_BUILD_SHARED_LIBS)
-        -DENABLE_TESTS=ON
-        -DENABLE_OV_ONNX_FRONTEND=ON
-        -DENABLE_FASTER_BUILD=ON
-        -DENABLE_STRICT_DEPENDENCIES=OFF
-        -DOPENVINO_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
-        -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose"
-        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-        -DCMAKE_C_COMPILER_LAUNCHER=ccache
-        -DCMAKE_CXX_LINKER_LAUNCHER=ccache
-        -DCMAKE_C_LINKER_LAUNCHER=ccache
-        -DCMAKE_CXX_COMPILER=clang++
-        -DCMAKE_C_COMPILER=clang
-        -DENABLE_SYSTEM_SNAPPY=ON
-        -DENABLE_SYSTEM_TBB=ON
-        -DCPACK_GENERATOR=$(CMAKE_CPACK_GENERATOR)
-        -DBUILD_nvidia_plugin=OFF
-        -S $(REPO_DIR)
-        -B $(BUILD_DIR)
-    displayName: 'Cmake OpenVINO'
-
-  - script: ls -alR $(REPO_DIR)/temp/
-    displayName: 'List temp SDKs'
-
-  - script: ccache --zero-stats --max-size=50G --show-config
-    displayName: 'Clean ccache stats'
-
-  - script: cmake --build $(BUILD_DIR) --parallel --config $(BUILD_TYPE)
-    env:
-      CCACHE_DIR: $(CCACHE_DIR)
-      CCACHE_TEMPDIR: $(TMP_DIR)/ccache
-      CCACHE_BASEDIR: $(Pipeline.Workspace)
-      CCACHE_MAXSIZE: 50G
-    displayName: 'Build Lin'
-
-  - script: ccache --show-stats
-    displayName: 'Show ccache stats'
-
-  - script: ls -alR $(REPO_DIR)/bin/
-    displayName: 'List bin files'
-
-  - task: CMake@1
-    inputs:
-      cmakeArgs: >
-        -GNinja
-        -S $(REPO_DIR)/tests/layer_tests
-        -B $(BUILD_LAYER_TESTS_DIR)
-    displayName: 'Cmake Layer Tests'
-
-  - script: cmake --build $(BUILD_LAYER_TESTS_DIR) --parallel --config $(BUILD_TYPE)
-    displayName: 'Build Layer Tests'
-
-  - script: sudo apt-get remove libtbb2 -y
-    displayName: 'Remove debian dependencies'
-    condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-
-  - script: cmake -DCOMPONENT=python_wheels -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_DIR)/cmake_install.cmake
-    displayName: 'Install wheel packages'
-
-  - script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_LAYER_TESTS_DIR)/cmake_install.cmake
-    displayName: 'Install Layer Tests'
-
-  - script: python3 -m pip install openvino-dev --find-links=$(INSTALL_DIR)/tools
-    displayName: 'Install python wheels'
-
-  - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P $(BUILD_DIR)/cmake_install.cmake
-    displayName: 'Install tests'
-
-  - script: ls -alR $(INSTALL_DIR)
-    displayName: 'List install test files'
-
-  - script: |
-      set -e
-      sudo apt-get install libtbb-dev libpugixml-dev -y
-      cmake --build $(BUILD_DIR) --target package --parallel
-    condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'Build Debian packages'
-
-  - script: |
-      set -e
-      # install debian packages from previous release
-      sudo apt-get install --no-install-recommends gnupg wget -y
-      wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-      sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-      echo "deb https://apt.repos.intel.com/openvino/2022 focal main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2022.list
-      sudo apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/intel-openvino-2022.list
-      sudo apt-get install openvino -y
-      # install our local one and make sure the conflicts are resolved
-      sudo apt-get install --no-install-recommends dpkg-dev -y
-      rm -r _CPack_Packages
-      dpkg-scanpackages . /dev/null | gzip -9c > Packages.gz
-      echo "deb [trusted=yes] file:$(BUILD_DIR) ./" | sudo tee /etc/apt/sources.list.d/openvino-local.list
-      sudo apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/openvino-local.list
-      sudo apt-get install openvino -y
-    workingDirectory: $(BUILD_DIR)
-    condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'Install Debian packages'
-
-  - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_DIR)/cmake_install.cmake
-    condition: ne(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'Install openvino'
-
-  - script: ls -alR $(INSTALL_DIR)
-    condition: ne(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'List install files'
-
-  - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -i $(INSTALL_DIR) -b $(BUILD_DIR)/cpp_samples
-    displayName: 'Build cpp samples - gcc'
-
-  - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -b $(BUILD_DIR)/cpp_samples_clang
-    env:
-      CC: clang
-      CXX: clang++
-    displayName: 'Build cpp samples - clang'
-
-  - script: $(SAMPLES_INSTALL_DIR)/c/build_samples.sh -i $(INSTALL_DIR) -b $(BUILD_DIR)/c_samples
-    env:
-      VERBOSE: 1
-    displayName: 'Build c samples'
-
-  - script: rm -fr $(BUILD_DIR)
-    displayName: 'Clean build dir'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVCoreUT.xml
-    displayName: 'OV Core UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_inference_functional_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceFunc.xml
-    displayName: 'Inference Func Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_inference_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceUnit.xml
-    displayName: 'Inference Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_proxy_plugin_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVProxyTests.xml
-    displayName: 'OV Proxy Plugin Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVHeteroUnitTests.xml
-    displayName: 'OV Hetero Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVHeteroFuncTests.xml
-    displayName: 'OV Hetero Func Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_conditional_compilation_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ConditionalCompilation.xml
-    displayName: 'Conditional Compilation Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_ir_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-IRFrontend.xml
-    displayName: 'IR Frontend Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ONNXFrontend.xml
-    displayName: 'ONNX Frontend Tests'
-
-  # TODO Reenable PDPD after paddlepaddle==2.5.1 with compliant protobuf is released (ticket 95904)
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/paddle_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Paddle.xml
-    displayName: 'Paddle Frontend UT'
-    enabled: 'false'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Tensorflow.xml
-    displayName: 'TensorFlow Frontend Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_common_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-TensorflowCommon.xml
-    displayName: 'TensorFlow Common Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_lite_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-TensorflowLite.xml
-    displayName: 'TensorFlow Lite Frontend Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_lp_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-LpTransformations.xml
-    displayName: 'Low Precision Transformations Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Transformations.xml
-    displayName: 'Transformations Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_legacy_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-LegacyTransformations.xml
-    displayName: 'Legacy Transformations Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_util_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-CommonUtilTests.xml
-    displayName: 'Common Utils Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceEngineUnitTests.xml
-    displayName: 'IE UT old'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_snippets_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_snippets_func_tests.xml
-    displayName: 'Snippets Func Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_unit_tests.xml
-    displayName: 'Intel CPU Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_gna_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_gna_unit_tests.xml
-    displayName: 'GNA UT'
-    enabled: 'false' # TODO: fix
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
-    displayName: 'AUTO UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_func_tests.xml
-    displayName: 'AUTO FuncTests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_unit_tests.xml
-    displayName: 'AutoBatch UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_template_func_tests --gtest_filter=*smoke* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-templateFuncTests.xml
-    displayName: 'TEMPLATE FuncTests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/InferenceEngineCAPITests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceEngineCAPITests.xml
-    displayName: 'IE CAPITests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_capi_test --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_capi_test.xml
-    displayName: 'OV CAPITests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_func_tests.xml
-    displayName: 'AutoBatch FuncTests'
-
-  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
-  - script: |
-      $(RUN_PREFIX) python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \
-        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
-        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
-        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py
-    displayName: 'nGraph and IE Python Bindings Tests'
-
-  - script: |
-      set -e
-      export LD_LIBRARY_PATH=$INSTALL_TEST_DIR:$LD_LIBRARY_PATH
-      $(RUN_PREFIX) python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \
-        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
-        --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py
-    displayName: 'Python API 2.0 Tests'
-
-  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
-  - script: |
-      python3 -m pytest -sv $(REPO_DIR)/src/frontends/onnx/tests $(PYTHON_STATIC_ARGS) \
-        --ignore=$(REPO_DIR)/src/frontends/onnx/tests/test_python/test_zoo_models.py \
-        --ignore=$(REPO_DIR)/src/frontends/onnx/tests/test_python/test_backend.py -v
-    displayName: 'ONNX Frontend Python Tests'
-
-  - script: python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
-    displayName: 'Model Optimizer UT'
-
-  - script: python3 -m pytest -s $(REPO_DIR)/tools/ovc/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-OpenVinoConversion.xml
-    displayName: 'OpenVino Conversion UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_func_tests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_func_tests.xml
-    displayName: 'CPU FuncTests'
-    condition: and(succeeded(), eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'OFF'))
-
-  - task: CMake@1
-    inputs:
-      cmakeArgs: >
-        -GNinja
-        -S $(REPO_DIR)/tests/samples_tests
-        -B $(BUILD_SAMPLES_TESTS_DIR)
-    displayName: 'CMake Samples Tests'
-
-  - script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_SAMPLES_TESTS_DIR)/cmake_install.cmake
-    displayName: 'Install Samples Tests'
-
-  - script: python3 -m pip install -r $(INSTALL_TEST_DIR)/smoke_tests/requirements.txt
-    displayName: 'Install dependencies for samples smoke tests'
-
-  - script: |
-      set -e
-      export PATH=$HOME/.local/bin:$PATH
-      export LD_LIBRARY_PATH=$IE_APP_PATH:$LD_LIBRARY_PATH
-      $(RUN_PREFIX) python3 -m pytest $(INSTALL_TEST_DIR)/smoke_tests/ \
-        --env_conf $(INSTALL_TEST_DIR)/smoke_tests/env_config.yml \
-        -s --junitxml=$(INSTALL_TEST_DIR)/TEST-SamplesSmokeTests.xml
-    env:
-      IE_APP_PATH: $(INSTALL_DIR)/samples_bin
-      IE_APP_PYTHON_PATH: $(PYTHON_SAMPLES_INSTALL_DIR)/
-      SHARE: $(INSTALL_TEST_DIR)/smoke_tests/samples_smoke_tests_data/
-      WORKSPACE: $(INSTALL_DIR)
-    displayName: 'Samples Smoke Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/pytorch_tests/ -m precommit --junitxml=$(INSTALL_TEST_DIR)/TEST-pytorch.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'PyTorch Layer Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=$(INSTALL_TEST_DIR)/TEST-tf_fe.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow 1 Layer Tests - TF FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow2_keras_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=$(INSTALL_TEST_DIR)/TEST-tf2_fe.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow 2 Layer Tests - TF FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/jax_tests/ -m precommit --junitxml=$(INSTALL_TEST_DIR)/TEST-jax.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'JAX Layer Tests - TF FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=$(INSTALL_TEST_DIR)/TEST-tf_Roll.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-    displayName: 'TensorFlow 1 Layer Tests - Legacy FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow2_keras_tests/test_tf2_keras_activation.py --ir_version=11 --junitxml=./TEST-tf2_Activation.xmlTEST -k "sigmoid"
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow 2 Layer Tests - Legacy FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_lite_tests/ --junitxml=$(INSTALL_TEST_DIR)/TEST-tfl_fe.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow Lite Layer Tests - TFL FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/ovc_python_api_tests/ --junitxml=./TEST-test_ovc_convert.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'OVC Python API Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/mo_python_api_tests/ --junitxml=./TEST-test_mo_convert.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'MO Python API Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/py_frontend_tests --junitxml=./TEST-test_py_fontend.xml
-    displayName: 'Python Frontend tests'
-
-  - task: PublishTestResults@2
-    condition: always()
-    inputs:
-      testResultsFormat: 'JUnit' # Options: JUnit, NUnit, VSTest, xUnit, cTest
-      testResultsFiles: '**/TEST-*.xml'
-      #searchFolder: '$(BUILD_DIR)'
-      mergeTestResults: false # Optional
-      #failTaskOnFailedTests: false # Optional
-      #testRunTitle: 'Pre/Post-Commit' # Optional
-      buildPlatform: 'x64' # Optional
-      buildConfiguration: 'Linux' # Optional
-      #publishRunAttachments: true # Optional
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 3f2178a1c681e5..3727c4d88f6e8b 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -94,6 +94,7 @@
 /tests/layer_tests/tensorflow_tests  @openvinotoolkit/openvino-tf-frontend-maintainers
 /tests/layer_tests/jax_tests  @openvinotoolkit/openvino-tf-frontend-maintainers
 /tests/model_hub_tests  @openvinotoolkit/openvino-tf-frontend-maintainers
+/tests/model_hub_tests/torch_tests  @openvinotoolkit/openvino-pytorch-frontend-maintainers
 
 # Tools:
 /tools/  @openvinotoolkit/openvino-tools-maintainers
diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml
index e4360eb08d3850..fb4b36c69f5a55 100644
--- a/.github/workflows/android_arm64.yml
+++ b/.github/workflows/android_arm64.yml
@@ -35,11 +35,12 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_GENERATOR: 'Ninja'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       OPENVINO_REPO: '/__w/openvino/openvino/openvino'
       VCPKG_ROOT: '/__w/openvino/openvino/vcpkg'
       BUILD_DIR: '/__w/openvino/openvino/build'
@@ -49,10 +50,7 @@ jobs:
       ANDROID_ABI_CONFIG: arm64-v8a
       VCPKG_DEFAULT_BINARY_CACHE: '/mount/caches/ccache/android_arm64/vcpkg_cache'
       VCPKG_FORCE_SYSTEM_BINARIES: '1'
-      CCACHE_DIR: '/mount/caches/ccache/android_arm64'
-      CCACHE_TEMPDIR: '/__w/openvino/openvino/ccache_temp'
-      CCACHE_COMPILERCHECK: 'content'
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: android_arm64
     steps:
       - name: Install git
         run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates
@@ -102,6 +100,11 @@ jobs:
           unzip commandlinetools-linux-7583922_latest.zip
           echo "yes" | ./cmdline-tools/bin/sdkmanager --sdk_root=${ANDROID_TOOLS} --install "ndk-bundle" "platform-tools" "platforms;android-${{ env.ANDROID_SDK_VERSION }}"
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       #
       # Build
       #
@@ -138,10 +141,10 @@ jobs:
           -B ${BUILD_DIR}
 
       - name: Clean ccache stats
-        run: ccache --zero-stats --show-config
+        run: sccache --zero-stats
 
       - name: Cmake - build
         run: cmake --build ${BUILD_DIR} --parallel
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
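The ccache-to-sccache change above repeats in every Linux-container workflow below; schematically, the wiring converges on the following shape (a sketch, not part of the patch; the key prefix value is a placeholder):

    # Sketch: the two SCCACHE_AZURE_* variables are sccache's documented Azure
    # Blob Storage settings. They are assumed to be defined on the self-hosted
    # runner host and are only forwarded into the Docker container via `options`;
    # SCCACHE_AZURE_KEY_PREFIX namespaces each build flavour's cache entries.
    container:
      image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
    env:
      CMAKE_CXX_COMPILER_LAUNCHER: sccache
      CMAKE_C_COMPILER_LAUNCHER: sccache
      SCCACHE_AZURE_KEY_PREFIX: some_build_flavour   # placeholder; unique per workflow
    steps:
      - name: Install sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        with:
          version: "v0.5.4"
      # sccache-action exports SCCACHE_PATH pointing at the installed binary,
      # which is why the stats steps below run `${SCCACHE_PATH} --show-stats`.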
diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml
index a554dfa98b462b..f398b1a3623fc1 100644
--- a/.github/workflows/fedora.yml
+++ b/.github/workflows/fedora.yml
@@ -36,19 +36,18 @@ jobs:
       image: fedora:33
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       INSTALL_DIR: /__w/openvino/openvino/openvino_install
       INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install
       BUILD_DIR: /__w/openvino/openvino/openvino_build
-      CCACHE_DIR: /mount/caches/ccache/fedora33_x86_64_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: fedora33_x86_64_Release
     steps:
       - name: Install git
         run: yum update -y && yum install -y git
@@ -66,6 +65,11 @@ jobs:
       - name: Install build dependencies
         run: bash ${OPENVINO_REPO}/install_build_dependencies.sh
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - name: Install python dependencies
         run: |
           python3 -m pip install -U pip
@@ -112,8 +116,8 @@ jobs:
       - name: Cmake build - OpenVINO
         run: cmake --build ${BUILD_DIR} --parallel --verbose
 
-      - name: Show ccache stats
-        run: ccache --show-stats
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Cmake install - OpenVINO
         run: |
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 733dfed4c09d14..9050ab3d161509 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -43,12 +43,13 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib
@@ -56,9 +57,7 @@ jobs:
       INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install
       DEVELOPER_PACKAGE_DIR: /__w/openvino/openvino/developer_package_install
       BUILD_DIR: /__w/openvino/openvino/openvino_build
-      CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release
       ONNX_RUNTIME_UTILS: /__w/openvino/openvino/openvino/.ci/azure/ci_utils/onnxruntime
     steps:
@@ -92,6 +91,11 @@ jobs:
           # libssl1.1 - 'python3 -m pip' in self-hosted runner
           apt install --assume-yes --no-install-recommends default-jdk libssl1.1
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -143,14 +147,14 @@ jobs:
             -S ${OPENVINO_REPO} \
             -B ${BUILD_DIR}
 
-      - name: Clean ccache stats
-        run: ccache --zero-stats --show-config
+      - name: Clean sccache stats
+        run: sccache --zero-stats
 
       - name: Cmake build - OpenVINO
         run: cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }}
 
-      - name: Show ccache stats
-        run: ccache --show-stats
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Cmake install - OpenVINO
         run: |
@@ -497,16 +501,15 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       INSTALL_DIR: /__w/openvino/openvino/install
-      CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_onnxruntime
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_onnxruntime
       ONNX_RUNTIME_REPO: /__w/openvino/openvino/onnxruntime
       ONNX_RUNTIME_UTILS: /__w/openvino/openvino/install/onnxruntime
       ONNX_RUNTIME_BUILD_DIR: /__w/openvino/openvino/onnxruntime/build
@@ -561,6 +564,11 @@ jobs:
       - name: Install Build Dependencies
         run: bash ${OPENVINO_REPO}/install_build_dependencies.sh
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - name: Build Lin ONNX Runtime
         run: |
           source ${INSTALL_DIR}/setupvars.sh
@@ -576,6 +584,9 @@ jobs:
         env:
           CXXFLAGS: "-Wno-error=deprecated-declarations"
 
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
+
       - name: Run onnxruntime_test_all
         run: |
           source ${INSTALL_DIR}/setupvars.sh
@@ -760,6 +771,18 @@ jobs:
           ${INSTALL_TEST_DIR}/ov_cpu_unit_tests --gtest_print_time=1 \
             --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CPUUnitTests.xml
 
+      - name: SubgraphsDumper tests
+        run: |
+          source ${INSTALL_DIR}/setupvars.sh
+          ${INSTALL_TEST_DIR}/subgraphsDumperTests --gtest_print_time=1 \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml
+
+      - name: Template OpImpl tests
+        run: |
+          source ${INSTALL_DIR}/setupvars.sh
+          ${INSTALL_TEST_DIR}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter=*OpImpl* \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml
+
       - name: AUTO unit tests
         run: |
           source ${INSTALL_DIR}/setupvars.sh
@@ -1334,21 +1357,20 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/nvidia/cuda:11.8.0-runtime-ubuntu20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CUDA_COMPILER_LAUNCHER: ccache
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CUDA_COMPILER_LAUNCHER: sccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       INSTALL_DIR: /__w/openvino/openvino/install
       OPENVINO_DEVELOPER_PACKAGE: /__w/openvino/openvino/install/developer_package
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib
       NVIDIA_BUILD_DIR: /__w/openvino/openvino/nvidia_plugin_build
       DEBIAN_FRONTEND: 'noninteractive'
-      CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release
     steps:
       - name: Install Prerequisites
@@ -1405,7 +1427,12 @@ jobs:
       - name: Install build dependencies
         run: |
           ${OPENVINO_REPO}/install_build_dependencies.sh
-          apt -y --no-install-recommends install software-properties-common
+          apt -y --no-install-recommends install software-properties-common curl
+
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
 
       - name: Install CUDA
         run: |
@@ -1440,4 +1467,4 @@ jobs:
           cmake --build ${NVIDIA_BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --verbose -- ov_nvidia_func_tests ov_nvidia_unit_tests
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml
index c8fb34cca85244..15acba9d441696 100644
--- a/.github/workflows/linux_conditional_compilation.yml
+++ b/.github/workflows/linux_conditional_compilation.yml
@@ -39,21 +39,20 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       INSTALL_DIR: /__w/openvino/openvino/openvino_install
       BUILD_DIR: /__w/openvino/openvino/openvino_build
       SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat
       MODELS_PATH: /__w/openvino/openvino/testdata
-      CCACHE_DIR: /mount/caches/ccache/ubuntu22_x86_64_itt_clang_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 20G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release
     steps:
       - name: Install git
@@ -88,6 +87,11 @@ jobs:
           update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100
           update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 100
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -141,8 +145,8 @@ jobs:
           cmake --build ${BUILD_DIR} --parallel 8 --config ${{ env.CMAKE_BUILD_TYPE }}
           cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib
 
-      - name: Show ccache stats
-        run: ccache --show-stats
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Cmake install - OpenVINO
         run: cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake
@@ -205,18 +209,17 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       BUILD_DIR: /__w/openvino/openvino/openvino_build
       SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat
       MODELS_PATH: /__w/openvino/openvino/testdata
-      CCACHE_DIR: /mount/caches/ccache/ubuntu22_x86_64_cc_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 20G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_cc_Release
     steps:
       - name: Install git
@@ -252,6 +255,10 @@ jobs:
       - name: Install build dependencies
         run: bash ${OPENVINO_REPO}/install_build_dependencies.sh
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
       #
       # Build
       #
@@ -281,7 +288,7 @@ jobs:
         run: cmake --build ${BUILD_DIR} --parallel 8 --target benchmark_app
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Run with CC-ed runtime
         run: ${OPENVINO_REPO}/bin/intel64/Release/benchmark_app -niter 1 -nireq 1 -m ${MODELS_PATH}/models/test_model/test_model_fp32.xml -d CPU
@@ -308,7 +315,7 @@ jobs:
           name: openvino_tests
           path: ${{ env.INSTALL_TEST_DIR }}
 
-      - name: Extract OpenVINO packages
+      - name: Extract OpenVINO tests package
         run: tar -xvzf ${INSTALL_TEST_DIR}/openvino_tests.tar.gz -C ${INSTALL_TEST_DIR}
 
       - name: Install OpenVINO dependencies
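For orientation (not part of the patch): the conditional-compilation jobs above, and their Windows counterparts further below, implement a two-phase flow that the hunks only show piecemeal. Roughly, with illustrative paths, and wrapped as a single workflow step for compactness:

    # Sketch of the two-phase selective build (one job per phase in the real workflows).
    - name: Selective build - both phases (sketch)
      run: |
        # Phase 1: instrumented build, then trace a real workload so the ITT
        # collector records which code regions are executed (written as *.csv stats).
        cmake -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -S openvino -B build
        cmake --build build --parallel
        cmake --build build --parallel --target sea_itt_lib
        python3 openvino/thirdparty/itt_collector/runtool/sea_runtool.py \
            --bindir openvino/bin/intel64/Release -o stat/itt_stat ! \
            openvino/bin/intel64/Release/benchmark_app -niter 1 -nireq 1 \
            -m testdata/models/test_model/test_model_fp32.xml -d CPU
        # Phase 2: rebuild with only the recorded code paths compiled in, then
        # verify the slimmed-down runtime can still execute the same model.
        cmake -DSELECTIVE_BUILD=ON "-DSELECTIVE_BUILD_STAT=$PWD/stat/*.csv" -S openvino -B build_cc
        cmake --build build_cc --parallel --target benchmark_app
        ./openvino/bin/intel64/Release/benchmark_app -niter 1 -nireq 1 \
            -m testdata/models/test_model/test_model_fp32.xml -d CPU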
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index d5084d7a5d19c6..487536f615a8a6 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -1,6 +1,9 @@
-name: macOS (macOS 12, Python 3.11)
+name: macOS (Python 3.11)
 on:
   workflow_dispatch:
+  schedule:
+    # at 00:00 on workdays
+    - cron: '0 0 * * 1,2,3,4,5'
 #  pull_request:
 #    paths-ignore:
 #      - '**/docs/**'
@@ -8,18 +11,18 @@
 #      - '**/**.md'
 #      - '**.md'
 #      - '**/layer_tests_summary/**'
 #      - '**/conformance/**'
-  push:
-    paths-ignore:
-      - '**/docs/**'
-      - 'docs/**'
-      - '**/**.md'
-      - '**.md'
-      - '**/layer_tests_summary/**'
-      - '**/conformance/**'
-    branches:
-      - master
-      - 'releases/**'
+#  push:
+#    paths-ignore:
+#      - '**/docs/**'
+#      - 'docs/**'
+#      - '**/**.md'
+#      - '**.md'
+#      - '**/layer_tests_summary/**'
+#      - '**/conformance/**'
+#    branches:
+#      - master
+#      - 'releases/**'
 
 concurrency:
   # github.ref is not unique in post-commit
@@ -34,11 +37,22 @@ jobs:
     defaults:
       run:
        shell: bash
-    runs-on: macos-12-large
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13-large'
+            macos_deployment_target: '10.12'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+            macos_deployment_target: '11.0'
+    runs-on: ${{ matrix.machine }}
     env:
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      MACOSX_DEPLOYMENT_TARGET: '10.12'
+      MACOSX_DEPLOYMENT_TARGET: ${{ matrix.macos_deployment_target }}
       CMAKE_CXX_COMPILER_LAUNCHER: ccache
       CMAKE_C_COMPILER_LAUNCHER: ccache
       OPENVINO_REPO: ${{ github.workspace }}/openvino
@@ -100,9 +114,9 @@ jobs:
           # github.ref_name is 'ref/PR_#' in case of the PR, and 'branch_name' when executed on push
           save: ${{ github.ref_name == 'master' && 'true' || 'false' }}
           verbose: 2
-          key: ${{ runner.os }}-main
+          key: ${{ runner.os }}-${{ matrix.architecture }}-main
           restore-keys: |
-            ${{ runner.os }}-main
+            ${{ runner.os }}-${{ matrix.architecture }}-main
 
       - name: CMake configure
         run: |
@@ -144,6 +158,7 @@ jobs:
         run: |
           cmake \
             -DBUILD_nvidia_plugin=OFF \
+            -DBUILD_java_api=OFF \
             -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" \
             -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_CONTRIB_REPO }}/modules \
             -S ${{ env.OPENVINO_REPO }} \
@@ -158,7 +173,7 @@ jobs:
         if: ${{ always() }}
         uses: actions/upload-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz
           if-no-files-found: 'error'
@@ -166,7 +181,7 @@ jobs:
         if: ${{ always() }}
         uses: actions/upload-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz
           if-no-files-found: 'error'
@@ -175,7 +190,16 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
@@ -189,13 +213,13 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
@@ -248,7 +272,7 @@ jobs:
         uses: actions/upload-artifact@v3
         if: ${{ !cancelled() }}
         with:
-          name: test-results-samples
+          name: test-results-samples-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
@@ -258,7 +282,16 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
@@ -271,13 +304,13 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
@@ -314,7 +347,11 @@ jobs:
       - name: Low Precision Transformations Tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_lp_transformations_tests --gtest_print_time=1 \
+
+          # Skips under Ticket: 122660
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*smoke_LPT/FoldFakeQuantizeInTransformations.CompareFunctions*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_lp_transformations_tests --gtest_print_time=1 "$skip_filter" \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-LpTransformations.xml
 
       - name: OpenVINO Conditional compilation tests
@@ -337,8 +374,10 @@ jobs:
           --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-PaddleTests.xml
 
       - name: ONNX frontend tests
+        if: ${{ matrix.architecture == 'x86_64' }} # Ticket for ARM64: 122663
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
+
           ${{ env.INSTALL_TEST_DIR }}/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ONNXFrontend.xml
 
@@ -351,7 +390,11 @@ jobs:
       - name: TensorFlow frontend tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_frontend_tests --gtest_print_time=1 \
+
+          # Skips under Ticket: 122666
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*CompileModelsTests.ModelWithSplitConvConcat*:*NgramCompilation*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_frontend_tests --gtest_print_time=1 "$skip_filter" \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowFrontend.xml
 
       - name: TensorFlow Lite frontend tests
@@ -363,7 +406,11 @@ jobs:
       - name: Transformations func tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 \
+
+          # Skips under Ticket: 122668
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*TransformationTestsF.CompressQuantizeWeights*:*TransformationTests/CompressQuantizeWeightsTests.FusionTest*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 "$skip_filter" \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-Transformations.xml
 
       - name: Common test utils tests
@@ -384,6 +431,18 @@ jobs:
           ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml
 
+      - name: SubgraphsDumper tests
+        run: |
+          source ${{ env.INSTALL_DIR }}/setupvars.sh
+          ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml
+
+      - name: Template OpImpl tests
+        run: |
+          source ${{ env.INSTALL_DIR }}/setupvars.sh
+          ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateOpImplTests.xml
+
       - name: AUTO unit tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
@@ -444,7 +503,7 @@ jobs:
         uses: actions/upload-artifact@v3
         if: ${{ always() }}
         with:
-          name: test-results-cpp
+          name: test-results-cpp-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
@@ -454,7 +513,16 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       OPENVINO_CONTRIB_REPO: ${{ github.workspace }}/openvino_contrib
@@ -479,13 +547,13 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
@@ -511,10 +579,16 @@ jobs:
           # Install the core OV wheel
           python3 -m pip install ${{ env.INSTALL_DIR }}/tools/openvino-*.whl
 
+          # mxnet is only available on x86_64
+          extras_to_install="caffe,kaldi,onnx,tensorflow2,pytorch"
+          if [[ "${{ matrix.architecture }}" == "x86_64" ]]; then
+            extras_to_install="mxnet,$extras_to_install"
+          fi
+
           # Find and install OV dev wheel
           pushd ${{ env.INSTALL_DIR }}/tools
             ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl')
-            python3 -m pip install $ov_dev_wheel_name[mxnet,caffe,kaldi,onnx,tensorflow2,pytorch]
+            python3 -m pip install $ov_dev_wheel_name[$extras_to_install]
           popd
 
       - name: Python API 1.0 Tests
@@ -597,6 +671,7 @@ jobs:
           TEST_DEVICE: CPU
 
       - name: TensorFlow 2 Layer Tests - TF FE
+        if: ${{ 'false' }} # Ticket: 123322
         run: |
           python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
           export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH
@@ -634,6 +709,7 @@ jobs:
           TEST_PRECISION: FP16
 
       - name: Python ONNX operators tests
+        if: ${{ 'false' }} # Ticket: 123325
         run: |
           # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately
           python3 -m pytest -sv ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests -k 'not cuda' \
@@ -657,18 +733,27 @@ jobs:
         uses: actions/upload-artifact@v3
         if: ${{ always() }}
         with:
-          name: test-results-python
+          name: test-results-python-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
 
   CPU_Functional_Tests:
     name: CPU functional tests
-    if: ${{ 'false' }} # Ticket: 122001
     needs: Build
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          # ticket: 122001
+          # - architecture: 'x86_64'
+          #   machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
@@ -680,33 +765,37 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
         run: |
           pushd ${{ env.INSTALL_DIR }}
-            tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz || exit 1
+            tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz
          popd
           pushd ${{ env.INSTALL_TEST_DIR }}
-            tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz || exit 1
+            tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz
           popd
 
-      - name: Intel CPU plugin func tests
+      - name: CPU plugin func tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=*smoke* --gtest_output=xml:"${{ env.INSTALL_TEST_DIR }}/TEST-CPUFuncTests.xml"
+
+          # Skips under Ticket: 122769
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*smoke_nonzero/NonZeroLayerTest.Inference/IS*:*smoke_NormalizeL2_*:*Extension.XmlModelWithExtensionFromDSO*:*Extension.OnnxModelWithExtensionFromDSO*:*ONNXQuantizedModels/QuantizedModelsTests.MaxPool*:*ONNXQuantizedModels/QuantizedModelsTests.Convolution*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=*smoke* "$skip_filter" --gtest_output=xml:"${{ env.INSTALL_TEST_DIR }}/TEST-CPUFuncTests.xml"
 
       - name: Upload Test Results
         uses: actions/upload-artifact@v3
         if: ${{ always() }}
         with:
-          name: test-results-functional-cpu
+          name: test-results-functional-cpu-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
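A note on the `skip_filter` idiom introduced throughout the macOS jobs above (illustrative, not part of the patch): GitHub Actions expressions of the form `${{ condition && 'value' || '' }}` emulate a ternary, so arm64 runs receive a `--gtest_filter` exclusion list while x86_64 runs pass an empty string through. A minimal sketch, with a hypothetical step name, binary, and filter:

    - name: Some gtest suite   # hypothetical step
      run: |
        # evaluates to '' on x86_64 and to the exclusion list only on arm64
        skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*KnownArm64Failure*' || '' }}
        ./some_test_binary --gtest_print_time=1 "$skip_filter" \
          --gtest_output=xml:results.xml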
diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml
index 1cedaa107bf63d..0dc685275d2f84 100644
--- a/.github/workflows/webassembly.yml
+++ b/.github/workflows/webassembly.yml
@@ -36,15 +36,14 @@ jobs:
       image: emscripten/emsdk
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       CMAKE_BUILD_TYPE: 'Release'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       OPENVINO_BUILD_DIR: /__w/openvino/openvino/openvino_build
-      CCACHE_DIR: /mount/caches/ccache/webassembly_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: webassembly_Release
     steps:
       - name: Install git
         run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates
@@ -55,8 +54,10 @@ jobs:
           path: 'openvino'
           submodules: 'true'
 
-      - name: Install ccache
-        run: apt-get install --assume-yes --no-install-recommends ccache
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
 
       - name: emcmake cmake - configure
         run: |
@@ -64,8 +65,8 @@ jobs:
             -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations" \
             -DCMAKE_C_FLAGS="-Wno-deprecated-declarations" \
             -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-            -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+            -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \
+            -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \
             -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
             -S ${OPENVINO_REPO} \
             -B ${OPENVINO_BUILD_DIR}
@@ -74,4 +75,4 @@ jobs:
         run: emmake make -j$(nproc) hello_query_device -C ${OPENVINO_BUILD_DIR}
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 6aed320376c21e..e6763d2a696377 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -1,6 +1,9 @@
 name: Windows (VS 2022, Python 3.11)
 on:
-  workflow_dispatch:
+  schedule:
+    # at 00:00 on workdays
+    - cron: '0 0 * * 1,2,3,4,5'
+#  workflow_dispatch:
 #  pull_request:
 #    paths-ignore:
 #      - '**/docs/**'
@@ -9,16 +12,16 @@
 #      - '**.md'
 #      - '**/layer_tests_summary/**'
 #      - '**/conformance/**'
-  push:
-    paths-ignore:
-      - '**/docs/**'
-      - 'docs/**'
-      - '**/**.md'
-      - '**.md'
-      - '**/layer_tests_summary/**'
-      - '**/conformance/**'
-    branches:
-      - master
+#  push:
+#    paths-ignore:
+#      - '**/docs/**'
+#      - 'docs/**'
+#      - '**/**.md'
+#      - '**.md'
+#      - '**/layer_tests_summary/**'
+#      - '**/conformance/**'
+#    branches:
+#      - master
 
 concurrency:
   # github.ref is not unique in post-commit
@@ -336,7 +339,7 @@ jobs:
       shell: cmd
       run: |
         python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
-
+        :: requires 'unit_tests' from 'tools/mo'
         set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH%
         python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml
@@ -404,7 +407,7 @@ jobs:
       shell: cmd
       run: |
         python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
-
+        :: TODO: remove setupvars.bat from here; currently, it's used for 'test_utils' installed in '/python/openvino'
         call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml
       env:
@@ -415,7 +418,7 @@ jobs:
       shell: cmd
       run: |
         python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
-
+        :: TODO: remove setupvars.bat from here; currently, it's used for 'test_utils' installed in '/python/openvino'
        call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml
       env:
@@ -561,6 +564,16 @@ jobs:
       run: |
         call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml
 
+    - name: SubgraphsDumper tests
+      shell: cmd
+      run: |
+        call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SubgraphsDumperTests.xml
+
+    - name: Template OpImpl tests
+      shell: cmd
+      run: |
+        call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateOpImplTests.xml
+
     - name: GNA plugin unit tests
       shell: cmd
       run: |
diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml
index 93f947ee071df1..f0a9741aee9537 100644
--- a/.github/workflows/windows_conditional_compilation.yml
+++ b/.github/workflows/windows_conditional_compilation.yml
@@ -1,4 +1,4 @@
-name: Tests on Windows Conditional Compilation (VS 2022, Python 3.11)
+name: Windows Conditional Compilation (VS 2022, Python 3.11)
 on:
   workflow_dispatch:
   schedule:
@@ -24,36 +24,30 @@
 #      - master
 
 concurrency:
-  group: ${{ github.head_ref || github.run_id }}-windows-cc
+  # github.ref is not unique in post-commit
+  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-windows-cc
   cancel-in-progress: true
 
 env:
-  CMAKE_BUILD_TYPE: 'Release'
-  CMAKE_GENERATOR: 'Ninja'
-  CMAKE_CXX_COMPILER_LAUNCHER: sccache
-  CMAKE_C_COMPILER_LAUNCHER: sccache
-  OPENVINO_REPO: "${{ github.workspace }}\\openvino"
-  OPENVINO_CONTRIB_REPO: "${{ github.workspace }}\\openvino_contrib"
-  INSTALL_DIR: "${{ github.workspace }}\\install_pkg"
-  INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests"
-  SAMPLES_INSTALL_DIR: "${{ github.workspace }}\\install\\samples"
-  LAYER_TESTS_INSTALL_DIR: "${{ github.workspace }}\\install\\tests\\layer_tests"
-  BUILD_DIR: "${{ github.workspace }}\\build"
-  BUILD_DIR_2: "${{ github.workspace }}\\build_s"
-  MODELS_PATH: "${{ github.workspace }}\\testdata"
-  OV_TEMP: "${{ github.workspace }}\\openvino_temp"
-  BUILD_TYPE: "Release"
-  PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin"
-  VCVARSPATH: "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Auxiliary\\Build\\vcvarsall.bat"
+  PYTHON_VERSION: '3.11'
 
 jobs:
   Build:
-    # TODO: remove. Temporary measure to prevent the workflow from scheduling on forks.
-    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     defaults:
       run:
         shell: pwsh
     runs-on: windows-latest-8-cores
+    env:
+      CMAKE_BUILD_TYPE: 'Release'
+      CMAKE_GENERATOR: 'Ninja Multi-Config'
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
+      OPENVINO_REPO: "${{ github.workspace }}\\openvino"
+      INSTALL_DIR: "${{ github.workspace }}\\openvino_install"
+      INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install"
+      BUILD_DIR: "${{ github.workspace }}\\openvino_build"
+      MODELS_PATH: "${{ github.workspace }}\\testdata"
+      SELECTIVE_BUILD_STAT_DIR: "${{ github.workspace }}\\selective_build_stat"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@v4
         with:
           path: 'openvino'
           submodules: 'true'
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.11'
+          python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Install build dependencies
-        run: |
-          choco install --no-progress ninja
+        run: choco install --no-progress ninja
 
       #
       # Build
       #
 
-      - name: Get number of CPU cores
-        uses: SimenB/github-actions-cpu-cores@v2
-        id: cpu-cores
-
-      - uses: ilammy/msvc-dev-cmd@v1
+      - name: Configure Developer Command Prompt for Microsoft Visual C++
+        uses: ilammy/msvc-dev-cmd@v1
 
       - name: Setup sccache
         uses: hendrikmuhs/ccache-action@v1.2
@@ -99,71 +89,227 @@ jobs:
           # Should save cache only if run in the master branch of the base repo
           # github.ref_name is 'ref/PR_#' in case of the PR, and 'branch_name' when executed on push
           save: ${{ github.ref_name == 'master' && 'true' || 'false' }}
-          key: ${{ github.job }}-windows-cc
+          key: ${{ github.job }}-${{ runner.os }}-itt
           restore-keys: |
-            ${{ github.job }}-windows-cc
+            ${{ github.job }}-${{ runner.os }}-itt
 
-      - name: CMake CC COLLECT
+      - name: CMake configure - CC COLLECT
         run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake -G Ninja `
+          cmake -G "${{ env.CMAKE_GENERATOR }}" `
+            -DBUILD_SHARED_LIBS=OFF `
+            -DENABLE_TESTS=ON `
             -DENABLE_CPPLINT=OFF `
-            -DENABLE_GAPI_PREPROCESSING=OFF `
-            -DENABLE_PLUGINS_XML=ON `
-            -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF `
-            -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} `
+            -DENABLE_NCC_STYLE=OFF `
+            -DENABLE_INTEL_GNA=OFF `
+            -DCMAKE_COMPILE_WARNING_AS_ERROR=ON `
             -DENABLE_PROFILING_ITT=ON `
             -DSELECTIVE_BUILD=COLLECT `
+            -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON `
            -S ${{ env.OPENVINO_REPO }} `
            -B ${{ env.BUILD_DIR }}
 
-      - name: Build CC COLLECT
+      - name: Cmake build - CC COLLECT
         run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake --build ${{ env.BUILD_DIR }} --parallel ${{ steps.cpu-cores.outputs.count }} --config ${{ env.BUILD_TYPE }} `
-            --target openvino_intel_cpu_plugin openvino_ir_frontend benchmark_app sea_itt_lib
+          cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }}
+          cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib
 
-      - name: List bin files
+      - name: Cmake install - OpenVINO
+        run: cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake
+
+      - name: Build C++ samples - OpenVINO build tree
+        run: |
+          cmake -G "${{ env.CMAKE_GENERATOR }}" -DOpenVINO_DIR=${{ env.BUILD_DIR }} -S ${{ env.INSTALL_DIR }}/samples/cpp -B ${{ env.BUILD_DIR }}/cpp_samples
+          cmake --build ${{ env.BUILD_DIR }}/cpp_samples --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target hello_query_device
+
+      - name: Build C samples - OpenVINO install tree
+        run: |
+          & ${{ env.INSTALL_DIR }}/samples/c/build_samples_msvc.bat -i ${{ env.INSTALL_DIR }} -b ${{ env.BUILD_DIR }}/c_samples
+
+      - name: Ctest - OpenVINO unit tests
         shell: cmd
-        run: dir ${{ env.OPENVINO_REPO }}\bin\ /s
+        run: |
+          set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin
+          ctest -C ${{ env.CMAKE_BUILD_TYPE }} --test-dir ${{ env.BUILD_DIR }} -V -L UNIT
 
-      - name: Code usage analysis
+      - name: Perform code tracing via ITT collector
         shell: cmd
-        working-directory: ${{ env.OPENVINO_REPO }}
         run: |
           set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin
-          call "${{ env.VCVARSPATH }}" && python thirdparty\itt_collector\runtool\sea_runtool.py --bindir ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.BUILD_TYPE }} -o ${{ env.BUILD_DIR }}\itt_stat ! ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.BUILD_TYPE }}\benchmark_app.exe -niter 1 -nireq 1 -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml -d CPU
+
+          python3 ${{ env.OPENVINO_REPO }}\thirdparty\itt_collector\runtool\sea_runtool.py ^
+            --bindir ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }} ^
+            -o ${{ env.SELECTIVE_BUILD_STAT_DIR }}\itt_stat ! ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }}\benchmark_app.exe ^
+            -niter 1 ^
+            -nireq 1 ^
+            -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml ^
+            -d CPU
+
+      - name: List bin files
         shell: cmd
         run: dir ${{ env.OPENVINO_REPO }}\bin\ /s
 
-      - name: List csv files
+      - name: List install files
         shell: cmd
-        run: dir ${{ env.BUILD_DIR }}\*.csv /s /p
+        run: dir ${{ env.INSTALL_DIR }} /s
+
+      - name: Pack Artifacts
+        run: |
+          $file=Get-ChildItem -Path "${{ env.SELECTIVE_BUILD_STAT_DIR }}"
+          $compress = @{
+            Path = $file
+            CompressionLevel = "Optimal"
+            DestinationPath = "${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip"
+          }
+          Compress-Archive @compress
+
+          $compress = @{
+            Path = "${{ env.OPENVINO_REPO }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }}/ov_cpu_func_tests.exe", "${{ env.OPENVINO_REPO }}/src/tests/test_utils/functional_test_utils/layer_tests_summary", "${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb"
+            CompressionLevel = "Optimal"
+            DestinationPath = "${{ env.BUILD_DIR }}/openvino_tests.zip"
+          }
+          Compress-Archive @compress
+
+      - name: Upload selective build statistics package
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: openvino_selective_build_stat
+          path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip
+          if-no-files-found: 'error'
+
+      - name: Upload OpenVINO tests package
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: openvino_tests
+          path: ${{ env.BUILD_DIR }}/openvino_tests.zip
+          if-no-files-found: 'error'
+
+  CC_Build:
+    name: Conditional Compilation
+    needs: Build
+    defaults:
+      run:
+        shell: pwsh
+    runs-on: windows-latest-8-cores
+    env:
+      CMAKE_BUILD_TYPE: 'Release'
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
+      OPENVINO_REPO: "${{ github.workspace }}\\openvino"
+      BUILD_DIR: "${{ github.workspace }}\\openvino_build"
+      MODELS_PATH: "${{ github.workspace }}\\testdata"
+      SELECTIVE_BUILD_STAT_DIR: "${{ github.workspace }}\\selective_build_stat"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@v4
         with:
           path: 'openvino'
           submodules: 'true'
 
       - name: Clone test models
         uses: actions/checkout@v4
         with:
           repository: 'openvinotoolkit/testdata'
           path: 'testdata'
           lfs: 'true'
           ref: 'master'
 
+      - name: Download selective build statistics package
+        uses: actions/download-artifact@v3
+        with:
+          name: openvino_selective_build_stat
+          path: ${{ env.SELECTIVE_BUILD_STAT_DIR }}
+
+      - name: Extract selective build statistics package
+        run: Expand-Archive ${{ env.SELECTIVE_BUILD_STAT_DIR }}/openvino_selective_build_stat.zip -DestinationPath "${{ env.SELECTIVE_BUILD_STAT_DIR }}"
+
-      - name: CMake CC ON
+      - name: CMake configure - CC ON
         run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake -G "Visual Studio 17 2022" `
-            -DCMAKE_VERBOSE_MAKEFILE=ON `
+          cmake `
+            -DBUILD_SHARED_LIBS=OFF `
             -DENABLE_CPPLINT=OFF `
-            -DENABLE_GAPI_PREPROCESSING=OFF `
-            -DENABLE_PROFILING_ITT=OFF `
             -DSELECTIVE_BUILD=ON `
-            -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF `
-            -DSELECTIVE_BUILD_STAT=${{ env.BUILD_DIR }}\*.csv `
+            -DENABLE_TEMPLATE=OFF `
+            -DENABLE_INTEL_GPU=OFF `
+            -DENABLE_INTEL_GNA=OFF `
+            -DENABLE_OV_TF_FRONTEND=OFF `
+            -DENABLE_OV_TF_LITE_FRONTEND=OFF `
+            -DENABLE_OV_PADDLE_FRONTEND=OFF `
+            -DENABLE_OV_PYTORCH_FRONTEND=OFF `
+            -DENABLE_OV_ONNX_FRONTEND=OFF `
+            -DSELECTIVE_BUILD_STAT=${{ env.SELECTIVE_BUILD_STAT_DIR }}\*.csv `
             -S ${{ env.OPENVINO_REPO }} `
-            -B ${{ env.BUILD_DIR_2 }}
+            -B ${{ env.BUILD_DIR }}
 
-      - name: Build CC ON
-        run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake
--build ${{ env.BUILD_DIR_2 }} --parallel ${{ steps.cpu-cores.outputs.count }} --config ${{ env.BUILD_TYPE }} ` - --target openvino_intel_cpu_plugin openvino_ir_frontend benchmark_app + - name: Cmake build - CC ON + run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target benchmark_app - - name: List bin files ON + - name: List bin files shell: cmd run: dir ${{ env.OPENVINO_REPO }}\bin\ /s - - name: Check conditional_compilation_gen.h header + - name: Run with CC-ed runtime shell: cmd - run: type ${{ env.BUILD_DIR_2 }}\src\common\conditional_compilation\conditional_compilation_gen.h + run: | + set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin + ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }}\benchmark_app.exe -niter 1 -nireq 1 -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml -d CPU - - name: Use OpenVINO after CC + CPU_Functional_Tests: + name: CPU functional tests + needs: Build + defaults: + run: + shell: pwsh + runs-on: windows-latest-8-cores + env: + INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" + PARALLEL_TEST_SCRIPT: "${{ github.workspace }}\\tests_install\\layer_tests_summary\\run_parallel.py" + PARALLEL_TEST_CACHE: "${{ github.workspace }}\\tests_install\\test_cache.lst" + + steps: + - name: Download OpenVINO tests package + uses: actions/download-artifact@v3 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + - name: Extract OpenVINO tests package + run: Expand-Archive ${{ env.INSTALL_TEST_DIR }}/openvino_tests.zip -DestinationPath "${{ env.INSTALL_TEST_DIR }}" + + - uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install python dependencies for run_parallel.py + run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/layer_tests_summary/requirements.txt + + # Windows pipeline is in nightly mode, uncomment once cache creation is consistent + # - name: Restore tests execution time + # uses: actions/cache/restore@v3 + # with: + # path: ${{ env.PARALLEL_TEST_CACHE }} + # key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} + # restore-keys: | + # ${{ runner.os }}-tests-functional-cpu-stamp + + - name: Intel CPU plugin func tests (parallel) shell: cmd run: | + set path=%path%;${{ env.INSTALL_TEST_DIR }}\tbb\bin;${{ env.INSTALL_TEST_DIR }}\tbb + python3 ${{ env.PARALLEL_TEST_SCRIPT }} -e ${{ env.INSTALL_TEST_DIR }}\ov_cpu_func_tests.exe -w ${{ env.INSTALL_TEST_DIR }} -s suite -rf 0 -- --gtest_print_time=1 --gtest_filter=*smoke* + timeout-minutes: 45 + + - name: Upload Test Results + uses: actions/upload-artifact@v3 + if: ${{ !cancelled() }} + with: + name: test-results-functional-cpu + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + ${{ env.INSTALL_TEST_DIR }}/logs/failed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/crashed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/hanged/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/interapted/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/disabled_tests.log + if-no-files-found: 'error'
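For context on the two conditional-compilation jobs above: the `Build` job compiles with `-DSELECTIVE_BUILD=COLLECT` and `-DENABLE_PROFILING_ITT=ON`, traces `benchmark_app` through `sea_runtool.py` so that every instrumented code region that actually runs is recorded, and publishes the resulting CSV statistics as an artifact; `CC_Build` then reconfigures with `-DSELECTIVE_BUILD=ON` and `-DSELECTIVE_BUILD_STAT=<stats>/*.csv`, so everything the trace never touched is compiled out. A minimal Python sketch of that orchestration follows; the paths and the flag subset are placeholders reduced to the essentials, not the actual CI script:

```python
import subprocess
from pathlib import Path

REPO = Path("openvino")                   # placeholder checkout location
STAT_DIR = Path("selective_build_stat")   # CSVs produced by sea_runtool.py in stage 1

def configure_and_build(build_dir, flags):
    # Configure with the given selective-build flags, then build.
    subprocess.run(["cmake", "-S", str(REPO), "-B", str(build_dir), *flags], check=True)
    subprocess.run(["cmake", "--build", str(build_dir), "--parallel"], check=True)

# Stage 1: instrumented build; benchmark_app is then traced via sea_runtool.py
# (elided here) to fill STAT_DIR with per-region usage statistics.
configure_and_build("build_collect", ["-DENABLE_PROFILING_ITT=ON", "-DSELECTIVE_BUILD=COLLECT"])

# Stage 2: rebuild against the recorded statistics; untraced regions are compiled out.
configure_and_build("build_cc", ["-DSELECTIVE_BUILD=ON",
                                 f"-DSELECTIVE_BUILD_STAT={STAT_DIR / '*.csv'}"])
```

The final `benchmark_app` run in the `Run with CC-ed runtime` step is the smoke test that the slimmed-down runtime still executes the traced model.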
diff --git a/README.md b/README.md index bfc4a722c2680d..489ef7803ccd80 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,16 @@ OpenVINO™ Toolkit also contains several plugins which simplify loading models OpenVINO™ Toolkit is licensed under [Apache License Version 2.0](LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. +## Telemetry +OpenVINO™ collects software performance and usage data for the purpose of improving OpenVINO™ tools. This data is collected directly by OpenVINO™ or through the use of Google Analytics 4. +You can opt out at any time by running the command: + +``` bash +opt_in_out --opt_out +``` + +More information is available at https://docs.openvino.ai/latest/openvino_docs_telemetry_information.html. + ## Documentation ### User documentation diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 5c86bdea57620c..257263f663bec6 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -104,10 +104,10 @@ function(ov_download_tbb) elseif(LINUX AND X86_64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin-20231012.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "f3f2edd8e7875b02220f11ab5b201411d5af6822e525e8da5444b4a666514e8b" + SHA256 "6f39d18783b37fdcc15ca137fbf70bc78206848af1a510cada806279fae49718" USE_NEW_LOCATION TRUE) elseif(YOCTO_AARCH64) RESOLVE_DEPENDENCY(TBB @@ -135,10 +135,10 @@ function(ov_download_tbb) elseif(LINUX AND AARCH64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.1-lin-arm64-canary.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.1-lin-arm64-20231012.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "042fdac53be65841a970b05d892f4b20b556b06fd3b20d2d0068e49c4fd74f07" + SHA256 "cbb239cbda7ea2937cec7008c12fe628dd44488e1eafd9630f8814f9eb2c13e2" USE_NEW_LOCATION TRUE) elseif(APPLE AND AARCH64) # build oneTBB 2021.2.1 with export MACOSX_DEPLOYMENT_TARGET=11.0 @@ -204,10 +204,10 @@ function(ov_download_tbbbind_2_5) USE_NEW_LOCATION TRUE) elseif(LINUX AND X86_64) RESOLVE_DEPENDENCY(TBBBIND_2_5 - ARCHIVE_LIN "tbbbind_2_5_static_lin_v3.tgz" + ARCHIVE_LIN "tbbbind_2_5_static_lin_v4.tgz" TARGET_PATH "${TEMP}/tbbbind_2_5" ENVIRONMENT "TBBBIND_2_5_ROOT" - SHA256 "d39deb262c06981b5e2d2e3c593e9fc9be62ce4feb91dd4e648e92753659a6b3" + SHA256 "4ebf30246530795f066fb9616e6707c6b17be7a65d29d3518b578a769dd54eea" USE_NEW_LOCATION TRUE) else() # TMP: for Apple Silicon TBB does not provide TBBBind diff --git a/cmake/developer_package/frontends/frontends.cmake b/cmake/developer_package/frontends/frontends.cmake index a86c57c6c87845..a20b1665fb7d29 100644 --- a/cmake/developer_package/frontends/frontends.cmake +++ b/cmake/developer_package/frontends/frontends.cmake @@ -125,17 +125,24 @@ macro(ov_add_frontend) source_group("public include" FILES ${LIBRARY_PUBLIC_HEADERS}) # Generate protobuf file on build time for each '.proto' file in src/proto - file(GLOB proto_files ${frontend_root_dir}/src/proto/*.proto) + set(protofiles_root_dir "${frontend_root_dir}/src/proto") + file(GLOB_RECURSE proto_files ${protofiles_root_dir}/*.proto) foreach(proto_file IN LISTS proto_files) + # filter out standard google proto files + if(proto_file MATCHES ".*google.*") + continue() + endif() + file(RELATIVE_PATH proto_file_relative "${CMAKE_SOURCE_DIR}" "${proto_file}") - get_filename_component(FILE_DIR ${proto_file} DIRECTORY) get_filename_component(FILE_WE ${proto_file} NAME_WE) - set(OUTPUT_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.cc) - set(OUTPUT_PB_HEADER 
${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.h) + file(RELATIVE_PATH relative_path ${protofiles_root_dir} ${proto_file}) + get_filename_component(relative_path ${relative_path} DIRECTORY) + set(OUTPUT_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/${relative_path}/${FILE_WE}.pb.cc) + set(OUTPUT_PB_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${relative_path}/${FILE_WE}.pb.h) add_custom_command( OUTPUT "${OUTPUT_PB_SRC}" "${OUTPUT_PB_HEADER}" - COMMAND ${PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FILE_DIR} ${FILE_WE}.proto + COMMAND ${PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${protofiles_root_dir} ${proto_file} DEPENDS ${PROTOC_DEPENDENCY} ${proto_file} COMMENT "Running C++ protocol buffer compiler (${PROTOC_EXECUTABLE}) on ${proto_file_relative}" VERBATIM diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 4fbce5b4a58ca7..9671d827521d20 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -4,14 +4,6 @@ include(GNUInstallDirs) -if(CPACK_GENERATOR STREQUAL "BREW") - # brew relies on RPATHs - # set(CMAKE_SKIP_INSTALL_RPATH OFF) -else() - # we don't need RPATHs, because libraries are searched by standard paths - set(CMAKE_SKIP_INSTALL_RPATH ON) -endif() - # # ov_common_libraries_cpack_set_dirs() # @@ -115,3 +107,12 @@ macro(ov_define_component_include_rules) endmacro() ov_define_component_include_rules() + +if(CPACK_GENERATOR STREQUAL "BREW") + # brew relies on RPATHs + set(CMAKE_SKIP_INSTALL_RPATH OFF) + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${OV_CPACK_LIBRARYDIR}") +else() + # we don't need RPATHs, because libraries are searched by standard paths + set(CMAKE_SKIP_INSTALL_RPATH ON) +endif() diff --git a/docs/articles_en/about_openvino/additional_resources/telemetry_information.md b/docs/articles_en/about_openvino/additional_resources/telemetry_information.md index 4340a40923770b..b23a763ff97e70 100644 --- a/docs/articles_en/about_openvino/additional_resources/telemetry_information.md +++ b/docs/articles_en/about_openvino/additional_resources/telemetry_information.md @@ -3,13 +3,11 @@ @sphinxdirective .. meta:: - :description: Learn about OpenVINO™ telemetry, that with your explicit consent - collects only usage data to simplify debugging and further development. + :description: Learn about OpenVINO™ telemetry, which collects anonymous usage data for the purpose of improving OpenVINO™ tools. -To facilitate debugging and further development, OpenVINO™ asks its users for -a permission to collect telemetry data. It will not be collected -without an explicit consent on your part and will cover only OpenVINO™ usage information. +To facilitate debugging and further development, OpenVINO™ collects anonymous telemetry data. Anonymous telemetry data is collected by default, +but you can stop data collection anytime by running the command ``opt_in_out --opt_out``. It does not extend to any other Intel software, hardware, website usage, or other products. Google Analytics is used for telemetry purposes. Refer to 
Refer to Enable or disable Telemetry reporting ########################################################### -First-run consent -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -On the first run of an application that collects telemetry data, you will be prompted -to opt in or out of telemetry collection with the following telemetry message: - -.. code-block:: console - - Intel would like your permission to collect software performance and usage data - for the purpose of improving Intel products and services. This data will be collected - directly by Intel or through the use of Google Analytics. This data will be stored - in countries where Intel or Google operate. - - You can opt-out at any time in the future by running ``opt_in_out --opt_in``. - - More Information is available at docs.openvino.ai. - - Please type ``Y`` to give your consent or ``N`` to decline. - -Choose your preference by typing ``Y`` to enable or ``N`` to disable telemetry. Your choice will -be confirmed by a corresponding disclaimer. If you do not reply to the telemetry message, -your telemetry data will not be collected. - -For the Neural Network Compression Framework (NNCF), which is not a command line application, -the telemetry message will not display. Telemetry data will only be collected from NNCF -if you have explicitly provided consent in another OpenVINO tool. - - Changing consent decision +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -82,8 +52,8 @@ Telemetry Data Collection Details .. tab-item:: Telemetry Data Retention :sync: telemetry-data-retention - Telemetry data is retained in Google Analytics for a maximum of 26 months. - Any raw data that has reached the 26-month threshold is deleted from Google Analytics on a monthly basis. + Telemetry data is retained in Google Analytics for a maximum of 14 months. + Any raw data that has reached the 14-month threshold is deleted from Google Analytics on a monthly basis. @endsphinxdirective \ No newline at end of file diff --git a/docs/dev/build_windows.md b/docs/dev/build_windows.md index 28001328ea3013..b6321785c38970 100644 --- a/docs/dev/build_windows.md +++ b/docs/dev/build_windows.md @@ -78,7 +78,7 @@ Supported configurations: ```sh call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC\Auxiliary\Build\vcvars64.bat" cmake -G Ninja -Wno-dev -DCMAKE_BUILD_TYPE=Release .. -ninja . +cmake --build . 
--parallel ``` ## See also diff --git a/docs/requirements.txt b/docs/requirements.txt index 69433a40eb64ff..2e643842f24861 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -44,6 +44,6 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 toml==0.10.2 -urllib3==1.26.17 +urllib3==1.26.18 zipp==3.4.1 docs/openvino_custom_sphinx_sitemap diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index 72438eeb2ecd91..c4d3c3e35568aa 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ numpy>=1.16.6 singledispatchmethod; python_version<'3.8' -openvino-telemetry>=2023.1.0 +openvino-telemetry>=2023.2.1 diff --git a/src/bindings/python/setup.cfg b/src/bindings/python/setup.cfg index 083c8e1de85cb1..b9b15ef0ca1214 100644 --- a/src/bindings/python/setup.cfg +++ b/src/bindings/python/setup.cfg @@ -13,6 +13,7 @@ setenv = OV_BACKEND = {env:OV_BACKEND:"CPU"} PYTHONPATH = {env:PYTHONPATH} OpenVINO_DIR = {env:OpenVINO_DIR} + CI = True passenv = http_proxy https_proxy diff --git a/src/bindings/python/src/compatibility/ngraph/opset3/ops.py b/src/bindings/python/src/compatibility/ngraph/opset3/ops.py index 82846826111751..7d7c757d9cd5dc 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset3/ops.py +++ b/src/bindings/python/src/compatibility/ngraph/opset3/ops.py @@ -550,9 +550,9 @@ def shuffle_channels(data: Node, axis: int, group: int, name: Optional[str] = No `data_reshaped` = reshape(`data`, [N, group, C / group, H * W]) - `data_trnasposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) + `data_transposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) - `output` = reshape(`data_trnasposed`, [N, C, H, W]) + `output` = reshape(`data_transposed`, [N, C, H, W]) For example: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 45a662e4e45fd1..479e1a5cb1c622 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -7,10 +7,9 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape -from openvino.frontend.pytorch.utils import maybe_convert_max_int, make_constant, fetch_attr, pt_to_ov_type_map, ov_to_c_type_map +from openvino.frontend.pytorch.utils import maybe_convert_max_int, make_constant, fetch_attr, pt_to_ov_type_map import torch -import ctypes class TorchFXPythonDecoder (Decoder): @@ -224,11 +223,7 @@ def as_constant(self): if self.pt_module.op == 'get_attr': # Extract Constant from FX module field ret = fetch_attr(self.fx_gm, self.pt_module.target) - ovshape = PartialShape(ret.size()) - ovtype = pt_to_ov_type_map[str(ret.type())] - c_type = ctypes.POINTER(ov_to_c_type_map[ovtype]) - data_c_ptr = ctypes.cast(ret.data_ptr(), c_type) - ov_const = op.Constant(ovtype, ovshape.get_shape(), data_c_ptr[:ret.nelement()]) + ov_const = op.Constant(ret.numpy(), shared_memory=True) return ov_const.outputs() @@ -370,7 +365,7 @@ def inlined_inputs(self, index): return result def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", 
"aten::matmul"]: # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that return False try: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py new file mode 100644 index 00000000000000..b4bd06552b2a1e --- /dev/null +++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py @@ -0,0 +1,140 @@ + +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# flake8: noqa +# mypy: ignore-errors + +import torch +from functools import partial + +# Wraps a single tensor to a module to prevent it from jit.freezing +# It depends on a tensor dtype whether it will be preserved from freezing. Refer to the decoder code to learn which types will be preserved. +class KeepWeight(torch.nn.Module): + + def __init__(self, weight): + super().__init__() + self.weight = torch.nn.Parameter(weight, requires_grad=False) + + def forward(self): + return self.weight + + +# Produces a pattern that can be captured later and represented as a single u4 constant node +def decompression_pattern(weights): + mask = torch.tensor(15, dtype=torch.uint8).to(weights.device) + return torch.stack((torch.bitwise_and(weights, mask), torch.bitwise_right_shift(weights, 4)), dim=-1) + + +def patched_forward(self, *args, **kwargs): + if hasattr(self, '_hf_hook'): + args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs) + + x = args[0] + dtype = x.dtype + outshape = x.shape[:-1] + (self.width,) + x = x.view(-1, x.shape[-1]) + groups = self.qzeros.shape[0] + height = self.qweight.shape[0] + + unpacked_weights = decompression_pattern( + self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8) + unpacked_weights = torch.transpose( + unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width) + unpacked_zp = decompression_pattern( + self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1) + + unpacked_zp = unpacked_zp.to(dtype) + 1 + + unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales + unpacked_weights = unpacked_weights.view(-1, self.width) + + out = x @ unpacked_weights + + out = out.view(outshape) + if self.bias is not None: + out.add_(self.bias) + + if hasattr(self, '_hf_hook'): + out = self._hf_hook.post_forward(self, out) + return out + + +# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema +supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old'] + + +def patch_model(model): + for name, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + # already patched, skipping + continue + # TODO: Check module type + is_quantized = getattr(m, 'is_quantized', None) + if is_quantized is not None: + m.is_quantized = False + m.float() # enables tracing on CPU, applied for all modules + if hasattr(m, 'QUANT_TYPE'): + if m.QUANT_TYPE not in supported_quant_types: + raise ValueError( + f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for AutoGPTQ model, only the following types are supported: {supported_quant_types}') + if m.bits != 4: + raise ValueError( + f'Unsupported bits == {m.bits} is discovered in module {name} in AutoGPTQ model, only bits == 4 is supported.') + + int4_in_int32 = 8 + groups = m.qzeros.shape[0] + m.width = m.qweight.shape[1] + assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups + + m._openvino_patch_orig_forward = m.forward + m.forward = 
partial(patched_forward, m) + + # Keep original field properties to be used when the model is returned to its original state + m._openvino_patch_orig_qweights_type = m.qweight.dtype + m._openvino_patch_orig_qzeros_type = m.qzeros.dtype + m._openvino_patch_orig_scale_shape = m.scales.shape + + m.qweight = m.qweight.view(dtype=torch.uint8) + m.qzeros = m.qzeros.view(dtype=torch.uint8) + + # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules + m.add_module( + '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight)) + m.add_module('_openvino_u4_compression_submodule_qzeros', + KeepWeight(m.qzeros)) + + m.scales = m.scales.view(-1, 1, m.width) + + +def unpatch_model(model): + for _, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + try: + m.forward = m._openvino_patch_orig_forward + del m._openvino_patch_orig_forward + + m.qweight = m.qweight.view( + dtype=m._openvino_patch_orig_qweights_type) + del m._openvino_patch_orig_qweights_type + + m.qzeros = m.qzeros.view( + dtype=m._openvino_patch_orig_qzeros_type) + del m._openvino_patch_orig_qzeros_type + + m.scales = m.scales.view(m._openvino_patch_orig_scale_shape) + del m._openvino_patch_orig_scale_shape + + del m._openvino_u4_compression_submodule_qweights + del m._openvino_u4_compression_submodule_qzeros + except Exception as error: + print('[ WARNING ] Exception raised during GPTQ model unpatching. Depending on the exact issue it may lead to a broken original model') + print(error) + + +def detect_gptq_model_raw(model): + return model and getattr(model, 'config', None) and getattr(model.config, 'quantization_config', None) and model.config.quantization_config.quant_method == 'gptq' + + +def detect_gptq_model(model): + return detect_gptq_model_raw(model) or getattr(model, 'model', None) and detect_gptq_model_raw(model.model) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py index 726f3b598bc15e..4a76d90b160553 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py @@ -41,6 +41,7 @@ def __init__(self): "torch.ops.aten.arange.default": None, "torch.ops.aten.argmax.default": None, "torch.ops.aten.avg_pool2d.default": None, + "torch.ops.aten.baddbmm.default": None, "torch.ops.aten.bitwise_and.Tensor": None, "torch.ops.aten.bmm.default": None, "torch.ops.aten.cat.default": None, @@ -67,6 +68,7 @@ def __init__(self): "torch.ops.aten.hardswish_.default": None, "torch.ops.aten.hardtanh_.default": None, "torch.ops.aten.index.Tensor": None, + "torch.ops.aten.leaky_relu_.default": None, "torch.ops.aten.lift_fresh_copy.default": None, "torch.ops.aten.linalg_vector_norm.default": None, "torch.ops.aten.lt.Tensor": None, @@ -89,6 +91,7 @@ def __init__(self): "torch.ops.aten.relu.default": None, "torch.ops.aten.relu_.default": None, "torch.ops.aten.rsub.Scalar": None, + "torch.ops.aten._scaled_dot_product_flash_attention.default": None, "torch.ops.aten.select.int": None, "torch.ops.aten.sigmoid.default": None, "torch.ops.aten.silu.default": None,
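The `decompression_pattern` helper in the new `gptq.py` above is the core trick: AutoGPTQ packs two unsigned 4-bit values into every byte of `qweight`/`qzeros`, and stacking the masked low nibble with the shifted high nibble along a new trailing axis restores the original value order, producing a pattern the frontend can later collapse into a single u4 constant. A small standalone illustration of the same pattern:

```python
import torch

def decompression_pattern(weights: torch.Tensor) -> torch.Tensor:
    # Same pattern as gptq.py: split each byte into its low and high nibble.
    mask = torch.tensor(15, dtype=torch.uint8)
    return torch.stack((torch.bitwise_and(weights, mask),
                        torch.bitwise_right_shift(weights, 4)), dim=-1)

packed = torch.tensor([0x21, 0x43], dtype=torch.uint8)  # bytes packing values 1,2 and 3,4
print(decompression_pattern(packed))  # tensor([[1, 2], [3, 4]], dtype=torch.uint8)
```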
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index b6caf22cfc7b68..a57393e7638d67 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -9,6 +9,7 @@ from openvino.runtime import op, PartialShape, Type as OVType, OVAny from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, prepare_example_inputs_and_model, convert_quantized_tensor from openvino.runtime import opset11 as ops +from openvino.frontend.pytorch import gptq import typing import torch @@ -84,12 +85,32 @@ def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False) if example_inputs is None: scripted = torch.jit.script(pt_module) else: - input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model(example_inputs, input_params, pt_module) - scripted = torch.jit.trace(pt_module, **input_parameters, strict=False) + input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model( + example_inputs, input_params, pt_module) + gptq_patched = False + + if gptq.detect_gptq_model(pt_module): + try: + gptq.patch_model(pt_module) + gptq_patched = True + except Exception as error: + print('[ WARNING ] Failed patching of AutoGPTQ model. Error message:\n', error) + print('[ WARNING ] Tracing of the model will likely be unsuccessful or incorrect') + gptq.unpatch_model(pt_module) + gptq_patched = False + + try: + scripted = torch.jit.trace( + pt_module, **input_parameters, strict=False) + finally: + if gptq_patched: + gptq.unpatch_model(pt_module) + if not skip_freeze: + ops_kind_no_freeze = ["quantize", "aten::as_strided"] for n in scripted.inlined_graph.nodes(): # TODO: switch off freezing for all traced models - if "quantize" in n.kind(): + if any(kind in n.kind() for kind in ops_kind_no_freeze): # do not freeze quantized models skip_freeze = True break @@ -130,6 +151,16 @@ def get_input_shape(self, index: int): raw_input = self._raw_input(index) return self.get_shape_for_value(raw_input) + def get_input_strides(self, index: int) -> typing.List[int]: + raw_input = self._raw_input(index) + if isinstance(raw_input, torch.Value): + inp_type = raw_input.type() + if isinstance(inp_type, torch.TensorType): + strides = inp_type.strides() + if strides: + return strides + return [] + def get_input_type(self, index: int): raw_input = self._raw_input(index) return self.get_type_for_value(raw_input) @@ -341,8 +372,8 @@ def input_is_none(self, index: int) -> bool: return False def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: - # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::_convolution", "aten::matmul"]: + # AliasDB::may_contain_alias sometimes returns True for tensors produced by convolution or matmul, we have to work around that return False try: return self.alias_db.may_contain_alias(self._raw_input(in_index), self._raw_output(out_index)) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index 97d237fb0efda1..a3ac46e701119b 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -7,7 +7,6 @@ import torch import numpy as np -import ctypes from openvino.runtime import op, Type as OVType, Shape, Tensor from openvino.runtime import opset11 as ops @@ -132,13 +131,6 @@ def 
get_value_from_getattr(getattr_node, self_module): "torch.qint32": OVType.i32 } -ov_to_c_type_map = { - OVType.f32: ctypes.c_float, - OVType.f64: ctypes.c_double, - OVType.i32: ctypes.c_int, - OVType.i64: ctypes.c_int64, -} - wrapper_template = """ import torch diff --git a/src/bindings/python/src/openvino/runtime/opset3/ops.py b/src/bindings/python/src/openvino/runtime/opset3/ops.py index 979fda8a782a02..8a1d81d9703ffb 100644 --- a/src/bindings/python/src/openvino/runtime/opset3/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset3/ops.py @@ -575,9 +575,9 @@ def shuffle_channels(data: Node, axis: int, group: int, name: Optional[str] = No `data_reshaped` = reshape(`data`, [N, group, C / group, H * W]) - `data_trnasposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) + `data_transposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) - `output` = reshape(`data_trnasposed`, [N, C, H, W]) + `output` = reshape(`data_transposed`, [N, C, H, W]) For example: diff --git a/src/bindings/python/src/openvino/runtime/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py index 5eeeb021a7c724..aa986d4f873c9c 100644 --- a/src/bindings/python/src/openvino/runtime/utils/types.py +++ b/src/bindings/python/src/openvino/runtime/utils/types.py @@ -23,6 +23,7 @@ openvino_to_numpy_types_map = [ (Type.boolean, bool), + (Type.boolean, np.bool_), (Type.f16, np.float16), (Type.f32, np.float32), (Type.f64, np.float64), @@ -39,6 +40,7 @@ openvino_to_numpy_types_str_map = [ ("boolean", bool), + ("boolean", np.bool_), ("f16", np.float16), ("f32", np.float32), ("f64", np.float64), diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index b78d3ea4c37bff..7b473929a63396 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -8,6 +8,7 @@ #include "Python.h" #include "openvino/core/except.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/common_util.hpp" #define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_ @@ -170,13 +171,12 @@ ov::op::v0::Constant create_copied(ov::Tensor& tensor) { return ov::op::v0::Constant(tensor.get_element_type(), tensor.get_shape(), const_cast(tensor.data())); } -OPENVINO_SUPPRESS_DEPRECATED_START template <> ov::op::v0::Constant create_shared(py::array& array) { // Check if passed array has C-style contiguous memory layout. // If memory is going to be shared it needs to be contiguous before passing to the constructor. if (array_helpers::is_contiguous(array)) { - auto memory = std::make_shared>( + auto memory = std::make_shared>( static_cast(array.ndim() == 0 ? array.mutable_data() : array.mutable_data(0)), array.ndim() == 0 ? array.itemsize() : array.nbytes(), array); @@ -185,7 +185,6 @@ ov::op::v0::Constant create_shared(py::array& array) { // If passed array is not C-style, throw an error. OPENVINO_THROW("SHARED MEMORY MODE FOR THIS CONSTANT IS NOT APPLICABLE! 
Passed numpy array must be C contiguous."); } -OPENVINO_SUPPRESS_DEPRECATED_END template <> ov::op::v0::Constant create_shared(ov::Tensor& tensor) { diff --git a/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp b/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp index a1136e4cda6f66..024b03b2ff4cd9 100644 --- a/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp +++ b/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp @@ -34,6 +34,10 @@ class PyDecoder : public ov::frontend::pytorch::TorchDecoder { PYBIND11_OVERRIDE_PURE(ov::PartialShape, TorchDecoder, get_input_shape, index); } + const std::vector& get_input_strides(size_t index) const override { + PYBIND11_OVERRIDE_PURE(const std::vector&, TorchDecoder, get_input_strides, index); + } + ov::Any get_input_type(size_t index) const override { PYBIND11_OVERRIDE_PURE(ov::Any, TorchDecoder, get_input_type, index); } diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py b/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py index ec8f6c49e7ffb6..025c438fedf5d2 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py @@ -1,15 +1,19 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform import os import numpy as np import ngraph as ng +import pytest from openvino.inference_engine import IECore from tests_compatibility.runtime import get_runtime +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_import_onnx_with_external_data(): model_path = os.path.join(os.path.dirname(__file__), "models/external_data.onnx") ie = IECore() diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py b/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py index 60ab593d097250..7b1ebc7295ce96 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import onnx import pytest @@ -45,6 +47,8 @@ def relu(x): assert_onnx_import_equals_callable("Relu", relu, [[-3, -2, -1], [1, 2, 3]]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_leaky_relu(): def leaky_relu(x, alpha=0.01): return np.maximum(alpha * x, x) @@ -79,6 +83,8 @@ def parametic_relu(x, slope): assert np.allclose(output, expected_output) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_selu(): # f(x) = gamma * (alpha * exp(x) - alpha) for x <= 0, y = gamma * x for x > 0 def selu(x, alpha=1.67326319217681884765625, gamma=1.05070102214813232421875): diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py b/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py index ddbd8dd53e4a4a..ad7b8e8ffbaf85 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np 
import onnx import onnx.mapping @@ -210,6 +212,8 @@ def hardmax_2d(data): assert np.allclose(ng_results, [expected]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_hardsigmoid(): def hardsigmoid(data, alpha=0.2, beta=0.5): return np.clip(alpha * data + beta, 0, 1) @@ -447,6 +451,8 @@ def test_cast_errors(): @pytest.mark.parametrize("value_type", [pytest.param(np.float64), pytest.param(np.float32)]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_constant(value_type): values = np.random.randn(5, 5).astype(value_type) node = onnx.helper.make_node( diff --git a/src/common/offline_transformations/src/compress_quantize_weigths.cpp b/src/common/offline_transformations/src/compress_quantize_weigths.cpp index 6c9e4554782a96..2b0687c86cde53 100644 --- a/src/common/offline_transformations/src/compress_quantize_weigths.cpp +++ b/src/common/offline_transformations/src/compress_quantize_weigths.cpp @@ -357,7 +357,7 @@ static void compute_scale_and_zero_point_internal(const std::shared_ptr::epsilon(); *zero_point++ = zero_point_value; diff --git a/src/common/snippets/include/snippets/emitter.hpp b/src/common/snippets/include/snippets/emitter.hpp index e1ff08abbf7da2..a2aa4923c2eef4 100644 --- a/src/common/snippets/include/snippets/emitter.hpp +++ b/src/common/snippets/include/snippets/emitter.hpp @@ -12,7 +12,6 @@ namespace ov { namespace snippets { -using code = const uint8_t *; using RegInfo = std::pair, std::vector>; /** diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index b0d30f602a5a88..32b44b9e6abc81 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -11,12 +11,32 @@ #include "snippets_isa.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/lowered/pass/pass.hpp" #include "snippets/shape_types.hpp" +#include "target_machine.hpp" namespace ov { namespace snippets { + +class Generator; +/** + * @interface LoweringResult + * @brief Holds all relevant information produced during lowering + * @param compiled_snippet pointer to interface class that encapsulates compiled binary code + * @param buffer_scratchpad_size the amount of additional memory required by the binary code to execute. + * Must be allocated and freed by the backend. + */ +class LoweringResult { + friend class Generator; + // Some emitters rely on other precompiled kernels. + // We need to keep the pointers to such emitters alive, so the kernels would still be accessible at runtime. 
+ std::vector<std::shared_ptr<Emitter>> m_saved_emitters{}; + +public: + std::shared_ptr<CompiledSnippet> compiled_snippet = nullptr; + size_t buffer_scratchpad_size = 0; +}; + /** * @interface Schedule * @brief Return scheduling information and pointer to generated kernel code @@ -26,20 +46,21 @@ class Schedule { public: Schedule() = default; /** - * @brief Default to create schedule out of specific parameters - * @param wd work domain for kernel execution - * @param p pointer to generated code + * @brief Create schedule out of specific parameters + * @param domain work domain for kernel execution + * @param lr lowering result produced during code generation */ - Schedule(const VectorDims& wd, code p) : parallel_exec_domain(wd), ptr(p) {} + Schedule(std::vector<size_t>&& domain, LoweringResult&& lr) : parallel_exec_domain(domain), lowering_result(lr) {} + Schedule(std::vector<size_t> domain, LoweringResult&& lr) : parallel_exec_domain(std::move(domain)), lowering_result(lr) {} /** * @brief Returns callable instanse of code pointer */ template<typename K> K get_callable() const { - return reinterpret_cast<K>(const_cast<code>(ptr)); + return reinterpret_cast<K>(const_cast<uint8_t*>(lowering_result.compiled_snippet->get_code())); } VectorDims parallel_exec_domain {}; - code ptr {nullptr}; + LoweringResult lowering_result {}; }; /** @@ -52,7 +73,7 @@ class Generator { /** * @brief Default constructor */ - Generator(const std::shared_ptr<TargetMachine>& t) : target(t), lowered_saved{} {} + Generator(const std::shared_ptr<TargetMachine>& t) : target(t) {} /** * @brief Default destructor */ @@ -62,17 +83,13 @@ class Generator { * @brief Allows to tweak the lowering process. */ /** - * @brief virtual method any specific implementation should implement - * @param m model in canonical for for table-based code generation - * @param config config with transformation and optimization parameters - * @param compile_params parameters for generated code - * @return pointer to generated code + * @brief generates executable code + * @param linear_ir lowered IR for code generation + * @param result variable to handle the result; only the compiled_snippet and m_saved_emitters fields will be modified + * @param compile_params compile-time parameters used for code generation + * @return void */ - struct LoweringResult { - LoweringResult(code c) : binary_code(c) {} - code binary_code = nullptr; - }; - LoweringResult generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params = nullptr); + void generate(lowered::LinearIR& linear_ir, LoweringResult& result, const void* compile_params = nullptr) const; /** * @brief gets target machine @@ -96,17 +113,21 @@ class Generator { */ opRegType get_op_reg_type(const std::shared_ptr<ov::Node>& op) const; + virtual std::shared_ptr<Generator> clone() const = 0; + protected: /** * @brief gets register type by specific plugin op type * @return register type */ virtual opRegType get_specific_op_reg_type(const std::shared_ptr<ov::Node>& op) const; + /** + * @brief returns true if an emitter can use a precompiled kernel. + * @return bool + */ + virtual bool uses_precompiled_kernel(const std::shared_ptr<Emitter>& emitter) const { return false; } std::shared_ptr<TargetMachine> target; - // todo: we need to save lowered code to access compiled brgemm kernels on execution time (normally lowered is destructed by then). - // This is temporary solution, remove this when kernel caching is implemented. Don't forget to make generate const method. - lowered::LinearIR lowered_saved; }; } // namespace snippets
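To summarize the generator.hpp changes above: the raw `code` pointer is gone; `generate()` now fills a `LoweringResult` that owns the compiled binary (`compiled_snippet`), records the required scratchpad size, and keeps alive any emitters whose precompiled kernels the binary still calls into, while `Schedule` carries the whole result rather than a bare pointer. A rough Python analogue of that ownership layout, where the type names mirror the header but the payload types are stand-ins:

```python
from dataclasses import dataclass, field

@dataclass
class LoweringResult:
    compiled_snippet: object = None     # stand-in for CompiledSnippetPtr
    buffer_scratchpad_size: int = 0     # extra memory the backend must allocate for the kernel
    # Emitters backed by precompiled kernels stay referenced here so those kernels
    # outlive code generation (mirrors the private m_saved_emitters vector).
    saved_emitters: list = field(default_factory=list)

@dataclass
class Schedule:
    parallel_exec_domain: list          # work domain for kernel execution
    lowering_result: LoweringResult

    def get_callable(self):
        # Mirrors Schedule::get_callable: expose the compiled code entry point.
        return self.lowering_result.compiled_snippet
```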
diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index c5a1b2b8cb6f5e..289e52e0f59a73 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -74,7 +74,6 @@ class Expression : public std::enable_shared_from_this<Expression> { std::vector<size_t> m_loop_ids{}; std::shared_ptr<IShapeInferSnippets> m_shapeInference{nullptr}; }; -using ExpressionPtr = std::shared_ptr<Expression>; class IOExpression : public Expression { friend class LinearIR; diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index bb238356dfa9d2..f179abf746c313 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -27,6 +27,13 @@ class LinearIR::ExpressionFactory { } return create(n, params...); } + template<typename T = Expression, typename std::enable_if<std::is_base_of<Expression, T>::value, bool>::type = true> + static ExpressionPtr shallow_copy(const std::shared_ptr<T>& expr) { + if (const auto& io_expr = std::dynamic_pointer_cast<IOExpression>(expr)) + return std::make_shared<IOExpression>(*io_expr); + else + return std::make_shared<Expression>(*expr); + } private: /* -- Default Builders - initialize input port connectors from parents and create new output port connectors themselves */ diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index 8b6a320e18cad7..6d4a357914da39 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -116,6 +116,7 @@ class LinearIR { IShapeInferSnippets::Result shape_infer(const std::vector<VectorDimsRef>& input_shapes); const std::shared_ptr<ShapeInferSnippetsNode>& get_shape_infer_instance() const {return m_shape_infer; } VectorDims get_master_shape() const; + LinearIR deep_copy() const; private: std::shared_ptr<ShapeInferSnippetsNode> m_shape_infer = nullptr; diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp new file mode 100644 index 00000000000000..fe4f9956d81c66 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface InsertBroadcastMove + * @brief Injects explicit BroadcastMove operations when the most varying dim is broadcasted + * @ingroup snippets + */ +class InsertBroadcastMove : public Pass { +public: + OPENVINO_RTTI("InsertBroadcastMove", "Pass") + bool run(LinearIR& linear_ir) override; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp index 8b5634ebb29fa4..795dc0d3725f1c 100644 --- a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp @@ -18,8 +18,8 @@ namespace pass { */ class SoftmaxDecomposition : public Pass { public: - explicit SoftmaxDecomposition(size_t vector_size); OPENVINO_RTTI("SoftmaxDecomposition", "Pass") + explicit SoftmaxDecomposition(size_t vector_size);
bool run(LinearIR& linear_ir) override; private: diff --git a/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp b/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp new file mode 100644 index 00000000000000..08243c96beedf5 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +#include "snippets/lowered/loop_manager.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface ValidateShapes + * @brief The pass checks that there are no dynamic shapes in the IR + * @ingroup snippets + */ +class ValidateShapes : public Pass { +public: + OPENVINO_RTTI("ValidateShapes", "Pass") + ValidateShapes() = default; + bool run(LinearIR& linear_ir) override; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/op/rank_normalization.hpp b/src/common/snippets/include/snippets/op/rank_normalization.hpp new file mode 100644 index 00000000000000..c1ed530ce05832 --- /dev/null +++ b/src/common/snippets/include/snippets/op/rank_normalization.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" +#include "snippets/shape_inference/shape_inference.hpp" + +namespace ov { +namespace snippets { +namespace op { + +/** + * @interface RankNormalization + * @brief Generated by Canonicalization for rank normalization purposes. It can prepend and append input shapes with 1s, but only at the first or last dimensions. + * @arg num_prepend - num `1`s that will be inserted at the beginning of the input shape. Any value is allowed. + * @arg num_append - num `1`s that will be inserted at the end of the input shape. Could be either 0 (default) or 1. + * @ingroup snippets + */ + // Note that technically the same goal could be achieved using the op::Unsqueeze operation, + // but RankNormalization has much narrower semantics, and hence allows for easier control and more efficient shape inference. 
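Because RankNormalization only ever pads a shape with ones at its edges, its shape inference is trivial; before the class body below, here is a hedged Python model of the behaviour documented in the comment above:

```python
def rank_normalization_shape_infer(shape, num_prepend=0, num_append=0):
    # Pad with leading/trailing 1s, exactly what the op's ShapeInfer documents.
    return [1] * num_prepend + list(shape) + [1] * num_append

# Rank mismatch: a rank-3 input lifted to rank 5 for a rank-5 subgraph.
assert rank_normalization_shape_infer([17, 15, 32], num_prepend=2) == [1, 1, 17, 15, 32]
# Layout mismatch: a planar shape post-padded with a single 1 (num_append <= 1).
assert rank_normalization_shape_infer([1, 3, 17, 15], num_append=1) == [1, 3, 17, 15, 1]
```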
+ // +class RankNormalization : public ov::op::Op { +public: + OPENVINO_OP("RankNormalization", "SnippetsOpset"); + + RankNormalization() = default; + RankNormalization(const Output& data, size_t num_prepend, size_t num_append); + + void validate_and_infer_types() override; + bool visit_attributes(AttributeVisitor& visitor) override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + size_t get_num_append() const { return m_num_append; } + size_t get_num_prepend() const { return m_num_prepend; } + + class ShapeInfer : public IShapeInferSnippets { + size_t m_num_prepend = 0; + size_t m_num_append = 0; + public: + explicit ShapeInfer(const std::shared_ptr& n); + IShapeInferSnippets::Result + infer(const std::vector& input_shapes) override; + }; + +private: + size_t m_num_prepend = 0; + size_t m_num_append = 0; +}; + +} // namespace op +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index a9321e957e273c..b17031e2a67d1c 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -12,6 +12,7 @@ #include "openvino/core/rt_info.hpp" #include "snippets/pass_manager.hpp" #include "snippets/shape_inference/shape_inference.hpp" +#include "snippets/lowered/pass/pass.hpp" #include "snippets/generator.hpp" @@ -68,7 +69,8 @@ class Subgraph : public ov::op::util::SubGraphOp { // // D = < 1, 3, 17, 15, 32> < 0, 1, 2, 3, 4> // E = < 1, 3, 17, 1, 32> < 0, 1, 2, 3, 4> - using BlockedShape = std::tuple; + using Layout = std::vector; + using BlockedShape = std::pair; using BlockedShapeVector = std::vector; Subgraph() = default; @@ -94,43 +96,36 @@ class Subgraph : public ov::op::util::SubGraphOp { const std::shared_ptr& get_generator() const { return m_generator; } std::shared_ptr& get_generator() { return m_generator; } - size_t get_buffer_scratchpad_size() const { return m_buffer_scratchpad; } size_t get_virtual_port_count() const { return m_virtual_port_count; } bool is_quantized() const { return config.m_is_quantized; } bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; } - snippets::Schedule generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, - const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory = nullptr, - const void* compile_params = nullptr); - snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr); - snippets::Schedule generate(const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory = nullptr, + + snippets::Schedule generate(const BlockedShapeVector& blocked_input_shapes = {}, + const std::vector& input_precisions = {}, + const std::vector& output_precisions = {}, + const std::vector& data_flow_passes = {}, + const lowered::pass::PassPipeline& control_flow_passes_pre_common = {}, + const lowered::pass::PassPipeline& control_flow_passes_post_common = {}, + const std::shared_ptr& factory = nullptr, const void* compile_params = nullptr); - snippets::Schedule generate(const void* compile_params = 
nullptr); - ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); - ov::PartialShape canonicalized_body_shape_infer(const BlockedShapeVector& input_shapes); - std::vector reshape_body(const std::vector& input_shapes); - std::vector reshape_body(const std::vector& input_shapes); + snippets::Schedule generate_from_linear_ir(const lowered::pass::PassPipeline& backend_passes_pre_common = {}, + const lowered::pass::PassPipeline& backend_passes_post_common = {}, + const void* compile_params = nullptr) const; IShapeInferSnippets::Result shape_infer(const std::vector& input_shapes); // plugin sets generator for a snippet to some specific generator. // it's going to be replaced with Jitters table later void set_generator(std::shared_ptr generator); void set_tile_rank(size_t newRank) {tileRank = newRank;} - void set_virtual_port_count(const size_t count); - void set_min_jit_work_amount(const size_t jit_work_amount); - void set_min_parallel_work_amount(const size_t parallel_work_amount); + void set_virtual_port_count(size_t count); + void set_min_jit_work_amount(size_t jit_work_amount); + void set_min_parallel_work_amount(size_t parallel_work_amount); void print() const; void serialize() const; - void set_master_shape(ov::PartialShape new_shape) {master_shape = std::move(new_shape);} + VectorDims infer_master_shape(); static auto wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr; static void fill_empty_output_names(const Output& target_output_node, const Output& replacement_output_node); @@ -143,28 +138,30 @@ class Subgraph : public ov::op::util::SubGraphOp { // Return estimated unique buffer count (upper bound). It's needed for tokenization static auto get_estimated_buffer_count(const ov::NodeVector& ops) -> size_t; static auto is_domain_sensitive_op(const std::shared_ptr& op) -> bool; + + void data_flow_transformations(const BlockedShapeVector& blocked_input_shapes = {}, + const std::vector& input_precisions = {}, + const std::vector& output_precisions = {}, + const std::vector& = {}); std::shared_ptr - convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory = std::make_shared()) const; + convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory = std::make_shared()); + std::shared_ptr clone() const; private: - void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes); - void data_flow_transformations(const std::vector& backend_passes); void control_flow_transformations(lowered::LinearIR& linear_ir, + LoweringResult& lowering_result, const lowered::pass::PassPipeline& backend_passes_pre_common, - const lowered::pass::PassPipeline& backend_passes_post_common); + const lowered::pass::PassPipeline& backend_passes_post_common) const; void init_config(); // Count of Subgraph virtual ports: // - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition) // NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()), // we should MANUALLY calculate it where it needed. 
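Taken together, the subgraph.hpp changes in this hunk reshape the codegen entry point: `generate` now receives the blocked shapes and I/O precisions directly, runs the data-flow passes (including the new Canonicalization and AlignElementTypes described further below), lowers the body to linear IR, and hands off to `generate_from_linear_ir` for the control-flow pipelines and final code emission. A compressed Python-style sketch of that call order, with method names taken from the header and bodies elided:

```python
def generate(subgraph, blocked_input_shapes=(), input_precisions=(), output_precisions=(),
             data_flow_passes=(), pre_common=(), post_common=(), compile_params=None):
    # 1. Model-level rewrites: rank/layout canonicalization, precision alignment,
    #    plus any backend-provided data-flow passes.
    subgraph.data_flow_transformations(blocked_input_shapes, input_precisions,
                                       output_precisions, data_flow_passes)
    # 2. Lower the body to the linear IR that the control-flow passes operate on.
    subgraph.convert_body_to_linear_ir()
    # 3. Control-flow pipelines + Generator::generate produce the final Schedule.
    return subgraph.generate_from_linear_ir(pre_common, post_common, compile_params)
```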
     size_t m_virtual_port_count = 0;
-    size_t m_buffer_scratchpad = 0lu;
     Shape exec_domain = {};
     std::shared_ptr<ov::snippets::Generator> m_generator = nullptr;

-    ov::PartialShape master_shape;
     size_t tileRank = 0;  // set by plugin to specify the number of dimensions processed in a single kernel call
-    size_t maxInputRank = 0;
     std::vector<size_t> appendOnesForCanonical;
     std::shared_ptr<lowered::LinearIR> m_linear_ir = nullptr;
diff --git a/src/common/snippets/include/snippets/pass/align_element_types.hpp b/src/common/snippets/include/snippets/pass/align_element_types.hpp
new file mode 100644
index 00000000000000..9a8a5ff880aeab
--- /dev/null
+++ b/src/common/snippets/include/snippets/pass/align_element_types.hpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/pass.hpp"
+#include "transformations_visibility.hpp"
+#include "snippets/op/subgraph.hpp"
+
+namespace ov {
+namespace snippets {
+namespace pass {
+
+/**
+ * @interface AlignElementTypes
+ * @brief Align body precision with expected input/output precision. Insert op::ConvertSaturation if necessary.
+ * @ingroup snippets
+ */
+class AlignElementTypes: public ov::pass::ModelPass {
+public:
+    OPENVINO_RTTI("AlignElementTypes");
+    AlignElementTypes(std::vector<ov::element::Type> input_precisions,
+                      std::vector<ov::element::Type> output_precisions);
+    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
+
+private:
+    std::vector<ov::element::Type> m_input_precisions;
+    std::vector<ov::element::Type> m_output_precisions;
+};
+
+} // namespace pass
+} // namespace snippets
+} // namespace ov
diff --git a/src/common/snippets/include/snippets/pass/canonicalization.hpp b/src/common/snippets/include/snippets/pass/canonicalization.hpp
new file mode 100644
index 00000000000000..f57218328ca57c
--- /dev/null
+++ b/src/common/snippets/include/snippets/pass/canonicalization.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/pass.hpp"
+#include "transformations_visibility.hpp"
+#include "snippets/op/subgraph.hpp"
+#include "snippets/shape_types.hpp"
+
+namespace ov {
+namespace snippets {
+namespace pass {
+
+/**
+ * @interface Canonicalization
+ * @brief Canonicalization inserts RankNormalization (ov::op::Unsqueeze analogue) operations to account for:
+ *        - input rank mismatch: inputs with smaller ranks are prepended with 1
+ *        - layout mismatch (only planar + blocked is supported): planar shapes are appended with 1
+ * @ingroup snippets
+ */
+class Canonicalization: public ov::pass::ModelPass {
+public:
+    OPENVINO_RTTI("Canonicalization");
+    using BlockedShapeVector = op::Subgraph::BlockedShapeVector;
+    using Layout = std::vector<size_t>;
+    explicit Canonicalization(const BlockedShapeVector& blocked_input_shapes);
+    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
+
+private:
+    std::vector<VectorDims> m_in_shapes;
+    std::vector<Layout> m_in_layouts;
+    bool m_has_dynamic_inputs = false;
+};
+
+} // namespace pass
+} // namespace snippets
+} // namespace ov
diff --git a/src/common/snippets/include/snippets/snippets_isa.hpp b/src/common/snippets/include/snippets/snippets_isa.hpp
index 87579feebb1796..ba85ae68eeb634 100644
--- a/src/common/snippets/include/snippets/snippets_isa.hpp
+++ b/src/common/snippets/include/snippets/snippets_isa.hpp
@@ -24,6 +24,7 @@
 #include "op/loop.hpp"
 #include "op/brgemm.hpp"
 #include "op/vector_buffer.hpp"
+#include "op/rank_normalization.hpp"

 namespace ov {
 namespace snippets {
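To make the prepend/append rule concrete, here is a small sketch of the shapes involved; the concrete dims and the standalone RankNormalization construction are illustrative, since in the real pipeline Canonicalization derives the counts from the BlockedShapeVector:

    // Two eltwise inputs of ranks 2 and 4: {17, 31} and {1, 3, 17, 31}.
    // Canonicalization places RankNormalization(num_prepend = 2, num_append = 0)
    // after the rank-2 Parameter, which yields {1, 1, 17, 31}:
    auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{17, 31});
    auto rank_norm = std::make_shared<ov::snippets::op::RankNormalization>(param, 2, 0);
    // rank_norm->get_output_partial_shape(0) == {1, 1, 17, 31}
    // For a planar input consumed next to a blocked one, num_append = 1 appends a
    // trailing 1 instead: {1, 3, 17, 31} -> {1, 3, 17, 31, 1}.

Note that num_append is validated to be 0 or 1, matching the "only planar + blocked" restriction stated in the pass description.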
diff --git a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp
index b0a87a8a82a1f9..351770bdab746f 100644
--- a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp
+++ b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp
@@ -22,6 +22,7 @@
 OV_OP(Store, ov::snippets::op)
 OV_OP(BroadcastMove, ov::snippets::op)
 OV_OP(Scalar, ov::snippets::op)
 OV_OP(Nop, ov::snippets::op)
+OV_OP(RankNormalization, ov::snippets::op)

 // Layout-oblivious from opset1
diff --git a/src/common/snippets/include/snippets/target_machine.hpp b/src/common/snippets/include/snippets/target_machine.hpp
index a4d15463f2972a..d42779bcd7153c 100644
--- a/src/common/snippets/include/snippets/target_machine.hpp
+++ b/src/common/snippets/include/snippets/target_machine.hpp
@@ -13,6 +13,15 @@
 namespace ov {
 namespace snippets {
+
+struct CompiledSnippet {
+    virtual const uint8_t* get_code() const = 0;
+    virtual size_t get_code_size() const = 0;
+    virtual bool empty() const = 0;
+    virtual ~CompiledSnippet() = default;
+};
+using CompiledSnippetPtr = std::shared_ptr<CompiledSnippet>;
+
 typedef std::pair<std::function<std::shared_ptr<Emitter>(const lowered::ExpressionPtr&)>,
                   std::function<std::set<std::vector<element::Type>>(const std::shared_ptr<ov::Node>&)>> jitters_value;
@@ -33,7 +42,7 @@ class TargetMachine {
      * @brief finalizes code generation
      * @return generated kernel binary
      */
-    virtual code get_snippet() const = 0;
+    virtual CompiledSnippetPtr get_snippet() = 0;

     /**
      * @brief gets number of lanes supported by target's vector ISA
diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp
index 525de3e03b2118..d10930125e0ed0 100644
--- a/src/common/snippets/include/snippets/utils.hpp
+++ b/src/common/snippets/include/snippets/utils.hpp
@@ -58,6 +58,7 @@ constexpr inline bool implication(bool cause, bool cond) {
 VectorDims get_planar_vdims(const VectorDims& shape, const std::vector<size_t>& layout);
 VectorDims get_planar_vdims(const snippets::lowered::PortDescriptorPtr& port_desc);
 VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port);
+bool is_dynamic_vdims(const VectorDims& shape);

 } // namespace utils
 } // namespace snippets
diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp
index 1d1d733277f99b..cede4c4a6e532c 100644
--- a/src/common/snippets/src/generator.cpp
+++ b/src/common/snippets/src/generator.cpp
@@ -15,7 +15,7 @@
 namespace ov {
 namespace snippets {

-Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params) {
+void Generator::generate(lowered::LinearIR& linear_ir, LoweringResult& result, const void* compile_params) const {
     OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::Generator::generate")
     OV_ITT_TASK_CHAIN(GENERATE, ov::pass::itt::domains::SnippetsTransform, "Snippets::Generator", "::Transformations")
     if (!target->is_supported())
@@ -28,7 +28,6 @@ Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, cons
     lowered_pipeline.register_pass(reg_type_mapper);
     lowered_pipeline.register_pass();
     lowered_pipeline.run(linear_ir);
-
     linear_ir.init_emitters(target);

     OV_ITT_TASK_NEXT(GENERATE, "::EmitCode")
@@ -45,12 +44,15 @@ Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, cons
     }
     OV_ITT_TASK_NEXT(GENERATE, "::GetSnippet")
-    // todo: we save lowered to access compiled brgemm kernels on execution time (normally lowered is destructed by then) - remove this when kernel caching is implemented. Don't forget to make generate const method.
- if (config.m_save_expressions) - lowered_saved = linear_ir; - - return { target->get_snippet() }; + // Note: some emitters use precompiled kernels. They need to be saved, so the kernels are accessible at runtime. + if (linear_ir.get_config().m_save_expressions) { + for (const auto& expr : linear_ir) { + const auto& emitter = expr->get_emitter(); + if (uses_precompiled_kernel(emitter)) + result.m_saved_emitters.emplace_back(emitter); + } + } + result.compiled_snippet = target->get_snippet(); } std::shared_ptr Generator::get_target_machine() const { @@ -63,7 +65,8 @@ Generator::opRegType Generator::get_op_reg_type(const std::shared_ptr& op) std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) + std::dynamic_pointer_cast(op) || + std::dynamic_pointer_cast(op)) return gpr2gpr; else if (std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op)) diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index cc0ace467dd6e3..adf3894f71b8b7 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -122,6 +122,59 @@ LinearIR::container LinearIR::deep_copy_range(LinearIR::container::const_iterato return result; } +LinearIR LinearIR::deep_copy() const { + // todo: implement the same functionality using standard copy constructor + auto clone_ports_descriptors = [](std::vector& ports) { + std::for_each(ports.begin(), ports.end(), [](PortDescriptorPtr& pd) { pd = pd->clone(); }); + }; + const auto& original_lir = *this; + LinearIR new_lir; + new_lir.m_config = original_lir.m_config; + new_lir.m_shape_infer = original_lir.m_shape_infer; + NodeVector original_nodes; + original_nodes.reserve(original_lir.m_expressions.size()); + std::unordered_map connectors_map; + for (const auto& orig_expr : original_lir) { + original_nodes.push_back(orig_expr->get_node()); + const auto& copy_expr = ExpressionFactory::shallow_copy(orig_expr); + clone_ports_descriptors(copy_expr->m_input_port_descriptors); + clone_ports_descriptors(copy_expr->m_output_port_descriptors); + + for (auto& orig_con : copy_expr->m_output_port_connectors) { + const auto& copy_source = copy_expr->get_output_port(orig_con->get_source().get_index()); + const auto& copy_con = std::make_shared(copy_source); + connectors_map[orig_con] = copy_con; + orig_con = copy_con; + } + for (size_t i = 0; i < copy_expr->get_input_count(); i++) { + const auto& copy_connector = connectors_map[copy_expr->get_input_port_connector(i)]; + const auto& copy_consumer = copy_expr->get_input_port(i); + copy_connector->add_consumer(copy_consumer); + copy_expr->replace_input(i, copy_connector); + } + + if (auto io_expr = std::dynamic_pointer_cast(copy_expr)) + new_lir.m_io_expressions.push_back(io_expr); + new_lir.m_expressions.push_back(copy_expr); + } + // node_map and expr_map map original node pointer (expression) to a new pointer (expression) + ngraph::NodeMap node_map; + OPENVINO_SUPPRESS_DEPRECATED_START + ngraph::clone_nodes(original_nodes, node_map); + OPENVINO_SUPPRESS_DEPRECATED_END + new_lir.m_node2expression_map.clear(); + for (const auto& copy_expr : new_lir.m_expressions) { + copy_expr->m_source_node = node_map[copy_expr->m_source_node.get()]; + new_lir.m_node2expression_map[copy_expr->m_source_node] = copy_expr; + } + new_lir.m_loop_manager = std::make_shared(); + // It's Ok to share shapeInfer factory, since LIR doesn't change it + new_lir.m_shape_infer_factory = 
m_shape_infer_factory; + // Note: shapeInfer stores expression pointers. we re-create it, so shape inference is performed on cloned exprs. + new_lir.m_shape_infer = std::make_shared(new_lir.m_expressions, new_lir.m_io_expressions); + return new_lir; +} + void LinearIR::debug_print(bool tds_as_pointers) const { auto print_rinfo = [](const RegInfo& rinfo) { std::cerr << " : {"; @@ -320,7 +373,7 @@ VectorDims LinearIR::get_master_shape() const { for (const auto& oe : out_exprs) { const auto& port_desc = oe->get_input_port_descriptor(0); OPENVINO_ASSERT(ov::snippets::broadcast_merge_into(master_shape, port_desc->get_shape()), - "Failed to merge input shapes in OptimizeDomain pass"); + "Failed to merge input shapes in infer_master_shape"); } } return master_shape; @@ -339,6 +392,19 @@ LinearIR::LIRShapeInfer::LIRShapeInfer(container& body_exprs, io_container& io_e OPENVINO_THROW("Invalid io expression type detected"); } } + // Note that if all output shapes are static, as in the case when the first shape infer was performed on nGraph, + // we can treat them as the last result + std::vector outputDims; + outputDims.reserve(m_output_exprs.size()); + for (const auto& expr : m_output_exprs) { + const auto &shape = expr->get_input_port_descriptor(0)->get_shape(); + if (utils::is_dynamic_vdims(shape)) { + outputDims.clear(); + break; + } + outputDims.push_back(shape); + } + m_last_result = {outputDims, ShapeInferStatus::success}; } IShapeInferSnippets::Result LinearIR::LIRShapeInfer::infer(const std::vector& input_shapes) { diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index 638845ec6929ad..7755cfebe7cc38 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -46,12 +46,21 @@ bool AssignRegisters::run(LinearIR& linear_ir) { for (const auto& expr : expressions) { auto op = expr->get_node(); if (const auto io_expr = std::dynamic_pointer_cast(expr)) { - if (io_expr->get_type() == IOExpression::io_type::INPUT) - manually_assigned_gprs[expr->get_output_port_connector(0)] = io_expr->get_index(); - else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) + if (io_expr->get_type() == IOExpression::io_type::INPUT) { + const auto& out_connector = expr->get_output_port_connector(0); + manually_assigned_gprs[out_connector] = io_expr->get_index(); + const auto& consumer_inputs = out_connector->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + // TODO [96434]: Support RankNormalization (Reshape) in arbitrary place in pipeline, not just after inputs + if (ov::is_type(first_consumer->get_node())) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); + manually_assigned_gprs[first_consumer->get_output_port_connector(0)] = io_expr->get_index(); + } + } else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) { manually_assigned_gprs[expr->get_input_port_connector(0)] = num_parameters + io_expr->get_index(); - else + } else { OPENVINO_THROW("Unsupported io_type detected"); + } } else if (const auto& buffer = ov::as_type_ptr(op)) { const auto buffer_id = buffer->get_id(); // All buffers have one common data pointer diff --git a/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp b/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp new file mode 100644 index 00000000000000..a70698580a61e3 --- /dev/null +++ 
b/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp
@@ -0,0 +1,90 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "snippets/lowered/pass/insert_broadcastmove.hpp"
+#include "snippets/utils.hpp"
+#include "snippets/lowered/linear_ir.hpp"
+#include "snippets/lowered/loop_manager.hpp"
+#include "snippets/snippets_isa.hpp"
+#include "snippets/itt.hpp"
+
+namespace ov {
+namespace snippets {
+namespace lowered {
+namespace pass {
+
+bool InsertBroadcastMove::run(LinearIR& linear_ir) {
+    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertBroadcastMove")
+    bool modified = false;
+    const auto& loop_manager = linear_ir.get_loop_manager();
+
+    auto supports_broadcasting = [](const std::shared_ptr<ov::Node>& n) {
+        return ov::op::util::supports_auto_broadcast(n) ||
+               n->get_autob().m_type == ov::op::AutoBroadcastType::NUMPY ||
+               is_type(n);
+    };
+    auto dont_need_broadcasting = [](const ov::Output<ov::Node>& v){
+        // We don't need to insert BroadcastMove after the following operations:
+        // - Scalar has emitter with explicit broadcasting
+        // - VectorBuffer has scalar output shape to avoid broadcast conflicts and manual shape insertion.
+        // - Fill can be inserted only after VectorBuffer, and should be ignored as well.
+        return utils::is_scalar_constant(v.get_node_shared_ptr()) ||
+               ov::is_type<op::VectorBuffer>(v.get_node_shared_ptr()) ||
+               ov::is_type<op::Fill>(v.get_node_shared_ptr());
+    };
+    for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) {
+        const auto& expr = *expr_it;
+        const auto& node = expr->get_node();
+        const auto& descriptors = expr->get_input_port_descriptors();
+        if (!supports_broadcasting(node) || descriptors.size() < 2)
+            continue;
+        const auto& connectors = expr->get_input_port_connectors();
+        OPENVINO_ASSERT(connectors.size() == descriptors.size(),
+                        "Invalid expression configuration: connectors and descriptors size mismatch");
+
+        std::vector<size_t> last_dims(descriptors.size());
+        std::transform(descriptors.begin(), descriptors.end(), last_dims.begin(),
+                       [](const std::shared_ptr<PortDescriptor>& d){
+                           return d->get_shape().back();
+                       });
+        const auto broadcasted_dim = *std::max_element(last_dims.begin(), last_dims.end());
+        for (size_t i = 0; i < last_dims.size(); i++) {
+            const auto& parent_port = connectors[i]->get_source();
+            if (last_dims[i] != broadcasted_dim &&
+                !dont_need_broadcasting(parent_port.get_expr()->get_node())) {
+                OPENVINO_ASSERT(last_dims[i] == 1, "Attempt to broadcast non-1 dimension. Target dim: ", broadcasted_dim, " This dim: ", last_dims[i]);
+                auto input_shape = descriptors[i]->get_shape();
+                // Note that input_shape could be empty (aka ngraph scalar), so we can't just replace the last dim
+                if (input_shape.empty())
+                    input_shape.resize(1);
+                input_shape.back() = last_dims[i];
+                const auto broadcast = std::make_shared<op::BroadcastMove>(node->get_input_source_output(i), utils::vdims_to_pshape(input_shape));
+
+                PortDescriptorUtils::set_port_descriptor_ptr(broadcast->output(0), connectors[i]->get_source().get_descriptor_ptr()->clone());
+                const auto broadcast_expr = linear_ir.create_expression(broadcast, {connectors[i]});
+                linear_ir.insert(expr_it, broadcast_expr);
+                linear_ir.replace_input(expr->get_input_port(i), broadcast_expr->get_output_port_connector(0));
+                // Note that BroadcastMove modified the next expr input shape, so we need to update the
+                // expr's input port descriptor to reflect the changes
+                expr->get_input_port_descriptor(i)->set_shape(broadcast_expr->get_output_port_descriptor(0)->get_shape());
+
+                // Copy Loop identifiers
+                const auto& loop_ids = expr->get_loop_ids();
+                broadcast_expr->set_loop_ids(loop_ids);
+                loop_manager->update_loops_port(loop_ids, expr->get_input_port(0), {broadcast_expr->get_input_port(0)}, true);
+
+                modified = true;
+            }
+        }
+    }
+    return modified;
+}
+
+} // namespace pass
+} // namespace lowered
+} // namespace snippets
+} // namespace ov
+
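In short, the new pass scans every broadcasting-capable expression, takes the maximum of its inputs' last dimensions, and wraps any input whose last dimension differs with a BroadcastMove. A compressed recap of that decision rule; this is an illustrative sketch distilled from the pass above, and wrap_input_with_broadcast_move is a hypothetical stand-in for the insertion sequence it performs:

    // Given per-input last dims {1, 32} for an Add expression:
    // broadcasted_dim = max(1, 32) = 32.
    const size_t broadcasted_dim = *std::max_element(last_dims.begin(), last_dims.end());
    for (size_t i = 0; i < last_dims.size(); i++) {
        if (last_dims[i] == broadcasted_dim)
            continue;                          // input 1 (dim 32): already the right width
        // only 1 -> N broadcasting is legal; anything else trips the assert above
        wrap_input_with_broadcast_move(i);     // input 0 (dim 1): BroadcastMove inserted before Add
    }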
diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp
index 91cbe55ef98b34..da5ffc11c3169d 100644
--- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp
+++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp
@@ -35,10 +35,9 @@ std::vector get_buffer_loop_ids(const std::vector& lhs, const st
 ov::Shape compute_allocation_shape(const LinearIR::LoopManagerPtr& loop_manager,
                                    const std::vector<size_t>& buffer_loop_ids,
                                    const std::vector<size_t>& parent_loop_ids,
-                                   const ov::Output<ov::Node>& parent_output,
+                                   const ExpressionPort& expr_port,
                                    const int allocation_rank) {
-    const auto& port = lowered::PortDescriptorUtils::get_port_descriptor_ptr(parent_output);
-    const auto planar_shape = utils::get_planar_vdims(port);
+    const auto& planar_shape = utils::get_planar_vdims(expr_port);
     const size_t rank = allocation_rank >= 0 ? std::min(static_cast<size_t>(allocation_rank), planar_shape.size())
                                              : planar_shape.size();
     ov::Shape allocation_shape(rank);
@@ -123,9 +122,9 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
     for (const auto& entry_point : loop_entries) {
         const auto& entry_port = entry_point.expr_port;
         const auto& expr = entry_port->get_expr();
-        const auto port = entry_port->get_index();
+        const auto port_idx = entry_port->get_index();
         const auto node = expr->get_node();
-        const auto& input_connector = expr->get_input_port_connector(port);
+        const auto& input_connector = expr->get_input_port_connector(port_idx);
         const auto& parent_expr_output = input_connector->get_source();
         const auto& parent_expr = parent_expr_output.get_expr();
         const auto parent_port = parent_expr_output.get_index();
@@ -140,7 +139,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
         const auto parent_ma = ov::as_type_ptr<op::MemoryAccess>(parent);
         const auto node_ma = ov::as_type_ptr<op::MemoryAccess>(node);
         bool is_buffer_needed = (parent_ma && parent_ma->is_memory_access_output_port(parent_port)) ||
-                                (node_ma && node_ma->is_memory_access_input_port(port));
+                                (node_ma && node_ma->is_memory_access_input_port(port_idx));
         const auto current_loops = expr->get_loop_ids();
         const auto parent_loops = parent_expr->get_loop_ids();
         const auto buffer_loop_ids = get_buffer_loop_ids(current_loops, parent_loops, is_buffer_needed);
@@ -154,7 +153,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
             const auto allocation_shape = compute_allocation_shape(loop_manager,
                                                                    buffer_loop_ids,
                                                                    parent_loops,
-                                                                   parent->output(parent_port),
+                                                                   parent_expr_output,
                                                                    m_buffer_allocation_rank);
             const auto buffer = std::make_shared<op::Buffer>(parent->output(parent_port), allocation_shape);
             PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone());
@@ -169,7 +168,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
     for (const auto& exit_point : loop_exits) {
         const auto& exit_port = exit_point.expr_port;
         const auto& expr = exit_port->get_expr();
-        const auto port = exit_port->get_index();
+        const auto port_idx = exit_port->get_index();
         const auto node = expr->get_node();
         const auto output_connector = exit_port->get_port_connector_ptr();
         const auto child_exprs_inputs = output_connector->get_consumers();
@@ -200,7 +199,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
         const auto child_ma = ov::as_type_ptr<op::MemoryAccess>(child);
         const auto node_ma = ov::as_type_ptr<op::MemoryAccess>(node);
         bool is_buffer_needed = (child_ma && child_ma->is_memory_access_input_port(child_port)) ||
-                                (node_ma && node_ma->is_memory_access_output_port(port));
+                                (node_ma && node_ma->is_memory_access_output_port(port_idx));
         const auto local_buffer_loop_ids = get_buffer_loop_ids(current_loops, child_expr->get_loop_ids(), is_buffer_needed);

         if (is_buffer_needed) {
@@ -247,9 +246,9 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
             const auto allocation_shape = compute_allocation_shape(loop_manager,
                                                                    buffer_loop_ids,
                                                                    current_loops,
-                                                                   node->output(port),
+                                                                   *exit_port,
                                                                    m_buffer_allocation_rank);
-            auto buffer = std::make_shared<op::Buffer>(node->output(port), allocation_shape);
+            auto buffer = std::make_shared<op::Buffer>(node->output(port_idx), allocation_shape);
             PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), exit_port->get_descriptor_ptr()->clone());
             // We cannot insert Node output connector on Buffer output because not all consumers of Node need Buffer
            // Example:
diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp
index 47fa93f699354b..ff75a5be0e6c5c 100644
--- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp
+++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp
@@ -3,7 +3,7 @@
 //

 #include "snippets/lowered/pass/insert_load_store.hpp"
-
+#include "snippets/op/rank_normalization.hpp"
 #include "snippets/lowered/linear_ir.hpp"
 #include "snippets/lowered/loop_manager.hpp"
 #include "snippets/snippets_isa.hpp"
@@ -30,14 +30,18 @@ size_t InsertLoadStore::get_count(const PortDescriptorPtr& port_desc) const {
 }

 bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) {
+    std::shared_ptr<Expression> data_expr = *data_expr_it;
+    auto consumer_inputs = data_expr->get_output_port_connector(0)->get_consumers();
+    const auto& first_consumer = consumer_inputs.begin()->get_expr();
+    if (is_type<op::RankNormalization>(first_consumer->get_node())) {
+        OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer");
+        data_expr = first_consumer;
+    }
     const auto& loop_manager = linear_ir.get_loop_manager();
-    const auto& data_expr = *data_expr_it;
-    const auto& data_node = data_expr->get_node();
+    const auto& data_ngraph_output = data_expr->get_node()->output(0);
     const auto& output_connector = data_expr->get_output_port_connector(0);
-    const auto consumer_inputs = output_connector->get_consumers();
-
     bool was_inserted = false;
-    for (const auto& consumer_input : consumer_inputs) {
+    for (const auto& consumer_input : output_connector->get_consumers()) {
         const auto& consumer_expr = consumer_input.get_expr();
         const auto port = consumer_input.get_index();
         const auto& consumer = consumer_expr->get_node();
@@ -46,7 +50,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr
             return false;

         const auto loop_ids = consumer_expr->get_loop_ids();
-        const auto load = std::make_shared<op::Load>(data_node->output(0), get_count(data_expr->get_output_port_descriptor(0)));
+        const auto load = std::make_shared<op::Load>(data_ngraph_output, get_count(data_expr->get_output_port_descriptor(0)));
         PortDescriptorUtils::set_port_descriptor_ptr(load->output(0), consumer_input.get_descriptor_ptr()->clone());
         const auto load_expr = linear_ir.create_expression(load, {output_connector});
         linear_ir.insert(linear_ir.find_after(data_expr_it, consumer_expr), load_expr);
@@ -55,7 +59,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr
         load_expr->set_loop_ids(loop_ids);

         // Need to update all the corresponding Loops with the same Entry Point
-        const auto prev_entry_point = consumer_input;
+        const auto& prev_entry_point = consumer_input;
         const auto new_entry_point = load_expr->get_input_port(0);
         loop_manager->update_loops_port(loop_ids, prev_entry_point, {new_entry_point}, true);
         was_inserted = true;
@@ -116,20 +120,14 @@ bool InsertLoadStore::run(LinearIR& linear_ir) {
         const auto& node = expr->get_node();
         if (ov::is_type<ov::op::v0::Parameter>(node)) {
             modified |= insert_load(linear_ir, expr_it);
-            continue;
-        }
-        if (ov::is_type<ov::op::v0::Result>(node)) {
+        } else if (ov::is_type<ov::op::v0::Result>(node)) {
             modified |= insert_store(linear_ir, expr_it);
-            continue;
-        }
-        if (auto buffer = ov::as_type_ptr<op::Buffer>(node)) {
+        } else if (auto buffer = ov::as_type_ptr<op::Buffer>(node)) {
             modified |= insert_load(linear_ir, expr_it);
             if (buffer->is_intermediate_memory())
                 modified |= insert_store(linear_ir, expr_it);
-            continue;
         }
     }
-
     return modified;
 }
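The pattern handled here mirrors the assign_registers.cpp change earlier in the patch: RankNormalization sits directly after a Parameter and must stay invisible to memory access, so the Load is attached to the RankNormalization output rather than to the Parameter itself. A schematic of the expected expression chain (illustrative, not an additional API):

    // Before InsertLoadStore:  Parameter --> RankNormalization --> Add
    // After InsertLoadStore:   Parameter --> RankNormalization --> Load --> Add
    //
    // RankNormalization is a pure shape-view op: it shares both the data pointer
    // and the GPR assigned to the Parameter, so no code is emitted for it.

This is also why both passes assert that RankNormalization is the only consumer of its Parameter: a second consumer would need its own Load and would break the shared-register assumption.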
diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp
index 86246ce61f1be6..05d38e111927c4 100644
--- a/src/common/snippets/src/lowered/pass/mark_loops.cpp
+++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp
@@ -29,7 +29,8 @@ bool MarkLoops::run(LinearIR& linear_ir) {
     auto is_not_start_point = [](const std::shared_ptr<ov::Node>& node) {
         return ov::is_type(node) ||
                ov::is_type(node) ||
-               ov::is_type(node);
+               ov::is_type(node) ||
+               ov::is_type<op::RankNormalization>(node);
     };

     auto are_conflicted = [](const ExpressionPort& lhs, const ExpressionPort& rhs) {
diff --git a/src/common/snippets/src/lowered/pass/optimize_domain.cpp b/src/common/snippets/src/lowered/pass/optimize_domain.cpp
index 09061e63250464..f2d2fd43baf96c 100644
--- a/src/common/snippets/src/lowered/pass/optimize_domain.cpp
+++ b/src/common/snippets/src/lowered/pass/optimize_domain.cpp
@@ -8,6 +8,7 @@
 #include "snippets/lowered/linear_ir.hpp"
 #include "snippets/snippets_isa.hpp"
 #include "snippets/shape_inference/shape_inference.hpp"
+#include "snippets/utils.hpp"

 namespace ov {

@@ -79,18 +80,28 @@ bool OptimizeDomain::run(snippets::lowered::LinearIR& linear_ir) {
         return false;
     }
     OPENVINO_ASSERT(config.m_min_parallel_work_amount != 0, "OptimizeDomain: Min parallel work amount can't be equal to zero");
-    std::vector<std::shared_ptr<snippets::lowered::IOExpression>> input_exprs;
     std::vector<VectorDims> input_shapes;
     VectorDims master_shape = linear_ir.get_master_shape();
-    for (const auto& expr : linear_ir.get_IO_ops()) {
-        if (expr->get_type() == snippets::lowered::IOExpression::io_type::INPUT) {
-            input_exprs.push_back(expr);
-            const auto& shape = expr->get_output_port_descriptor(0)->get_shape();
+    bool blocked_input_shapes = false;
+    for (const auto& io_expr : linear_ir.get_IO_ops()) {
+        if (io_expr->get_type() == snippets::lowered::IOExpression::io_type::INPUT) {
+            auto consumer_inputs = io_expr->get_output_port_connector(0)->get_consumers();
+            const auto& first_consumer = consumer_inputs.begin()->get_expr();
+            if (auto rank_norm = as_type_ptr<op::RankNormalization>(first_consumer->get_node())) {
+                // If RankNormalization appends dims, then the appended dims will be broadcasted
+                // so collapsing is not allowed. We may increment tile rank though.
+                if (rank_norm->get_num_append() != 0)
+                    blocked_input_shapes = true;
+                // If RankNormalization prepends dims, then the dims should be ignored during domain optimization
+                // to avoid passing already incremented shapes to linear_ir.shape_infer()
+            }
+            const ExpressionPtr& shape_producing_expr = blocked_input_shapes ?
+                                                        first_consumer :
+                                                        io_expr;
+            const auto& shape = utils::get_planar_vdims(shape_producing_expr->get_output_port_descriptor(0));
             OPENVINO_ASSERT(std::none_of(shape.begin(), shape.end(),
                                          [](size_t d) {return d == snippets::IShapeInferSnippets::DYNAMIC_DIMENSION; }),
                             "OptimizeDomain pass does not support dynamic shapes");
-            OPENVINO_ASSERT(ov::snippets::broadcast_merge_into(master_shape, shape),
-                            "Failed to merge input shapes in OptimizeDomain pass");
             input_shapes.emplace_back(shape);
         }
     }
@@ -98,7 +109,9 @@ bool OptimizeDomain::run(snippets::lowered::LinearIR& linear_ir) {
                                                  master_shape.end(),
                                                  (size_t)1,
                                                  std::multiplies<size_t>());
-    const auto num_dims_collapsed = optimize(input_shapes,
+    const auto num_dims_collapsed = blocked_input_shapes ?
+ 0 : + optimize(input_shapes, master_shape, total_work_amount, config.m_min_parallel_work_amount, diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index 7b69c82777d90e..aea3cf99858622 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -19,23 +19,25 @@ bool PropagateLayout::run(LinearIR& linear_ir) { if (linear_ir.empty()) return false; - for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - const auto& expr = *expr_it; + for (const auto& expr : linear_ir) { const auto io_expr = std::dynamic_pointer_cast(expr); if (!io_expr) continue; const bool is_input = io_expr->get_type() == IOExpression::io_type::INPUT; const auto& connectors = is_input ? expr->get_output_port_connectors() : expr->get_input_port_connectors(); - if (connectors.size() != 1) - OPENVINO_THROW("Parameter/Results should have exactly one output/input"); + OPENVINO_ASSERT(connectors.size() == 1, "Parameter/Results should have exactly one output/input"); // If input - we should be looking downstream, if output - upstream const auto& target_connector = connectors.front(); if (is_input) { - const auto consumer_inputs = target_connector->get_consumers(); // Note that here we consider only the first child (which is usually load), // but often there is another child - LoopEnd + auto consumer_inputs = target_connector->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + // If there is a RankNormalization op after a parameter - we should skip it + if (is_type(first_consumer->get_node())) + consumer_inputs = first_consumer->get_output_port_connector(0)->get_consumers(); std::set> child_layouts; for (const auto& child_input : consumer_inputs) { const auto& child = child_input.get_expr(); diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index e868d75e5dd5ea..b434e0f974beb3 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -44,13 +44,15 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Float constant values in byte representation const auto float_min_constant = uint32_t(0xff7fffff); const auto zero_constant = uint32_t(0x00000000); - + const bool is_dynamic = softmax->is_dynamic(); // We need an iterator to the inserted element - auto push_node = [&linear_ir, &expr_it](const std::shared_ptr& n) { + auto push_node = [&linear_ir, &expr_it, is_dynamic](const std::shared_ptr& n) { const auto expr = linear_ir.insert(expr_it, n); + if (is_dynamic) + expr->get()->updateShapes(); return std::make_pair(expr, n); }; - + const ov::PartialShape broadcasted_shape(softmax_expr->get_input_port_descriptor(0)->get_shape()); // Note: VectorBuffer is a special case, since it should go before the initial Load. So we handle it separately const auto& vector_buffer_max = push_node(std::make_shared()); // Init value of vector buffer for ReduceMax is -FLOAT_MIN. 
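For orientation, the expression sequence assembled by this pass implements the numerically stable softmax with the division replaced by a reciprocal multiply; schematically (a sketch distilled from the surrounding code, not an extra API):

    // m = ReduceMax(x)                     // vector buffer filled with 0xff7fffff (lowest finite float)
    // e = Exp(x - BroadcastMove(m))        // subtracting the row max keeps Exp in range
    // s = ReduceSum(e)                     // vector buffer filled with 0x00000000
    // y = e * BroadcastMove(Power(s, -1))  // 1/s is computed once, outside the inner loop

The change in this hunk only swaps the broadcast target: both BroadcastMove ops now use the precomputed broadcasted_shape (taken from the Softmax input port descriptor), so the decomposition keeps working when the body is processed through the dynamic-shape path introduced above.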
@@ -65,9 +67,8 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { std::vector{(*max.first)->get_input_port(0), (*max.first)->get_input_port(1)}, std::vector{(*max.first)->get_output_port(0)}); - const auto broadcast_horizon_max = push_node( - std::make_shared(horizon_max.second, horizon_max.second->get_input_partial_shape(0))); + std::make_shared(horizon_max.second, broadcasted_shape)); const auto vector_buffer_sum = push_node(std::make_shared()); // Init value of vector buffer for ReduceSum is zero. const auto fill_sum = push_node(std::make_shared(vector_buffer_sum.second, 0, zero_constant)); @@ -89,7 +90,7 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Divide is expensive operation, so we decompose it into 1 / x * y, where 1 / x is executed outside loop const auto pow = push_node(std::make_shared(horizon_sum.second, -1.f)); - const auto broadcast_pow = push_node(std::make_shared(pow.second, horizon_sum.second->get_input_partial_shape(0))); + const auto broadcast_pow = push_node(std::make_shared(pow.second, broadcasted_shape)); // Mul (pseudo-Divide loop) const auto mul = push_node(std::make_shared(exp.second, broadcast_pow.second)); diff --git a/src/common/snippets/src/lowered/pass/validate_shapes.cpp b/src/common/snippets/src/lowered/pass/validate_shapes.cpp new file mode 100644 index 00000000000000..8d12004313e0bf --- /dev/null +++ b/src/common/snippets/src/lowered/pass/validate_shapes.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/pass/validate_shapes.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/shape_inference/shape_inference.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +bool ValidateShapes::run(LinearIR& linear_ir) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ValidateShapes") + + for (const auto& expr : linear_ir) { + const auto num_inputs = expr->get_input_count(); + const auto& port_connectors = expr->get_input_port_connectors(); + const auto& port_descriptors = expr->get_input_port_descriptors(); + OPENVINO_ASSERT(port_connectors.size() == num_inputs, "Invalid number of port connectors detected"); + OPENVINO_ASSERT(port_descriptors.size() == num_inputs, "Invalid number of port descriptors detected"); + for (size_t i = 0; i < num_inputs; i++) { + const auto& descr = port_descriptors[i]; + const auto& layout = descr->get_layout(); + const auto& shape = descr->get_shape(); + const auto& n = expr->get_node(); + OPENVINO_ASSERT(std::none_of(shape.begin(), shape.end(), + [](size_t d) {return d == IShapeInferSnippets::DYNAMIC_DIMENSION;}), + "Dynamic dimensions are not allowed at this point of pipeline. ", + "Check the expr for node ", n->get_friendly_name()); + OPENVINO_ASSERT(layout.size() == shape.size(), "Layout and shape sizes must match. ", + "Check the expr for node ", n->get_friendly_name()); + const auto& parent_desc = port_connectors[i]->get_source().get_descriptor_ptr(); + const auto& parent_shape = parent_desc->get_shape(); + OPENVINO_ASSERT(parent_shape == shape, "Parent shape must be equal to the expression shape. 
", + "Check the expr for node ", n->get_friendly_name()); + } + } + return false; +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/lowered/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp index 96e8c718cc972b..e8c4bdd0626b47 100644 --- a/src/common/snippets/src/lowered/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -3,6 +3,7 @@ // #include "snippets/lowered/port_descriptor.hpp" +#include namespace ov { namespace snippets { @@ -12,13 +13,15 @@ size_t PortDescriptor::ServiceDimensions::FULL_DIM = SIZE_MAX; PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} -PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) - : PortDescriptor(in.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(utils::pshape_to_vdims(in.get_partial_shape()), std::move(subtensor_shape), std::move(layout)) {} PortDescriptor::PortDescriptor(const ov::Output& out, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Output(out.get_node(), out.get_index()), std::move(subtensor_shape), std::move(layout)) {} -PortDescriptor::PortDescriptor(const ov::Output& out, VectorDims subtensor_shape, std::vector layout) - : PortDescriptor(out.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(const ov::Output& out, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(utils::pshape_to_vdims(out.get_partial_shape()), std::move(subtensor_shape), std::move(layout)) {} PortDescriptor::PortDescriptor(VectorDims shape, VectorDims subtensor_shape, std::vector layout) : m_tensor_shape(std::move(shape)), m_layout(std::move(layout)), m_subtensor_shape(std::move(subtensor_shape)) { @@ -30,13 +33,12 @@ void PortDescriptor::validate_arguments() { m_layout.resize(m_tensor_shape.size()); // NCHW layout by default std::iota(m_layout.begin(), m_layout.end(), 0); - } else if (m_layout.size() != m_tensor_shape.size()) { - OPENVINO_THROW("Snippets tensor descriptor: Layout size must be equal to the shape size"); } + OPENVINO_ASSERT(m_layout.size() == m_tensor_shape.size(), "Snippets tensor descriptor: Layout size must be equal to the shape size"); } PortDescriptorPtr PortDescriptor::clone() const { - const auto desc = std::make_shared(m_tensor_shape, m_subtensor_shape, m_layout); + auto desc = std::make_shared(m_tensor_shape, m_subtensor_shape, m_layout); desc->set_reg(m_reg); return desc; } diff --git a/src/common/snippets/src/op/buffer.cpp b/src/common/snippets/src/op/buffer.cpp index 8b703fa0c29a16..615979ec5e3281 100644 --- a/src/common/snippets/src/op/buffer.cpp +++ b/src/common/snippets/src/op/buffer.cpp @@ -46,15 +46,13 @@ bool Buffer::visit_attributes(AttributeVisitor& visitor) { void Buffer::validate_and_infer_types() { INTERNAL_OP_SCOPE(Buffer_validate_and_infer_types); - ov::Shape output_shape; + ov::PartialShape output_shape; if (m_type == Type::NewMemory) { OPENVINO_ASSERT(get_input_size() == 0, "Buffer with new allocated memory must to not have arguments!"); output_shape = m_shape; } else if (m_type == Type::IntermediateMemory) { - const auto& input_shape = get_input_partial_shape(0); - 
OPENVINO_ASSERT(input_shape.is_static(), "Buffer supports only static input shape"); m_element_type = get_input_element_type(0); - output_shape = input_shape.get_shape(); + output_shape = get_input_partial_shape(0); } else { OPENVINO_THROW("Buffer supports only the following types: NewMemory and IntermediateMemory"); } diff --git a/src/common/snippets/src/op/rank_normalization.cpp b/src/common/snippets/src/op/rank_normalization.cpp new file mode 100644 index 00000000000000..5dfd46492a1946 --- /dev/null +++ b/src/common/snippets/src/op/rank_normalization.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/rank_normalization.hpp" +#include "snippets/utils.hpp" + +namespace ov { +namespace snippets { +namespace op { + +RankNormalization::RankNormalization(const Output& data, size_t num_prepend, size_t num_append) : + Op({data}), m_num_prepend(num_prepend), m_num_append(num_append) { + constructor_validate_and_infer_types(); +} + + +std::shared_ptr RankNormalization::clone_with_new_inputs(const OutputVector& new_args) const { + check_new_args_count(this, new_args); + return std::make_shared(new_args[0], m_num_prepend, m_num_append); +} + +void RankNormalization::validate_and_infer_types() { + auto new_shape = get_input_partial_shape(0); + // Note: other values are not allowed, only planar + blocked layout combination can be normalized. + NODE_VALIDATION_CHECK(this, utils::one_of(m_num_append, 0lu, 1lu), + "num_append could be only 0 or 1, other values are not allowed."); + new_shape.insert(new_shape.begin(), m_num_prepend, Dimension(1)); + new_shape.insert(new_shape.end(), m_num_append, Dimension(1)); + set_output_type(0, get_input_element_type(0), new_shape); +} + +bool RankNormalization::visit_attributes(AttributeVisitor& visitor) { + visitor.on_attribute("num_prepend", m_num_prepend); + visitor.on_attribute("num_append", m_num_append); + return true; +} + +RankNormalization::ShapeInfer::ShapeInfer(const std::shared_ptr& n) { + const auto& rank_norm = as_type_ptr(n); + OPENVINO_ASSERT(rank_norm, "Invalid operation passed to RankNormalization::ShapeInfer: ", n->get_type_info().name); + m_num_append = rank_norm->m_num_append; + m_num_prepend = rank_norm->m_num_prepend; +} + +IShapeInferSnippets::Result +RankNormalization::ShapeInfer::infer(const std::vector& input_shapes) { + OPENVINO_ASSERT(input_shapes.size() == 1, "Invalid number of input shapes passed to RankNormalization::ShapeInfer::infer"); + VectorDims out_shape = input_shapes[0].get(); + out_shape.insert(out_shape.begin(), m_num_prepend, 1); + out_shape.insert(out_shape.end(), m_num_append, 1); + return {{out_shape}, ShapeInferStatus::success}; +} + +} // namespace op +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/op/scalar.cpp b/src/common/snippets/src/op/scalar.cpp index 029a2e613f28d2..4efd1716a6fb94 100644 --- a/src/common/snippets/src/op/scalar.cpp +++ b/src/common/snippets/src/op/scalar.cpp @@ -4,14 +4,17 @@ #include "snippets/op/scalar.hpp" +namespace ov { +namespace snippets { +namespace op { -std::shared_ptr ov::snippets::op::Scalar::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Scalar::clone_with_new_inputs(const OutputVector& new_args) const { check_new_args_count(this, new_args); return std::make_shared(*this); } // Scalar currently supports only one-element constants, this could be changed in the future -void 
ov::snippets::op::Scalar::validate_and_infer_types() { +void Scalar::validate_and_infer_types() { Constant::validate_and_infer_types(); auto out_pshape = get_output_partial_shape(0); NODE_VALIDATION_CHECK(this, out_pshape.is_static(), "Scalar supports only static input shapes"); @@ -20,7 +23,7 @@ void ov::snippets::op::Scalar::validate_and_infer_types() { " shape"); } -bool ov::snippets::op::Scalar::visit_attributes(AttributeVisitor& visitor) { +bool Scalar::visit_attributes(AttributeVisitor& visitor) { auto shape = get_output_shape(0); auto type = get_output_element_type(0); auto value = cast_vector(); @@ -29,3 +32,7 @@ bool ov::snippets::op::Scalar::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("value", value); return true; } + +} // namespace op +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index dc13bb3e8bb716..fccecfa8ab5f32 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -6,9 +6,7 @@ #include "snippets/remarks.hpp" #include "snippets/op/subgraph.hpp" -#include "snippets/op/convert_saturation.hpp" -#include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" #include "snippets/pass/propagate_precision.hpp" #include "snippets/pass/convert_constants.hpp" @@ -17,6 +15,9 @@ #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/set_softmax_ports.hpp" +#include "snippets/pass/canonicalization.hpp" +#include "snippets/pass/align_element_types.hpp" +#include "snippets/lowered/pass/validate_shapes.hpp" #include "snippets/utils.hpp" @@ -29,6 +30,7 @@ #include "snippets/lowered/pass/init_loops.hpp" #include "snippets/lowered/pass/insert_buffers.hpp" #include "snippets/lowered/pass/insert_load_store.hpp" +#include "snippets/lowered/pass/insert_broadcastmove.hpp" #include "snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp" #include "snippets/lowered/pass/allocate_buffers.hpp" #include "snippets/lowered/pass/propagate_layout.hpp" @@ -61,7 +63,7 @@ namespace snippets { namespace op { void Subgraph::set_generator(std::shared_ptr generator) { - m_generator = generator; + m_generator = std::move(generator); } void Subgraph::set_virtual_port_count(const size_t count) { @@ -171,36 +173,6 @@ std::shared_ptr Subgraph::clone_with_new_inputs(const OutputVector& inputs return make_shared(inputs, body().clone()); } -std::vector Subgraph::reshape_body(const std::vector& input_shapes) { - auto& params = body_ptr()->get_parameters(); - OPENVINO_ASSERT(params.size() == input_shapes.size(), "Got invalid number of input shapes to reshape subgraph body"); - for (size_t i = 0; i < params.size(); ++i) { - params[i]->set_partial_shape(input_shapes[i]); - } - body_ptr()->validate_nodes_and_infer_types(); - std::vector output_shapes; - for (const auto& res : body_ptr()->get_results()) { - output_shapes.emplace_back(res->get_input_partial_shape(0)); - } - return output_shapes; -} - -std::vector Subgraph::reshape_body(const std::vector& input_shapes) { - auto& params = body_ptr()->get_parameters(); - OPENVINO_ASSERT(params.size() == input_shapes.size(), "Got invalid number of input shapes to reshape subgraph body"); - for (size_t i = 0; i < params.size(); ++i) { - params[i]->set_partial_shape(input_shapes[i]); - } - body_ptr()->validate_nodes_and_infer_types(); - std::vector output_shapes; - for (const auto& 
res : body_ptr()->get_results()) { - auto pshape = res->get_input_partial_shape(0); - OPENVINO_ASSERT(pshape.is_static(), "Subgraph inferred dynamic output shape during reshape with static inputs"); - output_shapes.emplace_back(res->get_input_partial_shape(0).get_shape()); - } - return output_shapes; -} - void Subgraph::validate_and_infer_types() { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::validate_and_infer_types") @@ -311,166 +283,6 @@ auto Subgraph::constant_input_should_be_inside_body(const std::shared_ptr(node); } -/// -/// \brief Canonization transforms original subgraph and to canonical form suitable for code generation. In particular, -/// it handles supported layout conversions, broadcasts inputs and outputs to a single rank and layout. Canonicalization -/// returns master-shape (max rank + max dimensions over all outputs) that can be used for scheduling. -/// Canonicalization currently supports only the following layout conversions: -/// * None: all inputs have the same layout -/// * Planar + blocked: some inputs have blocked, and some have planar layouts, e.g. + -/// Also there is precision aligning inside body of subgraph during canonicalization -ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& outputShapes, - const BlockedShapeVector& inputShapes) { - INTERNAL_OP_SCOPE(Subgraph); - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::canonicalize") - NODE_VALIDATION_CHECK(this, inputShapes.size() == body_ptr()->get_parameters().size(), - "Number of parameters for snippet doesn't match passed to generate method: ", - inputShapes.size(), " vs ", body_ptr()->get_parameters().size(), "."); - - NODE_VALIDATION_CHECK(this, outputShapes.size() == body_ptr()->get_results().size(), - "number of results for snippet doesn't match passed to generate method: ", - outputShapes.size(), " vs ", body_ptr()->get_results().size(), "."); - - auto getMaxRankBlockedShape = [](const BlockedShapeVector& blockedShapes) -> const BlockedShape& { - return *std::max_element(blockedShapes.begin(), blockedShapes.end(), - [&](const BlockedShape& lhs, const BlockedShape& rhs) { - return std::get<0>(lhs).size() < std::get<0>(rhs).size(); - }); - }; - PartialShape baseShape; - AxisVector baseOrder; - std::tie(baseShape, baseOrder, std::ignore) = getMaxRankBlockedShape(inputShapes); - maxInputRank = baseShape.size(); - appendOnesForCanonical.resize(inputShapes.size(), 0); - const bool baseIsBlocked = baseOrder.size() != std::set(baseOrder.begin(), baseOrder.end()).size(); - for (size_t i = 0; i < inputShapes.size(); i++) { - const auto& blockedShape = inputShapes[i]; - PartialShape inShape; - AxisVector inOrder; - element::Type inType; - std::tie(inShape, inOrder, inType) = blockedShape; - const auto inRank = inShape.size(); - NODE_VALIDATION_CHECK(this, inRank <= maxInputRank, "Input rank can't be larger than output rank in snippets."); - if (inRank < maxInputRank) { - appendOnesForCanonical[i] = maxInputRank - inRank; - PartialShape newShape(ov::Shape(maxInputRank, 1)); - // todo: more complicated logics is needed if we want to merge smth else than blocked and planar - if (baseIsBlocked) { - const bool inIsNotBlocked = inOrder.size() == std::set(inOrder.begin(), inOrder.end()).size(); - NODE_VALIDATION_CHECK(this, inIsNotBlocked, "Snippets don't support conversion between blocked layouts of different ranks"); - inShape.insert(inShape.end(), ov::Dimension(1)); - appendOnesForCanonical[i]--; - } - 
NODE_VALIDATION_CHECK(this, PartialShape::broadcast_merge_into(newShape, inShape, ov::op::AutoBroadcastType::NUMPY), - "Failed to broadcast_merge inputs in snippets canonicalization"); - inShape = std::move(newShape); - } else { - // todo: 4d blocked + 5d planar layouts are not supported: + - NODE_VALIDATION_CHECK(this, - equal(baseOrder.begin(), baseOrder.end(), inOrder.begin()), - "Snippets canonicalization got input shapes of equal ranks but different layouts, which is not supported"); - } - ov::PartialShape tmpPShape(baseShape); - // todo: we need to generalize canonicalization for domain-sensitive ops. E.g. MatMul inputs can't be broadcasted one to another - if (!config.m_has_domain_sensitive_ops) - NODE_VALIDATION_CHECK(this, - PartialShape::broadcast_merge_into(tmpPShape, inShape, ::ov::op::AutoBroadcastType::NUMPY), - "Failed to create broadcastable shapes in snippets canonicalization"); - const auto paramShape = body_ptr()->get_parameters()[i]->get_partial_shape(); - const auto paramType = body_ptr()->get_parameters()[i]->get_element_type(); - if (paramShape.size() != inShape.size() || !equal(paramShape.begin(), paramShape.end(), inShape.begin())) - body_ptr()->replace_parameter(i, std::make_shared(paramType, inShape)); - } - body_ptr()->validate_nodes_and_infer_types(); - - auto skipStartEndOnes = [](const PartialShape& shape) { - auto begin = shape.begin(); - auto end = shape.end(); - while (begin != end && *begin == 1) - begin++; - while (begin != end && *(end - 1) == 1) - end--; - - PartialShape trimmedShape(std::vector(end - begin, 1)); - std::copy(begin, end, trimmedShape.begin()); - return trimmedShape; - }; - - // Check that output shapes are broadcastable => can be scheduled - const auto& body_results = body_ptr()->get_results(); - PartialShape outPShape = body_results[0]->get_input_partial_shape(0); - // todo: we need a slightly more general approach for backward ROI propagation - const auto& result_parent = body_results[0]->get_input_node_shared_ptr(0); - if (body_results.size() == 1 && - ov::is_type(result_parent) && - ov::is_type(result_parent->get_input_node_shared_ptr(0))) { - outPShape = result_parent->get_input_partial_shape(0); - } else { - for (size_t i = 0; i < body_results.size(); i++) { - auto shape_i = body_results[i]->get_input_partial_shape(0); - auto outputShape_i = std::get<0>(outputShapes[i]); - // Check that the produced output shape corresponds to the passed shape - // Some produced shapes may have been changed to be broadcastable (e.g. 
blocked + planar outputs), - // so we need to remove leading and trailing "1" before the comparison - PartialShape pShape_i(skipStartEndOnes(shape_i)); - bool compatibleWithPassedShape = PartialShape::broadcast_merge_into(pShape_i, - skipStartEndOnes(outputShape_i), - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithPassedShape, - "Inferred and passed results shapes are incompatible for snippet "); - // Check that output shapes are broadcastable to each other => can be scheduled - bool compatibleWithOtherOutputs = PartialShape::broadcast_merge_into(outPShape, shape_i, - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithOtherOutputs, - "Snippets output shapes must be numpy broadcastable"); - } - } - - // We should insert Converts after Parameters and Constant and before Results - // to align precision inside Subgraph body that is supported by Plugin - align_element_types(outputShapes, inputShapes); - - master_shape = outPShape; - return master_shape; -} - -ov::PartialShape snippets::op::Subgraph::canonicalized_body_shape_infer(const BlockedShapeVector& inputShapes) { - std::vector normInputShapes; - for (size_t i = 0; i < inputShapes.size(); i++) { - PartialShape inShape = std::get<0>(inputShapes[i]); - const auto inRank = inShape.size(); - if (inRank < maxInputRank) { - PartialShape newShape(ov::Shape(maxInputRank, 1)); - for (size_t ir = 0; ir < inRank; ir++) { - newShape[appendOnesForCanonical[i] + ir] = inShape[ir]; - } - normInputShapes.push_back(newShape.get_shape()); - } else { - normInputShapes.push_back(inShape.get_shape()); - } - } - reshape_body(normInputShapes); - - const auto& body_results = body_ptr()->get_results(); - PartialShape outPShape = body_results[0]->get_input_partial_shape(0); - const auto& result_parent = body_results[0]->get_input_node_shared_ptr(0); - if (body_results.size() == 1 && - ov::is_type(result_parent) && - ov::is_type(result_parent->get_input_node_shared_ptr(0))) { - outPShape = result_parent->get_input_partial_shape(0); - } else { - for (size_t i = 0; i < body_results.size(); i++) { - auto shape_i = body_results[i]->get_input_partial_shape(0); - bool compatibleWithOtherOutputs = PartialShape::broadcast_merge_into(outPShape, shape_i, - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithOtherOutputs, - "Snippets output shapes must be numpy broadcastable"); - } - } - master_shape = outPShape; - return master_shape; -} - bool Subgraph::check_broadcast(const std::shared_ptr& node) noexcept { const auto elementwise = std::dynamic_pointer_cast(node); return @@ -503,8 +315,40 @@ IShapeInferSnippets::Result Subgraph::OVShapeInfer::infer(const std::vector output_dims; + if (is_dynamic()) { + // Note that in case of dynamic implementation shapeInfer() is called before PrepareParams, + // so there must be last_result available + // In principle, we can instantiate shape_infer here, but it's not an intended pipeline behavior. 
+ OPENVINO_ASSERT(m_shape_infer, "Can't calculate master_shape when shapeInfer is not initialized"); + output_dims = m_shape_infer->get_last_result().dims; + OPENVINO_ASSERT(!output_dims.empty(), "Can't calculate master_shape before the first shape inference"); + } else { + for (const auto& res : body_ptr()->get_results()) { + const auto& res_input = res->input(0); + OPENVINO_ASSERT(res_input.get_partial_shape().is_static(), "Result have dynamic shape in static pipeline"); + // We need to account to the shape's layout stored in Output rt_info + const auto& planar_shape = utils::get_planar_pshape(res_input.get_source_output()); + output_dims.emplace_back(planar_shape.get_shape()); + } + } + + if (output_dims.size() == 1) + return output_dims.front(); + + const auto& default_broadcasting = std::make_shared(); + // Note: we have to convert vector to vector> + // because of shape inference interface + std::vector> inputs; + inputs.reserve(output_dims.size()); + for (const auto& d : output_dims) + inputs.emplace_back(d); + return default_broadcasting->infer(inputs).dims.front(); +} + std::shared_ptr -Subgraph::convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory) const { +Subgraph::convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory) { lowered::Config lowering_config; lowering_config.m_save_expressions = config.m_has_domain_sensitive_ops; lowering_config.m_need_fill_tail_register = config.m_has_domain_sensitive_ops; @@ -513,89 +357,44 @@ Subgraph::convert_body_to_linear_ir(const std::shared_ptr(body_ptr(), shape_infer_factory, lowering_config); + m_linear_ir = std::make_shared(body_ptr(), shape_infer_factory, lowering_config); + m_shape_infer = m_linear_ir->get_shape_infer_instance(); + return m_linear_ir; } -void Subgraph::align_element_types(const BlockedShapeVector& outputShapes, - const BlockedShapeVector& inputShapes) { - // We should insert Convert before Results to set original output element type if needed - const auto& body_results = body_ptr()->get_results(); - for (size_t i = 0; i < outputShapes.size(); i++) { - const auto needed_out_type = std::get<2>(outputShapes[i]); - if (body_results[i]->get_input_element_type(0) != needed_out_type) { - auto parent_output = body_results[i]->get_input_source_output(0); - std::shared_ptr consumer = body_results[i]; - - // Snippets supports Transpose only after Parameter or before Result nodes - // So we have to insert Convert before Transpose (if there is) on Subgraph outputs - const auto transpose = ov::as_type_ptr(parent_output.get_node_shared_ptr()); - if (transpose) { - OPENVINO_ASSERT(parent_output.get_target_inputs().size() == 1, - "If Result has Transpose on input, this Result must be single consumer of the Transpose"); - parent_output = transpose->get_input_source_output(0); - consumer = transpose; - } - - const auto convert = std::make_shared(parent_output, needed_out_type); - ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); - - consumer->set_argument(0, convert); - consumer->validate_and_infer_types(); - if (consumer != body_results[i]) - body_results[i]->validate_and_infer_types(); - } - } - - // We should change existing element type to original for Parameters if needed - const auto& parameters = body_ptr()->get_parameters(); - for (size_t i = 0; i < inputShapes.size(); ++i) { - const auto needed_in_type = std::get<2>(inputShapes[i]); - const auto& parameter = parameters[i]; - const auto original_type = parameter->get_element_type(); - if (original_type != needed_in_type) { - 
parameter->set_element_type(needed_in_type); - parameter->validate_and_infer_types(); - - auto parent_output = parameter->output(0); - auto consumer_inputs = parent_output.get_target_inputs(); - - // Snippets supports Transpose only after Parameter or before Result nodes - // So we have to insert Convert after Transpose (if there is) on Subgraph inputs - if (std::any_of(consumer_inputs.cbegin(), consumer_inputs.cend(), - [](const ov::Input& input) { return ov::is_type(input.get_node()); })) { - OPENVINO_ASSERT(consumer_inputs.size() == 1, - "If Parameter has Transpose on output, this Transpose must be single consumer of the Parameter"); - const auto transpose = consumer_inputs.begin()->get_node()->shared_from_this(); - transpose->validate_and_infer_types(); - - parent_output = transpose; - consumer_inputs = parent_output.get_target_inputs(); - } - - const auto convert = std::make_shared(parent_output, original_type); - ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); - - for (const auto input : consumer_inputs) { - const auto& input_node = input.get_node(); - if (input_node == convert.get()) { - continue; - } - input_node->set_argument(input.get_index(), convert->output(0)); - } - } +std::shared_ptr Subgraph::clone() const { + ov::OutputVector subgraph_node_inputs; + for (const auto &input : input_values()) { + auto new_input = std::make_shared(input.get_element_type(), input.get_partial_shape()); + subgraph_node_inputs.push_back(new_input); } -} - -void Subgraph::data_flow_transformations(const std::vector& backend_passes) { + std::shared_ptr new_body = body_ptr()->clone(); + auto result = std::make_shared(subgraph_node_inputs, new_body); + // Note: ov::copy_runtime_info accepts only shared_ptr as "from" but never modifies it, + // so we have to cast away constness to copy runtime info + ov::copy_runtime_info(const_pointer_cast(shared_from_this()), result); + result->set_friendly_name(get_friendly_name()); + if (m_linear_ir) + result->m_linear_ir = std::make_shared(m_linear_ir->deep_copy()); + // Note: we don't update shapeInfer here, since it's initialized in the constructor + if (m_generator) + result->m_generator = m_generator->clone(); + return result; +} + +void Subgraph::data_flow_transformations(const BlockedShapeVector& blocked_input_shapes, + const std::vector& input_precisions, + const std::vector& output_precisions, + const std::vector& backend_passes) { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::data_flow_transformations") - const auto& params = body_ptr()->get_parameters(); - bool inputs_has_dynamic_last_dims = std::any_of(params.begin(), params.end(), - [](const shared_ptr& p) { - return p->get_partial_shape().rbegin()->is_dynamic(); - }); - snippets::pass::Manager manager; + ov::snippets::pass::Manager manager; + if (!blocked_input_shapes.empty()) + manager.register_pass(blocked_input_shapes); + if (!input_precisions.empty() && !output_precisions.empty()) + manager.register_pass(input_precisions, output_precisions); + if (config.m_has_domain_sensitive_ops) { manager.register_pass(); manager.register_pass(); @@ -605,14 +404,6 @@ void Subgraph::data_flow_transformations(const std::vector(); manager.register_pass(); manager.register_pass(); - // todo: presently dynamic pipeline is activated even if the last two dimension are static - // In general, we can use static kernels in this case, but several parameters (src and dst memory pointers for example) - // should be passed as run-time args, so 
it's a mixed mode: kernel is shape-aware, but some additional runtime args are required - // Presently Broadcasting is organized in the following way: - // * ALL last dims are static => broadcasting is handled via MoveBroadcast and pointer arithmetics (even for dynamic upper dims) - if (!inputs_has_dynamic_last_dims) { - manager.register_pass(); - } manager.register_pass(m_generator->get_target_machine()); manager.register_pass(); @@ -623,8 +414,9 @@ void Subgraph::data_flow_transformations(const std::vector(buffer_allocation_rank); common_pipeline.register_pass(vector_size); common_pipeline.register_pass(); + common_pipeline.register_pass(); common_pipeline.register_pass(); + + common_pipeline.register_pass(); + common_pipeline.register_pass(); common_pipeline.register_pass(); common_pipeline.register_pass(); @@ -669,57 +465,44 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir, final_pipeline.register_pass(); final_pipeline.run(linear_ir); - m_buffer_scratchpad = buffer_allocation_pass->get_scratchpad_size(); + lowering_result.buffer_scratchpad_size = buffer_allocation_pass->get_scratchpad_size(); } -snippets::Schedule Subgraph::generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, +snippets::Schedule Subgraph::generate(const BlockedShapeVector& blocked_input_shapes, + const std::vector& input_precisions, + const std::vector& output_precisions, + const std::vector& data_flow_backend_passes, + const lowered::pass::PassPipeline& backend_passes_pre_common, + const lowered::pass::PassPipeline& backend_passes_post_common, + const std::shared_ptr& factory, const void* compile_params) { - canonicalize(output_shapes, input_shapes); - return generate(compile_params); + data_flow_transformations(blocked_input_shapes, input_precisions, output_precisions, data_flow_backend_passes); + convert_body_to_linear_ir(factory); + return generate_from_linear_ir(backend_passes_pre_common, backend_passes_post_common, compile_params); } -snippets::Schedule Subgraph::generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, - const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory, - const void* compile_params) { - canonicalize(output_shapes, input_shapes); - return generate(data_flow_passes, control_flow_passes_pre_common, control_flow_passes_post_common, - shape_infer_factory, compile_params); -} - -snippets::Schedule Subgraph::generate(const void* compile_params) { - return generate({}, {}, {}, nullptr, compile_params); -} - -snippets::Schedule Subgraph::generate(const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory, - const void* compile_params) { +snippets::Schedule Subgraph::generate_from_linear_ir(const lowered::pass::PassPipeline& backend_passes_pre_common, + const lowered::pass::PassPipeline& backend_passes_post_common, + const void* compile_params) const { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::generate") OPENVINO_ASSERT(m_generator != nullptr, "generate is called while generator is not set"); - data_flow_transformations(data_flow_passes); - - lowered::LinearIR linear_ir = 
*convert_body_to_linear_ir(shape_infer_factory); - control_flow_transformations(linear_ir, control_flow_passes_pre_common, control_flow_passes_post_common); - // actual code emission - const auto& lowering_result = m_generator->generate(linear_ir, linear_ir.get_config(), compile_params); - const auto ptr = lowering_result.binary_code; - + // Note: some transformations performed in the generator, e.g. tail insertion, can break shape propagation. + // Until this behavior is fixed, we have to make a copy of the LIR before giving it to the generator. + OPENVINO_ASSERT(m_linear_ir, "Attempt to call generate when linear IR was not initialized"); + auto linear_ir = m_linear_ir->deep_copy(); + LoweringResult lowering_result; + control_flow_transformations(linear_ir, lowering_result, backend_passes_pre_common, backend_passes_post_common); + m_generator->generate(linear_ir, lowering_result, compile_params); VectorDims parallel_exec_domain = linear_ir.get_master_shape(); const size_t loop_depth = linear_ir.get_config().m_loop_depth; for (size_t i = 0; i < loop_depth; i++) parallel_exec_domain[parallel_exec_domain.size() - 1 - i] = 1; - return {parallel_exec_domain, ptr}; + return {parallel_exec_domain, std::move(lowering_result)}; } void Subgraph::print() const { diff --git a/src/common/snippets/src/pass/align_element_types.cpp b/src/common/snippets/src/pass/align_element_types.cpp new file mode 100644 index 00000000000000..da1ab1cb2c038f --- /dev/null +++ b/src/common/snippets/src/pass/align_element_types.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/align_element_types.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { + +pass::AlignElementTypes::AlignElementTypes(std::vector input_precisions, + std::vector output_precisions) : + m_input_precisions(std::move(input_precisions)), + m_output_precisions(std::move(output_precisions)) { +} + +bool pass::AlignElementTypes::run_on_model(const std::shared_ptr& m) { + RUN_ON_MODEL_SCOPE(AlignElementTypes); + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::AlignElementTypes") + bool is_modified = false; + const auto& results = m->get_results(); + const auto& params = m->get_parameters(); + OPENVINO_ASSERT(m_input_precisions.size() == params.size() && m_output_precisions.size() == results.size(), + "Number of snippet parameters and results doesn't match the number of precisions passed to the AlignElementTypes pass.
"); + + // We should insert Convert before Results to set original output element type if needed + for (size_t i = 0; i < m_output_precisions.size(); i++) { + const auto needed_out_type = m_output_precisions[i]; + if (results[i]->get_input_element_type(0) != needed_out_type) { + std::shared_ptr consumer = results[i]; + auto parent_output = consumer->get_input_source_output(0); + + // Snippets supports Transpose only after Parameter or before Result nodes + // So we have to insert Convert before Transpose (if there is) on Subgraph outputs + const auto transpose = ov::as_type_ptr(parent_output.get_node_shared_ptr()); + if (transpose) { + OPENVINO_ASSERT(parent_output.get_target_inputs().size() == 1, + "If Result has Transpose on input, this Result must be single consumer of the Transpose"); + parent_output = transpose->get_input_source_output(0); + consumer = transpose; + } + + const auto convert = std::make_shared(parent_output, needed_out_type); + ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + + consumer->set_argument(0, convert); + consumer->validate_and_infer_types(); + if (transpose) + results[i]->validate_and_infer_types(); + is_modified = true; + } + } + + // We should change existing element type to original for Parameters if needed + for (size_t i = 0; i < m_input_precisions.size(); ++i) { + const auto needed_in_type = m_input_precisions[i]; + const auto& parameter = params[i]; + const auto original_type = parameter->get_element_type(); + if (original_type != needed_in_type) { + parameter->set_element_type(needed_in_type); + parameter->validate_and_infer_types(); + + auto parent_output = parameter->output(0); + auto consumer_inputs = parent_output.get_target_inputs(); + + const auto& first_child = consumer_inputs.begin()->get_node()->shared_from_this(); + // Note: RankNormalization of is designed for shape-inference purposes only. 
+ // It does not process any data (nor does it emit any code), so it doesn't require Convert operations + if (is_type(first_child)) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); + parent_output = first_child->output(0); + consumer_inputs = parent_output.get_target_inputs(); + } + + // Snippets supports Transpose only after Parameter or before Result nodes + // So we have to insert Convert after Transpose (if there is) on Subgraph inputs + if (std::any_of(consumer_inputs.cbegin(), consumer_inputs.cend(), + [](const ov::Input& input) { return ov::is_type(input.get_node()); })) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, + "If Parameter has Transpose on output, this Transpose must be single consumer of the Parameter"); + const auto transpose = consumer_inputs.begin()->get_node()->shared_from_this(); + transpose->validate_and_infer_types(); + + parent_output = transpose; + consumer_inputs = parent_output.get_target_inputs(); + } + + const auto& convert = std::make_shared(parent_output, original_type); + ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + + for (const auto input : consumer_inputs) { + const auto& input_node = input.get_node(); + if (input_node == convert.get()) { + continue; + } + input_node->set_argument(input.get_index(), convert->output(0)); + } + + is_modified = true; + } + } + return is_modified; +} + +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp b/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp index 65fbbc162a8ada..cd803b163b5bbf 100644 --- a/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp +++ b/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp @@ -5,7 +5,7 @@ #include "snippets/itt.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" -#include "snippets/pass/insert_movebroadcast.hpp" +#include "snippets/op/broadcastmove.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/opsets/opset1.hpp" @@ -30,15 +30,19 @@ ov::snippets::pass::BroadcastToMoveBroadcast::BroadcastToMoveBroadcast() { const auto target_shape = root->get_output_partial_shape(0); const auto value_shape = root->get_input_partial_shape(0); - if (target_shape.is_dynamic() || value_shape.is_dynamic()) { - return false; + OPENVINO_ASSERT(target_shape.is_static() && value_shape.rank().is_static(), "Broadcast with dynamic target shape is not supported in Snippets"); + // Insert BroadcastMove only if the last dimension needs to be broadcasted. Higher-level dims broadcasting + // will be handled by pointer arithmetic. Note that this behavior should be changed in case of full op::Broadcast support. 
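// ---------------------------------------------------------------------------
// [Editor's illustration, not part of the patch] The rule the hunk just above
// describes: an explicit BroadcastMove is emitted only when the innermost
// (last) dimension differs, since outer-dim broadcasting is realized by the
// pointer arithmetic of the generated loops. A hedged sketch of that decision
// on plain dim vectors (needs_broadcast_move is a hypothetical helper, not an
// OpenVINO API):
#include <cstddef>
#include <vector>

inline bool needs_broadcast_move(const std::vector<std::size_t>& value_shape,
                                 const std::vector<std::size_t>& target_shape) {
    // Only the innermost dims are compared: value {1, 3, 1, 1} vs target
    // {1, 3, 16, 64} -> true (1 != 64), while value {1, 3, 1, 64} vs the
    // same target -> false (outer dims are broadcast "for free").
    return !value_shape.empty() && !target_shape.empty() &&
           value_shape.back() != target_shape.back();
}
// ---------------------------------------------------------------------------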
+ Output in_value = root->input_value(0); + if (*target_shape.rbegin() != *value_shape.rbegin()) { + auto broadcasted_shape = value_shape; + *broadcasted_shape.rbegin() = *target_shape.rbegin(); + const auto& broadcast_node = std::make_shared(in_value, broadcasted_shape); + in_value = broadcast_node->output(0); } - const auto broadcast_node = ov::snippets::pass::InsertMoveBroadcast::BroadcastNodeLastDim(root->input_value(0), - target_shape.get_shape(), - value_shape.get_shape()); - replace_output_update_name(root->output(0), broadcast_node); - ov::copy_runtime_info(root, broadcast_node.get_node_shared_ptr()); + replace_output_update_name(root->output(0), in_value); + ov::copy_runtime_info(root, in_value.get_node_shared_ptr()); return true; }; diff --git a/src/common/snippets/src/pass/canonicalization.cpp b/src/common/snippets/src/pass/canonicalization.cpp new file mode 100644 index 00000000000000..23414d2925bf36 --- /dev/null +++ b/src/common/snippets/src/pass/canonicalization.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/canonicalization.hpp" +#include "snippets/op/rank_normalization.hpp" +#include "snippets/itt.hpp" +#include "snippets/utils.hpp" +#include "snippets/lowered/port_descriptor.hpp" + +namespace ov { +namespace snippets { + +pass::Canonicalization::Canonicalization(const BlockedShapeVector& blocked_input_shapes) { + m_in_shapes.reserve(blocked_input_shapes.size()); + m_in_layouts.reserve(blocked_input_shapes.size()); + for (const auto& bs : blocked_input_shapes) { + m_has_dynamic_inputs |= utils::is_dynamic_vdims(bs.first); + m_in_shapes.emplace_back(bs.first); + m_in_layouts.emplace_back(bs.second); + // Note: Blocking (if any) must be accounted for in input shapes + OPENVINO_ASSERT(m_in_shapes.back().size() == m_in_layouts.back().size(), "Input shapes and layouts must have the same rank"); + } +} + +bool pass::Canonicalization::run_on_model(const std::shared_ptr& m) { + RUN_ON_MODEL_SCOPE(Canonicalization); + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::Canonicalization") + bool is_modified = false; + const ParameterVector& params = m->get_parameters(); + OPENVINO_ASSERT(m_in_shapes.size() == params.size(), + "Number of parameters for snippet doesn't match passed to the Canonicalization pass. ", + "Expected: ", m_in_shapes.size(), " Got: ", params.size(), "."); + + // Note that shape rank also incorporates layout, so NCHW16c would have shape rank 5 + auto is_blocked_layout = [](const Layout& l) { + return l.size() != std::set(l.begin(), l.end()).size(); + }; + auto compare_ranks = [](const Layout& l, const Layout& r) { + return l.size() < r.size(); + }; + // Layout with the max rank + const auto& max_rank_it = std::max_element(m_in_layouts.begin(), m_in_layouts.end(), compare_ranks); + Layout base_layout = *max_rank_it; + size_t max_rank = base_layout.size(); + const bool base_is_blocked = is_blocked_layout(base_layout); + + for (size_t i = 0; i < m_in_layouts.size(); i++) { + const auto& i_layout = m_in_layouts[i]; + const auto& i_shape = m_in_shapes[i]; + const auto i_rank = i_layout.size(); + const bool i_is_blocked = is_blocked_layout(i_layout); + // Canonicalization logic briefly: + // * If this input is blocked => Reshape corresponding input parameter, so the following transformations + // will work with a shape of a larger rank. 
In the dynamic case, this shape will be updated during shapeInfer() + // call, but the important thing is that the shape rank won't change. + // * If any of the input shapes is blocked (=> base_is_blocked), but this input is planar, + // then insert RankNormalization op after this input. This is needed so that all shapes inside the body have + // similar ranks. + if (i_is_blocked) { + OPENVINO_ASSERT(base_is_blocked && i_rank == max_rank, "If this shape is blocked, base must also be blocked"); + params[i]->set_partial_shape(snippets::utils::vdims_to_pshape(i_shape)); + is_modified = true; + } else if (i_rank < max_rank) { + size_t num_append = base_is_blocked; + OPENVINO_ASSERT(max_rank >= i_rank + num_append, "Unsupported blocked shapes combination in canonicalization"); + size_t num_prepend = max_rank - i_rank - num_append; + const auto& out = params[i]->output(0); + const auto& target_inputs = out.get_target_inputs(); + auto rank_norm = std::make_shared(out, num_prepend, num_append); + for (auto& in : target_inputs) + in.replace_source_output(rank_norm); + is_modified = true; + } else { + // todo: 4d blocked + 5d planar layouts are not supported: + + OPENVINO_ASSERT(equal(base_layout.begin(), base_layout.end(), i_layout.begin()), + "Canonicalization got input shapes of equal ranks but different layouts, which is not supported"); + } + } + return is_modified; +} + +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/convert_constants.cpp b/src/common/snippets/src/pass/convert_constants.cpp index b5fb81b77dd98a..c374ee010d3446 100644 --- a/src/common/snippets/src/pass/convert_constants.cpp +++ b/src/common/snippets/src/pass/convert_constants.cpp @@ -24,8 +24,7 @@ ov::snippets::pass::ConvertConstantsToScalars::ConvertConstantsToScalars() { // Note that all Constants {1,1,1,1} are converted to Scalar {1} here // This is needed to simplify shape inference, otherwise {1,1,1,1} Constants can increase output rank // Also some operations support only scalar shapes, so we need separate scalars and shape [1] - const auto shape = constant->get_output_shape(0).size() == 0 ? 
ov::Shape{} : ov::Shape{1}; - auto scalar = std::make_shared(ov::op::v0::Constant(*constant, shape)); + auto scalar = std::make_shared(ov::op::v0::Constant(*constant, ov::Shape{1})); scalar->set_friendly_name(constant->get_friendly_name()); ov::copy_runtime_info(constant, scalar); ov::replace_node(constant, scalar); diff --git a/src/common/snippets/src/pass/hash.cpp b/src/common/snippets/src/pass/hash.cpp index 2f975ef2cbccee..cea21e37e861cf 100644 --- a/src/common/snippets/src/pass/hash.cpp +++ b/src/common/snippets/src/pass/hash.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/opsets/opset1.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" namespace ov { @@ -180,6 +181,17 @@ class SnippetsHasher : public ov::AttributeVisitor { m_hash = hash_combine(m_hash, data[i]); } } + } else if (const auto& a = + ov::as_type>>(&adapter)) { + if (name == "value" && m_node_type_name == "Constant") { + m_hash = hash_combine(m_hash, AttrType::constant); + const int64_t size = a->get()->size(); + m_hash = hash_combine(hash_combine(m_hash, AttrType::size), size); + auto data = static_cast(a->get()->get_ptr()); + for (int64_t i = 0; i < size; i++) { + m_hash = hash_combine(m_hash, data[i]); + } + } } else if (const auto& a = ov::as_type>(&adapter)) { const auto& attrs = a->get(); // Update node attributes in data field diff --git a/src/common/snippets/src/pass/set_softmax_ports.cpp b/src/common/snippets/src/pass/set_softmax_ports.cpp index 1651a6d6217495..f8d38d51ec59b5 100644 --- a/src/common/snippets/src/pass/set_softmax_ports.cpp +++ b/src/common/snippets/src/pass/set_softmax_ports.cpp @@ -25,11 +25,9 @@ ov::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { auto root = m.get_match_root(); const auto& pshape = root->get_input_partial_shape(0); - if (pshape.is_dynamic()) - return false; - const auto shape = pshape.get_shape(); - const auto rank = shape.size(); + OPENVINO_ASSERT(!pshape.rank().is_dynamic(), "SetSoftmaxPorts doesn't support dynamic ranks"); + const auto rank = pshape.rank().get_length(); int64_t axis; if (const auto softmax_v8 = ov::as_type_ptr(root)) { @@ -44,7 +42,7 @@ ov::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { OPENVINO_ASSERT(axis < static_cast(rank), "Softmax has incorrect axis"); std::vector subtensor(rank, 1); - for (size_t i = axis; i < rank; ++i) + for (auto i = axis; i < rank; ++i) subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM; lowered::PortDescriptorUtils::set_port_descriptor_ptr(root->input(0), std::make_shared(root->input(0), subtensor)); diff --git a/src/common/snippets/src/shape_inference/shape_inference.cpp b/src/common/snippets/src/shape_inference/shape_inference.cpp index 22470a13d3443f..0b9117d05d0477 100644 --- a/src/common/snippets/src/shape_inference/shape_inference.cpp +++ b/src/common/snippets/src/shape_inference/shape_inference.cpp @@ -63,6 +63,7 @@ const IShapeInferSnippetsFactory::TRegistry IShapeInferSnippetsFactory::registry SHAPE_INFER_PREDEFINED(ov::op::v0::Result, EmptyShapeInfer), // SHAPE_INFER_OP_SPECIFIC(op::LoadReshape), + SHAPE_INFER_OP_SPECIFIC(op::RankNormalization), SHAPE_INFER_OP_SPECIFIC(op::BroadcastLoad), SHAPE_INFER_OP_SPECIFIC(op::BroadcastMove), }; diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index df894604d11693..242391b908dc03 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -92,7 
+92,8 @@ VectorDims pshape_to_vdims(const PartialShape& pshape) { result.reserve(pshape.size()); for (const auto& d : pshape) result.push_back(d.is_dynamic() ? IShapeInferSnippets::DYNAMIC_DIMENSION : d.get_length()); - return result; + // Note: PartialShape could be empty which designates scalar value. However, Scalars are represented as {1} in Snippets + return result.empty() ? VectorDims {1} : result; } ov::PartialShape vdims_to_pshape(const VectorDims& vdims) { @@ -132,6 +133,10 @@ VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port) return get_planar_vdims(expr_port.get_descriptor_ptr()); } +bool is_dynamic_vdims(const VectorDims& shape) { + return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return v == IShapeInferSnippets::DYNAMIC_DIMENSION; }); +} + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/tests/include/lowering_utils.hpp b/src/common/snippets/tests/include/lowering_utils.hpp index a419e6575a5de5..f2c872f725b7d6 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -6,6 +6,8 @@ #include #include "snippets/op/subgraph.hpp" #include "snippets_helpers.hpp" +#include "snippets/pass_manager.hpp" +#include "snippets/shape_inference/shape_inference.hpp" namespace ov { namespace test { @@ -23,11 +25,17 @@ class DummyEmitter : public ov::snippets::Emitter { void emit_data() const override {} }; +struct DummyCompiledSnippet : public ov::snippets::CompiledSnippet { + const uint8_t* get_code() const override { return nullptr; } + size_t get_code_size() const override { return 0; } + bool empty() const override { return true; } +}; + class DummyTargetMachine : public ov::snippets::TargetMachine { public: DummyTargetMachine(const std::vector& custom_opset = {}); bool is_supported() const override { return true; } - ov::snippets::code get_snippet() const override { return nullptr; } + ov::snippets::CompiledSnippetPtr get_snippet() override { return std::make_shared(); } size_t get_lanes() const override { return 10; } }; @@ -35,6 +43,7 @@ class DummyGenerator : public ov::snippets::Generator { public: DummyGenerator() : ov::snippets::Generator(std::make_shared()) {} DummyGenerator(const std::shared_ptr& t) : ov::snippets::Generator(t) {} + std::shared_ptr clone() const override { return std::make_shared(target); } protected: opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override { return vec2vec; }; @@ -48,13 +57,15 @@ class LoweringTests : public TransformationTestsF { void TearDown() override; static std::shared_ptr getSubgraph(const std::shared_ptr& f); + using IShapeInferSnippetsFactory = ov::snippets::IShapeInferSnippetsFactory; static std::shared_ptr getLoweredSubgraph(const std::shared_ptr& f, const ov::PartialShape& master_shape, const std::vector& backend_passes = {}, const ov::snippets::lowered::pass::PassPipeline& lowered_pre_common = {}, const ov::snippets::lowered::pass::PassPipeline& lowered_post_common = {}, - const std::shared_ptr& generator = nullptr); + const std::shared_ptr& generator = nullptr, + const std::shared_ptr& factory = std::make_shared()); static std::shared_ptr getTokenizedSubgraph(const std::shared_ptr& f); protected: diff --git a/src/common/snippets/tests/include/pass/canonicalization.hpp b/src/common/snippets/tests/include/pass/canonicalization.hpp index 88f6ebc0336a80..bcb27d08cb1395 100644 --- a/src/common/snippets/tests/include/pass/canonicalization.hpp +++ 
b/src/common/snippets/tests/include/pass/canonicalization.hpp @@ -5,36 +5,25 @@ #pragma once #include "lowering_utils.hpp" +#include "snippets/op/subgraph.hpp" #include "snippets_helpers.hpp" +#include "snippets/shape_types.hpp" +#include "snippets/pass/canonicalization.hpp" namespace ov { namespace test { namespace snippets { -using BlockedShape = ov::snippets::op::Subgraph::BlockedShape; -using BlockedShapeVector = ov::snippets::op::Subgraph::BlockedShapeVector; - -// todo: implement tests with 3 inputs and two outputs (aka SnippetsCanonicalizationParams3Inputs) -// Note that the expected output shape isn't necessary equal to one of the output blocked_shapes. -// For example, consider the following graph: (1, 2, 2, 1, 8) + (1, 2, 1, 1, 8) + (1, 2, 1, 5, 8) => (1, 2, 2, 1, 8) + (1, 2, 1, 5, 8). -typedef std::tuple< - std::tuple, // Shape & BlockedShape for input 0 - std::tuple, // Shape & BlockedShape for input 0 - BlockedShape, // BlockedShape output shape passed to canonicalize() - Shape // expected output Shape -> canonicalizationParams; - - -class CanonicalizationTests : public LoweringTests, public testing::WithParamInterface { +class CanonicalizationTests : public TransformationTestsF { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + using VectorDims = ov::snippets::VectorDims; + using Layout = std::vector; + virtual void run(); protected: - void SetUp() override; - std::shared_ptr snippets_model; - Shape expected_output_shape; - BlockedShapeVector input_blocked_shapes; - BlockedShapeVector output_blocked_shapes; + std::vector m_input_shapes; + std::vector m_input_layouts; + void prepare_functions(const std::vector& shapes); }; } // namespace snippets diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index 83207244ac031f..5d49d38a6af2e7 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -106,13 +106,13 @@ std::shared_ptr const std::vector& backend_passes, const ov::snippets::lowered::pass::PassPipeline& lowered_pre_common, const ov::snippets::lowered::pass::PassPipeline& lowered_post_common, - const std::shared_ptr& generator) { + const std::shared_ptr& generator, + const std::shared_ptr& factory) { auto subgraph = getTokenizedSubgraph(f); subgraph->set_generator(generator == nullptr ? 
std::make_shared() : generator); - subgraph->set_master_shape(master_shape); subgraph->set_tile_rank(2); // Note: lowered_pipeline would have no effect on subgraph body, since it's applied on linear IR - subgraph->generate(backend_passes, lowered_pre_common, lowered_post_common); + subgraph->generate({}, {}, {}, backend_passes, lowered_pre_common, lowered_post_common, factory); return subgraph; } diff --git a/src/common/snippets/tests/src/pass/canonicalization.cpp b/src/common/snippets/tests/src/pass/canonicalization.cpp index 4981b4f8d8e139..a9311b67598263 100644 --- a/src/common/snippets/tests/src/pass/canonicalization.cpp +++ b/src/common/snippets/tests/src/pass/canonicalization.cpp @@ -5,101 +5,84 @@ #include #include "pass/canonicalization.hpp" #include "common_test_utils/common_utils.hpp" -#include +#include "snippets/pass/canonicalization.hpp" +#include "snippets/op/rank_normalization.hpp" +#include namespace ov { namespace test { namespace snippets { -using ov::snippets::op::Subgraph; - -class SKIP_CanonicalizationTests : public CanonicalizationTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - -std::string CanonicalizationTests::getTestCaseName(testing::TestParamInfo obj) { - std::vector> inputs(2); - Subgraph::BlockedShape output; - Shape expectedOutput; - std::tie(inputs[0], inputs[1], output, expectedOutput) = obj.param; - std::ostringstream result; - for (size_t i = 0; i < inputs.size(); i++) { - const auto& blockedshape = std::get<1>(inputs[i]); - // input shape - result << "IS[" << i << "]=" << ov::test::utils::vec2str(std::get<0>(inputs[i])) << "_"; - // input blocked shape - result << "IBS[" << i << "]=" << ov::test::utils::partialShape2str({std::get<0>(blockedshape)}) << "_"; - // input blocked order - result << "IBO[" << i << "]=" << ov::test::utils::vec2str(std::get<1>(blockedshape)) << "_"; - } - // output blocked shape - result << "OBS[0]=" << ov::test::utils::partialShape2str({std::get<0>(output)}) << "_"; - // output blocked order - result << "OBO[0]=" << ov::test::utils::vec2str(std::get<1>(output)) << "_"; - result << "ExpOS[0]=" << ov::test::utils::vec2str(expectedOutput) << "_"; - return result.str(); +namespace { +void normalizeParameter(const std::shared_ptr& par, size_t num_prepend, size_t num_append) { + auto target_inputs = par->get_output_target_inputs(0); + auto rank_norm = std::make_shared(par, + num_prepend, + num_append); + for (auto& t : target_inputs) + t.replace_source_output(rank_norm); } +} // namespace -void CanonicalizationTests::SetUp() { - TransformationTestsF::SetUp(); - std::vector> inputs(2); - output_blocked_shapes.resize(1); - std::tie(inputs[0], inputs[1], output_blocked_shapes[0], expected_output_shape) = this->GetParam(); +void CanonicalizationTests::prepare_functions(const std::vector& shapes) { + std::vector pshapes; + pshapes.reserve(shapes.size()); + for (const auto& v : shapes ) + pshapes.emplace_back(v); + const auto &f = AddFunction(pshapes); + model = f.getOriginal(); + model_ref = model->clone(); +} - input_blocked_shapes = {std::get<1>(inputs[0]), std::get<1>(inputs[1])}; - snippets_model = std::make_shared(std::vector{std::get<0>(inputs[0]), std::get<0>(inputs[1])}); +void CanonicalizationTests::run() { + ASSERT_TRUE(model); + ASSERT_EQ(m_input_shapes.size(), m_input_layouts.size()); + BlockedShapeVector blocked_input_shapes; + blocked_input_shapes.reserve(m_input_shapes.size()); + for (size_t i = 0; i < m_input_shapes.size(); i++) + 
blocked_input_shapes.emplace_back(m_input_shapes[i], m_input_layouts[i]); + manager.register_pass(blocked_input_shapes); + disable_rt_info_check(); } -TEST_P(CanonicalizationTests, Add) { - model = snippets_model->getOriginal(); - model_ref = snippets_model->getReference(); - auto subgraph = getTokenizedSubgraph(model); - subgraph->set_generator(std::make_shared()); - auto canonical_output_shape = subgraph->canonicalize(output_blocked_shapes, input_blocked_shapes); - ASSERT_TRUE(canonical_output_shape.is_static()); - ASSERT_DIMS_EQ(canonical_output_shape.get_shape(), expected_output_shape); +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_0) { + m_input_shapes = {{2, 3, 10, 64}, {2, 3, 10, 64}}; + m_input_layouts = {{0, 1, 2, 3}, {0, 1, 2, 3}}; + prepare_functions(m_input_shapes); + run(); } namespace CanonicalizationTestsInstantiation { -using ov::snippets::op::Subgraph; -std::vector input_shapes; -Shape expected_output_shape; - -using ov::Shape; -ov::element::Type_t prec = ov::element::f32; -std::tuple blockedInput0{{1, 64, 2, 5}, - {{1, 4, 2, 5, 16}, {0, 1, 2, 3, 1}, prec}}; -Subgraph::BlockedShape output{{1, 4, 2, 5, 16}, {0, 1, 2, 3, 1}, prec}; -Shape canonical_shape{1, 4, 2, 5, 16}; - -std::vector> blockedInput1{{{1, 1, 2, 5}, {{1, 1, 2, 5, 1}, {0, 1, 2, 3, 1}, prec}}, - {{1, 1, 2, 1}, {{1, 1, 2, 1, 1}, {0, 1, 2, 3, 1}, prec}}, - {{1, 64, 1, 1}, {{1, 4, 1, 1, 16}, {0, 1, 2, 3, 1}, prec}}}; +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_1) { + m_input_shapes = {{2, 3, 10, 64}, + {10, 64}}; + m_input_layouts = {{0, 1, 2, 3}, + {0, 1}}; + prepare_functions(m_input_shapes); + normalizeParameter(model_ref->get_parameters()[1], 2, 0); + run(); +} -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastBlocked, - SKIP_CanonicalizationTests /* CVS-114607 */, - ::testing::Combine(::testing::Values(blockedInput0), - ::testing::ValuesIn(blockedInput1), - ::testing::Values(output), - ::testing::Values(canonical_shape)), - CanonicalizationTests::getTestCaseName); +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_2) { + m_input_shapes = {{2, 3, 10, 64, 16}, + {1, 10, 64}}; + m_input_layouts = {{0, 1, 2, 3, 1}, + {0, 1, 2}}; + prepare_functions({{2, 48, 10, 64}, + {1, 10, 64}}); + const auto& params = model_ref->get_parameters(); + // Note: We can't create functions with mismatching input shapes, + // so we have to set Parameter shapes after the functions were created + // This reproduces Snippets pipeline well, since blocked shapes are set after the tokenization + params[0]->set_partial_shape(PartialShape(m_input_shapes[0])); + model->get_parameters()[0]->set_partial_shape(PartialShape(m_input_shapes[0])); -std::vector> planarInput1{{{1, 1, 2, 5}, {{1, 2, 5}, {0, 1, 2}, prec}}, - {{1, 1, 2, 5}, {{2, 5}, {0, 1}, prec}}, - {{1, 2, 5}, {{2, 5}, {0, 1}, prec}}, - {{2, 5}, {{2, 5}, {0, 1}, prec}}, - {{5}, {{5}, {0}, prec}}}; + normalizeParameter(params[1], 1, 1); + // need to trigger validate..(...) 
manually to propagate new blocked shapes, + // this is correct since RankNormalization ops re-enables shape propagation for blocked shapes + model_ref->validate_nodes_and_infer_types(); + run(); +} -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastPlanar, - SKIP_CanonicalizationTests /* CVS-114607 */, - ::testing::Combine(::testing::Values(blockedInput0), - ::testing::ValuesIn(planarInput1), - ::testing::Values(output), - ::testing::Values(canonical_shape)), - CanonicalizationTests::getTestCaseName); } // namespace CanonicalizationTestsInstantiation } // namespace snippets } // namespace test diff --git a/src/common/transformations/include/ov_ops/nms_ie_internal.hpp b/src/common/transformations/include/ov_ops/nms_ie_internal.hpp index 797b89add6d4bb..75f4fa6e1b91fd 100644 --- a/src/common/transformations/include/ov_ops/nms_ie_internal.hpp +++ b/src/common/transformations/include/ov_ops/nms_ie_internal.hpp @@ -21,6 +21,10 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { NonMaxSuppressionIEInternal() = default; + static constexpr int Rotation_None = 0; + static constexpr int Rotation_Clockwise = 1; + static constexpr int Rotation_Counterclockwise = 2; + NonMaxSuppressionIEInternal(const Output& boxes, const Output& scores, const Output& max_output_boxes_per_class, @@ -29,7 +33,8 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { int center_point_box, bool sort_result_descending, const element::Type& output_type = element::i64, - const element::Type& score_output_type = element::f32); + const element::Type& score_output_type = element::f32, + const int rotation = Rotation_None); NonMaxSuppressionIEInternal(const Output& boxes, const Output& scores, @@ -40,7 +45,8 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { int center_point_box, bool sort_result_descending, const element::Type& output_type = element::i64, - const element::Type& score_output_type = element::f32); + const element::Type& score_output_type = element::f32, + const int rotation = Rotation_None); void validate_and_infer_types() override; @@ -52,6 +58,7 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { bool m_sort_result_descending = true; element::Type m_output_type; element::Type m_scores_output_type; + int m_rotation{Rotation_None}; private: int64_t max_boxes_output_from_input() const; diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp new file mode 100644 index 00000000000000..0b8d31b404090e --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertU4WeightsZeroPointToScalar; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief Converts U4 weights zero point to scalar if all values are equal + */ +class ov::pass::ConvertU4WeightsZeroPointToScalar : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertU4WeightsZeroPointToScalar", "0"); + ConvertU4WeightsZeroPointToScalar(); +}; diff --git 
a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp index 61722260bb5a57..71b5fcafc9fe75 100644 --- a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp @@ -19,6 +19,7 @@ class TRANSFORMATIONS_API GeluFusionWithErfTwo; class TRANSFORMATIONS_API GeluFusionWithErfThree; class TRANSFORMATIONS_API GeluFusionWithErfFour; class TRANSFORMATIONS_API GeluFusionWithTanh; +class TRANSFORMATIONS_API GeluFusionWithTanhNoPower; } // namespace pass } // namespace ov @@ -78,6 +79,17 @@ class ov::pass::GeluFusionWithTanh : public ov::pass::MatcherPass { GeluFusionWithTanh(); }; +/** + * @ingroup ie_transformation_common_api + * @brief GeluFusion transformation replaces a sub-graph + * x * 0.5 * (1 + tanh((x * 0.044715 * x + 1) * x * sqrt(2 / pi))) with a Gelu (Tanh) op. + */ +class ov::pass::GeluFusionWithTanhNoPower : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("GeluFusionWithTanhNoPower", "0"); + GeluFusionWithTanhNoPower(); +}; + /** * @ingroup ie_transformation_common_api * @brief GeluFusion transformation replaces various sub-graphs with a Gelu op. @@ -91,5 +103,6 @@ class ov::pass::GeluFusion : public ov::pass::GraphRewrite { add_matcher(); add_matcher(); add_matcher(); + add_matcher(); } }; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp b/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp new file mode 100644 index 00000000000000..5eb3b285365f92 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertNMSRotatedToNMSIEInternal; + +} // namespace pass +} // namespace ov + +class ov::pass::ConvertNMSRotatedToNMSIEInternal : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertNMSRotatedToNMSIEInternal", "0"); + ConvertNMSRotatedToNMSIEInternal(); +}; diff --git a/src/common/transformations/src/ov_ops/nms_ie_internal.cpp b/src/common/transformations/src/ov_ops/nms_ie_internal.cpp index c305304dbf7238..e879224dd935c7 100644 --- a/src/common/transformations/src/ov_ops/nms_ie_internal.cpp +++ b/src/common/transformations/src/ov_ops/nms_ie_internal.cpp @@ -20,12 +20,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out int center_point_box, bool sort_result_descending, const ov::element::Type& output_type, - const ov::element::Type& score_output_type) + const ov::element::Type& score_output_type, + const int rotation) : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}), m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + m_scores_output_type(score_output_type), + m_rotation(rotation) { constructor_validate_and_infer_types(); } @@ -38,12 +40,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out int center_point_box, 
bool sort_result_descending, const ov::element::Type& output_type, - const ov::element::Type& score_output_type) + const ov::element::Type& score_output_type, + const int rotation) : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}), m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + m_scores_output_type(score_output_type), + m_rotation{rotation} { constructor_validate_and_infer_types(); } @@ -59,7 +63,9 @@ std::shared_ptr op::internal::NonMaxSuppressionIEInternal::clone_with_new_ new_args.at(5), m_center_point_box, m_sort_result_descending, - m_output_type); + m_output_type, + m_scores_output_type, + m_rotation); } else if (new_args.size() == 5) { return make_shared(new_args.at(0), new_args.at(1), @@ -68,7 +74,9 @@ std::shared_ptr op::internal::NonMaxSuppressionIEInternal::clone_with_new_ new_args.at(4), m_center_point_box, m_sort_result_descending, - m_output_type); + m_output_type, + m_scores_output_type, + m_rotation); } OPENVINO_THROW("Unsupported number of inputs: " + std::to_string(new_args.size())); } @@ -79,6 +87,7 @@ bool op::internal::NonMaxSuppressionIEInternal::visit_attributes(AttributeVisito visitor.on_attribute("sort_result_descending", m_sort_result_descending); visitor.on_attribute("output_type", m_output_type); visitor.on_attribute("score_output_type", m_scores_output_type); + visitor.on_attribute("rotation", m_rotation); return true; } diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp new file mode 100644 index 00000000000000..6313db127ac406 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/reference/autobroadcast_binop.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertU4WeightsZeroPointToScalar::ConvertU4WeightsZeroPointToScalar() { + MATCHER_SCOPE(ConvertU4WeightsZeroPointToScalar); + auto weights_m = pattern::wrap_type(pattern::type_matches(ov::element::u4)); + auto convert_m = pattern::wrap_type({weights_m}, pattern::consumers_count(1)); + + auto float_zp_predicate = [](ov::Output output) -> bool { + return pattern::type_matches_any({ov::element::f32, ov::element::f16})(output) && + pattern::consumers_count(1)(output); + }; + auto float_zero_point_m = pattern::wrap_type(float_zp_predicate); + + auto u4_zp_predicate = [](ov::Output output) -> bool { + return pattern::type_matches(ov::element::u4)(output) && pattern::consumers_count(1)(output); + }; + auto u4_zero_point_m = pattern::wrap_type(u4_zp_predicate); + auto zero_point_convert_m = pattern::wrap_type({u4_zero_point_m}, float_zp_predicate); + + auto zero_point_m = std::make_shared(OutputVector{float_zero_point_m, zero_point_convert_m}); + auto subtract_m = pattern::wrap_type({convert_m, zero_point_m}); + + 
ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + auto& pattern_map = m.get_pattern_value_map(); + auto weights = ov::as_type_ptr(pattern_map.at(weights_m).get_node_shared_ptr()); + std::shared_ptr zero_point; + if (pattern_map.count(float_zero_point_m)) { + const auto& float_zp = pattern_map.at(float_zero_point_m); + zero_point = ov::as_type_ptr(float_zp.get_node_shared_ptr()); + } else { + const auto& u4_zp = pattern_map.at(u4_zero_point_m); + zero_point = ov::as_type_ptr(u4_zp.get_node_shared_ptr()); + } + if (!weights || !zero_point) + return false; + // Due to the matcher specific and Subtract branches similarity, + // weights and zero_point might be mixed up with each other + if (ov::shape_size(weights->get_shape()) < ov::shape_size(zero_point->get_shape())) + std::swap(zero_point, weights); + + auto zero_point_shape = zero_point->get_shape(); + if (ov::shape_size(zero_point_shape) == 1) + return false; + + const auto& weights_shape = weights->get_shape(); + const size_t weights_rank = weights_shape.size(); + const size_t zero_point_rank = zero_point_shape.size(); + // Zero point constant can be converted into scalar only if this does not affect Subtract output shape + if (weights_rank < zero_point_rank) + return false; + + zero_point_shape.insert(zero_point_shape.begin(), weights_rank - zero_point_rank, 1); + for (size_t i = 0; i < weights_rank; ++i) { + if (zero_point_shape[i] > weights_shape[i]) + return false; + } + + float zp_value; + if (!ov::op::util::get_single_value(zero_point, zp_value)) + return false; + const auto new_zp = ov::op::v0::Constant::create(zero_point->get_element_type(), {}, {zp_value}); + return ov::replace_node_update_name(zero_point, new_zp); + }; + + auto m = std::make_shared(subtract_m, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp index 90b718f8067cae..7f7915f7965774 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp @@ -8,6 +8,7 @@ #include +#include #include #include "itt.hpp" @@ -16,8 +17,8 @@ #include "openvino/op/constant.hpp" #include "openvino/op/divide.hpp" #include "openvino/op/erf.hpp" -#include "openvino/op/gelu.hpp" #include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -302,11 +303,10 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { return false; } - constexpr float pi = 3.141592653589793238462643383279502884f; bool valid_constant_values = op::util::has_constant_value(pow_constant_value, 3.0f) && op::util::has_constant_value(mul_0_constant_value, 0.044715f, 0.001f) && - op::util::has_constant_value(mul_1_constant_value, std::sqrt(2.0f / pi), 0.01f) && + op::util::has_constant_value(mul_1_constant_value, std::sqrt(2.0 / M_PI), 0.01) && op::util::has_constant_value(mul_2_constant_value, 0.5f) && op::util::has_constant_value(add_1_constant_value, 1.0f); @@ -336,3 +336,76 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto m = std::make_shared(mul_3, matcher_name); register_matcher(m, callback); } + +ov::pass::GeluFusionWithTanhNoPower::GeluFusionWithTanhNoPower() { + // Replaces a sub-graph with a Gelu (ov::op::v0::Tanh) op + // x * 0.5 * (1 + tanh((x 
* 0.044715 * x + 1) * x * sqrt(2 / pi))) + MATCHER_SCOPE(GeluFusionWithTanhNoPower); + auto input = pattern::any_input(); + + auto const1 = pattern::wrap_type(); + auto mul1 = pattern::wrap_type({input, const1}); + + auto mul2 = pattern::wrap_type({mul1, input}); + + auto const2 = pattern::wrap_type(); + auto add1 = pattern::wrap_type({const2, mul2}); + + auto const3 = pattern::wrap_type(); + auto mul3 = pattern::wrap_type({input, const3}); + + auto mul4 = pattern::wrap_type({add1, mul3}); + + auto tanh = pattern::wrap_type({mul4}); + + auto const4 = pattern::wrap_type(); + auto add2 = pattern::wrap_type({tanh, const4}); + + auto const5 = pattern::wrap_type(); + auto mul5 = pattern::wrap_type({input, const5}); + + auto mul6 = pattern::wrap_type({add2, mul5}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto x_output = pattern_to_output.at(input); + + auto const1_value = pattern_to_output.at(const1).get_node_shared_ptr(); + auto const2_value = pattern_to_output.at(const2).get_node_shared_ptr(); + auto const3_value = pattern_to_output.at(const3).get_node_shared_ptr(); + auto const4_value = pattern_to_output.at(const4).get_node_shared_ptr(); + auto const5_value = pattern_to_output.at(const5).get_node_shared_ptr(); + + bool valid_constant_values = op::util::has_constant_value(const1_value, 0.044715f, 0.001f) && + op::util::has_constant_value(const2_value, 1.0f) && + op::util::has_constant_value(const3_value, std::sqrt(2.0 / M_PI), 0.01) && + op::util::has_constant_value(const4_value, 1.0f) && + op::util::has_constant_value(const5_value, 0.5f); + + if (!valid_constant_values) { + return false; + } + + auto gelu = std::make_shared(x_output, op::GeluApproximationMode::TANH); + + gelu->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info( + { + pattern_to_output.at(mul1).get_node_shared_ptr(), + pattern_to_output.at(mul2).get_node_shared_ptr(), + pattern_to_output.at(add1).get_node_shared_ptr(), + pattern_to_output.at(mul3).get_node_shared_ptr(), + pattern_to_output.at(mul4).get_node_shared_ptr(), + pattern_to_output.at(tanh).get_node_shared_ptr(), + pattern_to_output.at(add2).get_node_shared_ptr(), + pattern_to_output.at(mul5).get_node_shared_ptr(), + pattern_to_output.at(mul6).get_node_shared_ptr(), + }, + gelu); + ov::replace_node(m.get_match_root(), gelu); + return true; + }; + + auto m = std::make_shared(mul6, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp index e5eae04c640553..5b3aaec614ff17 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp @@ -148,6 +148,15 @@ ov::pass::GRUCellFusion::GRUCellFusion() { Bh = rg.make(WRh.get_element_type(), Shape{1, static_cast(hidden_size)}, 0); } + // perform additional check for applicability of the transformation + // without this check, process_weights can fail + if (WR.get_partial_shape()[1] != (hidden_size + input_size)) { + return false; + } + if (WRh.get_partial_shape()[1] != (hidden_size + input_size)) { + return false; + } + Output Wzrh, Rzrh, Bzrh; if (cnt_of_consumers_of_zero_out == 1 && cnt_of_consumers_of_first_out == 2) { tie(Wzrh, Rzrh) = process_weights(rg, false, WR, WRh, input_size, 
hidden_size, axis_0, axis_1); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 068e1f27a291e9..9a3446f2386161 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -21,6 +21,7 @@ #include "transformations/common_optimizations/conv_to_binary_conv.hpp" #include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include "transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" #include "transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp" #include "transformations/common_optimizations/depth_to_space_fusion.hpp" #include "transformations/common_optimizations/dilated_convolution_converter.hpp" @@ -86,6 +87,7 @@ #include "transformations/op_conversions/convert_ti_to_sequences.hpp" #include "transformations/resolve_names_collisions.hpp" #include "transformations/smart_reshape/lstm_states_broadcast.hpp" +#include "transformations/smart_reshape/matmul_sr.hpp" #include "transformations/smart_reshape/reshape_sinking.hpp" bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr& f) { @@ -165,11 +167,13 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr auto transpose_sinking = manager.register_pass(); ADD_MATCHER(transpose_sinking, TransposeSinking) - // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking, // because it replaces pattern that may contain Transposes which must be optimized before // the transformation and it also inserts Transpose that can be optimized by TransposeSinking ADD_MATCHER(transpose_sinking, SplitSqueezeConcatFusion) + + REGISTER_PASS(manager, TransposeMatMul) + auto eliminations = manager.register_pass(); ADD_MATCHER(eliminations, EliminateUnsqueezeGather) ADD_MATCHER(eliminations, NopElimination, m_use_shapes) @@ -212,6 +216,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr ADD_MATCHER(common_fusions, ShuffleChannelsFusion, !m_use_shapes) ADD_MATCHER(common_fusions, NonZeroHorizontalFusion) ADD_MATCHER(common_fusions, AdaptivePoolToReduce) + ADD_MATCHER(common_fusions, ConvertU4WeightsZeroPointToScalar) common_fusions->set_name("ov::pass::CommonFusions"); REGISTER_PASS(manager, BinarizeWeights) diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp index f9738929931f21..3304ee3718ab57 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp @@ -116,7 +116,7 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } // Remove inputs bool pass_required = false; - std::set> required_inputs; + std::set required_inputs_indices; auto op_inputs = multi_subgraph_op->input_values(); std::vector> to_remove_descriptors_indexes; to_remove_descriptors_indexes.resize(subgraphs_size); @@ -133,7 +133,7 @@ bool 
ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } else { // collecting required inputs is needed to detect cases where the input // is not needed in a one body, but the other one uses it (for example If case) - required_inputs.insert(op_inputs[body_in_descriptors[i]->m_input_index]); // only unique + required_inputs_indices.insert(body_in_descriptors[i]->m_input_index); } } } @@ -148,7 +148,9 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } }; auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr& op, + std::set& required_inputs_indices, uint64_t removed_loop_idx) { + std::set new_required_inputs_indices; for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& descriptors = op->get_input_descriptions(static_cast(body_idx)); for (auto& desc : descriptors) { @@ -157,6 +159,14 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } } } + for (auto input_index : required_inputs_indices) { + if (input_index > removed_loop_idx) { + new_required_inputs_indices.insert(input_index - 1); + } else { + new_required_inputs_indices.insert(input_index); + } + } + required_inputs_indices = new_required_inputs_indices; }; // Remove dangling body params and input and update input descriptors for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { @@ -174,13 +184,17 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st update_body_param_desc(body_in_descriptors, body_in_descriptors[desc_idx]->m_body_parameter_index); // remove dangling input of MultiSubGraphOp which was not removed earlier - auto& current_input = op_inputs[body_in_descriptors[desc_idx]->m_input_index]; - if (std::count(std::begin(required_inputs), std::end(required_inputs), current_input) == 0 && + auto current_input_idx = body_in_descriptors[desc_idx]->m_input_index; + auto& current_input = op_inputs[current_input_idx]; + // the same input tensor can go to different input ports + if (std::count(std::begin(required_inputs_indices), + std::end(required_inputs_indices), + current_input_idx) == 0 && std::count(std::begin(op_inputs), std::end(op_inputs), current_input) > 0) { - op_inputs.erase(std::next(op_inputs.begin(), body_in_descriptors[desc_idx]->m_input_index)); + op_inputs.erase(std::next(op_inputs.begin(), current_input_idx)); // Move all input indexes (in all bodies) which are after these indicated by // to_remove_descriptors_indexes and are not used in any body - update_op_inputs_desc(multi_subgraph_op, body_in_descriptors[desc_idx]->m_input_index); + update_op_inputs_desc(multi_subgraph_op, required_inputs_indices, current_input_idx); } } else { updated_body_in_descriptors.emplace_back(body_in_descriptors[desc_idx]); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp new file mode 100644 index 00000000000000..b3040cda132852 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp @@ -0,0 +1,109 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp" + +#include +#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include 
"openvino/op/non_max_suppression.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/nms_ie_internal.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertNMSRotatedToNMSIEInternal::ConvertNMSRotatedToNMSIEInternal() { + MATCHER_SCOPE(ConvertNMSRotatedToNMSIEInternal); + auto nms = ov::pass::pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto nms_rotated = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms_rotated || transformation_callback(nms_rotated)) { + return false; + } + + const auto new_args = nms_rotated->input_values(); + const std::size_t num_of_inputs = new_args.size(); + OPENVINO_ASSERT(num_of_inputs == 5); + + const auto& max_per_class = new_args.at(2); + const auto& iou_threshold = new_args.at(3); + const auto& score_threshold = new_args.at(4); + + // vector of new openvino operations + NodeVector new_ops; + + auto one_dim_shape = Shape{1}; + + Output new_max_per_class; + Output new_iou_threshold; + Output new_score_threshold; + Output new_soft_nms_sigma; + + Output new_shape_for_max_per_class = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_iou_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_score_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_soft_nms_sigma = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + + new_max_per_class = std::make_shared(max_per_class, new_shape_for_max_per_class, true); + new_ops.emplace_back(new_max_per_class.get_node_shared_ptr()); + + new_iou_threshold = std::make_shared(iou_threshold, new_shape_for_iou_threshold, true); + new_ops.emplace_back(new_iou_threshold.get_node_shared_ptr()); + + new_score_threshold = + std::make_shared(score_threshold, new_shape_for_score_threshold, true); + new_ops.emplace_back(new_score_threshold.get_node_shared_ptr()); + + constexpr int BoxEncodingType_Center = 1; // see NonMaxSuppression::BoxEncodingType + const int center_point_box = BoxEncodingType_Center; // for NMSRotated is it always Center + + const auto rotation = nms_rotated->get_clockwise() + ? 
op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise + : op::internal::NonMaxSuppressionIEInternal::Rotation_Counterclockwise; + + std::shared_ptr nms_legacy{nullptr}; + + nms_legacy = + std::make_shared(new_args.at(0), + new_args.at(1), + + new_max_per_class, + new_iou_threshold, + new_score_threshold, + + center_point_box, + nms_rotated->get_sort_result_descending(), + element::i32, + nms_rotated->get_output_element_type(1), + rotation); + new_ops.push_back(nms_legacy); + + Output output_0 = nms_legacy->output(0); + if (nms_rotated->output(0).get_element_type() != output_0.get_element_type()) { + output_0 = std::make_shared(output_0, nms_rotated->output(0).get_element_type()); + output_0.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(0))); + new_ops.emplace_back(output_0.get_node_shared_ptr()); + } + + Output output_2 = nms_legacy->output(2); + if (nms_rotated->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms_rotated->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(2))); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_legacy->set_friendly_name(nms_rotated->get_friendly_name()); + ov::copy_runtime_info(nms_rotated, new_ops); + ov::replace_node(nms_rotated, {output_0, nms_legacy->output(1), output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/utils/utils.cpp b/src/common/transformations/src/transformations/utils/utils.cpp index 62b1765e7ba275..b7cde395a66eb5 100644 --- a/src/common/transformations/src/transformations/utils/utils.cpp +++ b/src/common/transformations/src/transformations/utils/utils.cpp @@ -31,6 +31,8 @@ bool get_single_value(const std::shared_ptr& const_node, float return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::f64: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); + case element::Type_t::i4: + return util::normalize_single_value(const_node->cast_vector(), value, check_value_range); case element::Type_t::i8: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::i16: @@ -39,6 +41,8 @@ bool get_single_value(const std::shared_ptr& const_node, float return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::i64: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); + case element::Type_t::u4: + return util::normalize_single_value(const_node->cast_vector(), value, check_value_range); case element::Type_t::u8: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::u16: diff --git a/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp new file mode 100644 index 00000000000000..8fc896065e9001 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp @@ -0,0 +1,208 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include 
"transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/pass/manager.hpp" + +using namespace testing; +using namespace ov; + +TEST_F(TransformationTestsF, ConvertU4WeightsFloatZeroPointToScalar) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8.1f}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, scalar_shape, {8.1f}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalar) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, scalar_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + 
comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsFloatZeroPointToScalarWeightsWithBiggerRank) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, scalar_shape, {8}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, FuseU4WeightsAndZeroPointNotScalarLikeZP) { + auto weights_precision = ov::element::u8; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + std::vector zero_point_values(ov::shape_size(decompression_shape), 8); + zero_point_values.back() = 6; + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, zero_point_values); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, FuseU4WeightsAndZeroPointNotU4Weights) { + auto weights_precision = ov::element::u8; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, 
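+// note on this and the neighbouring negative cases: they intentionally leave
+// `model_ref` unset, which (by the TransformationTestsF convention, as far as
+// the fixture shows) makes the reference an untouched copy of `model`, i.e. the
+// pass is expected to be a no-op whenever the preconditions do not hold.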
ConvertU4WeightsFloatZeroPointToScalarAdditionalZPConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8}); + auto zero_point_consumer = std::make_shared(zero_point); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarAdditionalZPConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_consumer = std::make_shared(zero_point); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarAdditionalZPConvertConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto zero_point_convert_consumer = std::make_shared(zero_point_convert); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_convert_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarZPWithBiggerRank) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{1, 32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto zero_point_convert_consumer = 
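+    // judging by the include list, the extra consumer here is a ShapeOf node:
+    // once the Convert output is shared, shrinking the zero point to a scalar
+    // would change the shape that consumer observes, so the pass has to leave
+    // this pattern untouched.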
std::make_shared(zero_point_convert); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_convert_consumer}, ParameterVector{}); + manager.register_pass(); +} diff --git a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp index 7c5311307d7d95..aa1f1d32a3da16 100644 --- a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp @@ -9,14 +9,21 @@ #include #include +#include #include #include #include #include "common_test_utils/ov_test_utils.hpp" #include "openvino/core/model.hpp" -#include "openvino/opsets/opset7.hpp" -#include "openvino/opsets/opset9.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/erf.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/tanh.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" #include "transformations/convert_precision.hpp" @@ -28,17 +35,17 @@ using namespace ov; TEST_F(TransformationTestsF, GeluFusionPatternOne) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, mul_const); - auto mul = std::make_shared(mul_first, add); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, mul_const); + auto mul = std::make_shared(mul_first, add); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -47,24 +54,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternOne) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternOneF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = 
std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, mul_const); - auto mul = std::make_shared(mul_first, add); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, mul_const); + auto mul = std::make_shared(mul_first, add); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -73,24 +80,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternOneF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternTwo) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -99,24 +106,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternTwo) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternTwoF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -125,24 +132,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternTwoF16) { { auto data = 
std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternThree) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(add, mul_const); - auto mul = std::make_shared(data, mul_first); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(add, mul_const); + auto mul = std::make_shared(data, mul_first); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -151,24 +158,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternThree) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternThreeF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(add, mul_const); - auto mul = std::make_shared(data, mul_first); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(add, mul_const); + auto mul = std::make_shared(data, mul_first); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -177,24 +184,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternThreeF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternFour) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto mul1_const = opset9::Constant::create(element::f32, Shape{1}, {1.0f / M_SQRT2}); - auto add_const = opset9::Constant::create(element::f32, Shape{1}, {0.5f}); - auto mul2_const = opset9::Constant::create(element::f32, Shape{1}, {0.5f}); 
+ auto mul1_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0f / M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5f}); + auto mul2_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5f}); - auto mul1 = std::make_shared(data, mul1_const); - auto erf = std::make_shared(mul1); - auto mul2 = std::make_shared(erf, mul2_const); - auto add = std::make_shared(mul2, add_const); - auto mul3 = std::make_shared(data, add); + auto mul1 = std::make_shared(data, mul1_const); + auto erf = std::make_shared(mul1); + auto mul2 = std::make_shared(erf, mul2_const); + auto add = std::make_shared(mul2, add_const); + auto mul3 = std::make_shared(data, add); model = std::make_shared(NodeVector{mul3}, ParameterVector{data}); @@ -203,24 +210,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternFour) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternFourF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto mul1_const = opset9::Constant::create(element::f16, Shape{1}, {1.0f / M_SQRT2}); - auto add_const = opset9::Constant::create(element::f16, Shape{1}, {0.5f}); - auto mul2_const = opset9::Constant::create(element::f16, Shape{1}, {0.5f}); + auto mul1_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0f / M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5f}); + auto mul2_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5f}); - auto mul1 = std::make_shared(data, mul1_const); - auto erf = std::make_shared(mul1); - auto mul2 = std::make_shared(erf, mul2_const); - auto add = std::make_shared(mul2, add_const); - auto mul3 = std::make_shared(data, add); + auto mul1 = std::make_shared(data, mul1_const); + auto erf = std::make_shared(mul1); + auto mul2 = std::make_shared(erf, mul2_const); + auto add = std::make_shared(mul2, add_const); + auto mul3 = std::make_shared(data, add); model = std::make_shared(NodeVector{mul3}, ParameterVector{data}); @@ -229,24 +236,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternFourF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternIncorrectDivConstValue) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {1.4149}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.4149}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = 
std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); model_ref = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -257,17 +264,17 @@ TEST_F(TransformationTestsF, GeluFusionPatternIncorrectDivConstValue) { TEST_F(TransformationTestsF, GeluFusionPatternTooShortDivConstValue) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {1.4142}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.4142}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); model_ref = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -278,60 +285,62 @@ TEST_F(TransformationTestsF, GeluFusionPatternTooShortDivConstValue) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_equal_const_values) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = 
std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_params_no_conversion) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_param = std::make_shared(element::f32, Shape{1}); - auto pow = std::make_shared(input, pow_param); - auto mul_0_param = std::make_shared(element::f32, Shape{1}); - auto mul_0 = std::make_shared(pow, mul_0_param); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_param = std::make_shared(element::f32, Shape{1}); + auto pow = std::make_shared(input, pow_param); + auto mul_0_param = std::make_shared(element::f32, Shape{1}); + auto mul_0 = std::make_shared(pow, mul_0_param); + auto add_0 = std::make_shared(input, mul_0); - auto mul_1_param = std::make_shared(element::f32, Shape{1}); - auto mul_1 = std::make_shared(add_0, mul_1_param); + auto mul_1_param = std::make_shared(element::f32, Shape{1}); + auto mul_1 = std::make_shared(add_0, mul_1_param); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_param = std::make_shared(element::f32, Shape{1}); - auto add_1 = std::make_shared(tanh, add_1_param); + auto add_1_param = std::make_shared(element::f32, Shape{1}); + auto add_1 = std::make_shared(tanh, add_1_param); - auto mul_2_param = std::make_shared(element::f32, Shape{1}); - auto mul_2 = std::make_shared(add_1, mul_2_param); + auto mul_2_param = std::make_shared(element::f32, Shape{1}); + auto mul_2 = std::make_shared(add_1, mul_2_param); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared( NodeVector{mul_3}, @@ -342,63 +351,67 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_params_no_conversion) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); + auto input = std::make_shared(element::f32, Shape{2, 2}); auto pow_constant = - std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = 
std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -407,62 +420,66 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_0_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, 
pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.04515f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.04515f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_0_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.4715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{1.4715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, 
add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -471,61 +488,64 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_0_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{0.7980868f}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, Shape{1}, std::vector{0.7980868f}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = 
std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(10.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(10.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -534,63 +554,67 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_1_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_add_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); auto add_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{1.0f + 1.0e-8f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + std::make_shared(element::f32, Shape{1}, std::vector{1.0f + 1.0e-8f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, 
mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_add_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -599,65 +623,110 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_add_1_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_2_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - 
std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); auto mul_2_constant = - std::make_shared(element::f32, Shape{1}, std::vector{0.5f + 1.0e-8f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + std::make_shared(element::f32, Shape{1}, std::vector{0.5f + 1.0e-8f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_2_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{5.0f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{5.0f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } } + +TEST_F(TransformationTestsF, 
FoldGeluOperation) { + { + auto param = std::make_shared(element::f32, Shape{1006, 2, 100, 3, 4096}); + auto const1 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{0.044715f}); + + auto mul1 = std::make_shared(param, const1); + auto mul2 = std::make_shared(mul1, param); + + auto const2 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{1.0}); + auto add1 = std::make_shared(const2, mul2); + + auto const3 = ov::op::v0::Constant::create(element::f32, + Shape{1, 1, 1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul3 = std::make_shared(param, const3); + + auto mul4 = std::make_shared(add1, mul3); + auto tan = std::make_shared(mul4); + + auto const4 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{1.0}); + auto add2 = std::make_shared(tan, const4); + + auto const5 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{0.5}); + auto mul5 = std::make_shared(param, const5); + + auto mul6 = std::make_shared(add2, mul5); + + auto result = std::make_shared(mul6); + model = std::make_shared(NodeVector{result}, ParameterVector{param}); + + manager.register_pass(); + } + + { + auto param = std::make_shared(element::f32, Shape{1006, 2, 100, 3, 4096}); + auto gelu = std::make_shared(param, ov::op::GeluApproximationMode::TANH); + auto result = std::make_shared(gelu); + model_ref = std::make_shared(NodeVector{result}, ParameterVector{param}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp index 698973740e08e6..b0e327e4d4bad4 100644 --- a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp +++ b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp @@ -433,3 +433,74 @@ TEST_F(SharedTransformationTestsF, SharedShapeOfTestMixed) { model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); } } + +namespace { +OutputVector createShapeNodesInMemory(const std::vector& node_order_in_memory, + std::shared_ptr& memory, + const std::string& node_name_prefix, + const std::shared_ptr& input, + element::Type output_type) { + OutputVector outputs; + memory.reset(::malloc(node_order_in_memory.size() * sizeof(v3::ShapeOf)), ::free); + for (size_t i = 0; i < node_order_in_memory.size(); ++i) { + v3::ShapeOf* node_addr = static_cast(memory.get()) + node_order_in_memory[i]; + auto node_ptr = + std::shared_ptr(new (node_addr) v3::ShapeOf(input, output_type), [](v3::ShapeOf* node) { + node->v3::ShapeOf::~ShapeOf(); + }); + std::stringstream ss; + ss << node_name_prefix << i; + node_ptr->set_friendly_name(ss.str()); + outputs.push_back(node_ptr->output(0)); + } + + return outputs; +} + +std::shared_ptr createModelWithShapes(const Shape& input_shape, + const std::vector& node_order_in_memory, + const std::string& node_name_prefix, + std::shared_ptr& buffer) { + auto input = std::make_shared(element::f32, input_shape); + auto shape_nodes = createShapeNodesInMemory(node_order_in_memory, buffer, node_name_prefix, input, element::i64); + + NodeVector inputs_of_concat; + for (const auto& shape_node : shape_nodes) { + auto node = std::make_shared(shape_node, element::i64); + inputs_of_concat.push_back(node); + } + + auto concat = std::make_shared(inputs_of_concat, 0); + return std::make_shared(NodeVector{concat}, ParameterVector{input}); +} +} // namespace + +/** + * @brief Check that the node address does not influence the 
transformation result + */ +TEST(TransformationTests, SharedShapeOfTestRandomOrder) { + Shape input_shape{120, 4}; + std::shared_ptr buffer; + // nodes are placed into pre-allocated memory in order that is specified in next variable + std::vector> node_orders_in_memory = {{0, 1}, {1, 0}}; + + std::vector> models; + for (const auto& node_order_in_memory : node_orders_in_memory) { + auto model = createModelWithShapes(input_shape, node_order_in_memory, "Shape_", buffer); + + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + + const auto model_ops = model->get_ops(); + const auto op_it = std::find_if(model_ops.begin(), model_ops.end(), [](const std::shared_ptr& node) { + return node->get_friendly_name() == "Shape_0"; + }); + ASSERT_TRUE(op_it != model_ops.end()) << "node Shape_0 is not found in model"; + // we need to clone while memory will be reused on the next iteration for the new model + models.push_back(model->clone()); + } + + FunctionsComparator comparator = FunctionsComparator::with_default(); + comparator.compare(models[0], models[1]); +} diff --git a/src/common/transformations/tests/utils/compress_quantize_weights.cpp b/src/common/transformations/tests/utils/compress_quantize_weights.cpp index cc31017368863f..15d07188f805f2 100644 --- a/src/common/transformations/tests/utils/compress_quantize_weights.cpp +++ b/src/common/transformations/tests/utils/compress_quantize_weights.cpp @@ -232,6 +232,30 @@ TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminated) { comparator.enable(FunctionsComparator::CmpValues::ACCURACY); } +TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminatedZeroScale) { + { + auto data = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.144816, 0.0858578, 0.110928}); + auto input_low = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.402659, -0.383148, -0.34054}); + auto input_high = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.399513, 0.380155, 0.33788}); + auto output_low = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.402659, 0.0, -0.34054}); + auto output_high = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.399513, 0.0, 0.33788}); + auto fq = std::make_shared(data, input_low, input_high, output_low, output_high, 256); + model = std::make_shared(NodeVector{fq}, ParameterVector{}); + + manager.register_pass(); + } + + { + auto data = opset8::Constant::create(element::i8, Shape{3, 1, 1, 1}, {-46, 29, 42}); + auto convert = std::make_shared(data, element::f32); + auto scale = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.00314577, 0.0, 0.00266047}); + auto mul = std::make_shared(convert, scale); + model_ref = std::make_shared(NodeVector{mul}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ACCURACY); +} + TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminatedFP16) { { auto data = opset8::Constant::create(element::f16, Shape{3, 1, 1, 1}, {0.2, 1.2, 1.2}); diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 6c0da965f9bfad..1dac080461d16b 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -1188,7 +1188,7 @@ void constant_convert_test(element::Type type_from, } ASSERT_TRUE(actual.size() >= expected.size()); for 
(size_t i = 0; i < expected.size(); i++) { - ASSERT_EQ(expected[i], actual[i]); + EXPECT_EQ(expected[i], actual[i]) << "Elements with index " << i << " are not equal."; } } @@ -1378,7 +1378,7 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) { constant_convert_test(element::u1, element::u4, std::vector{171}, - {1, 0, 1, 0, 1, 0, 1, 1}); + {0, 1, 0, 1, 0, 1, 1, 1}); } TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_exp) { diff --git a/src/core/include/openvino/core/type/nf4.hpp b/src/core/dev_api/openvino/core/type/nf4.hpp similarity index 100% rename from src/core/include/openvino/core/type/nf4.hpp rename to src/core/dev_api/openvino/core/type/nf4.hpp diff --git a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp new file mode 100644 index 00000000000000..7611744f7c4e58 --- /dev/null +++ b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp @@ -0,0 +1,75 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/core/attribute_adapter.hpp" +#include "openvino/core/core_visibility.hpp" + +namespace ov { +/// \brief Allocates a block of memory on the specified alignment. The actual size of the +/// allocated memory is larger than the requested size by the alignment, so allocating 1 +/// byte +/// on 64 byte alignment will allocate 65 bytes. +class OPENVINO_API AlignedBuffer { +public: + // Allocator objects and the allocation interfaces are owned by the + // creators of AlignedBuffers. They need to ensure that the lifetime of + // the allocator exceeds the lifetime of this AlignedBuffer. + AlignedBuffer(size_t byte_size, size_t alignment = 64); + + AlignedBuffer(); + virtual ~AlignedBuffer(); + + AlignedBuffer(AlignedBuffer&& other); + AlignedBuffer& operator=(AlignedBuffer&& other); + + size_t size() const { + return m_byte_size; + } + void* get_ptr(size_t offset) const { + return m_aligned_buffer + offset; + } + void* get_ptr() { + return m_aligned_buffer; + } + const void* get_ptr() const { + return m_aligned_buffer; + } + template + T* get_ptr() { + return reinterpret_cast(m_aligned_buffer); + } + template + const T* get_ptr() const { + return reinterpret_cast(m_aligned_buffer); + } + + template + explicit operator T*() { + return get_ptr(); + } + +private: + AlignedBuffer(const AlignedBuffer&) = delete; + AlignedBuffer& operator=(const AlignedBuffer&) = delete; + +protected: + char* m_allocated_buffer; + char* m_aligned_buffer; + size_t m_byte_size; +}; + +template <> +class OPENVINO_API AttributeAdapter> + : public DirectValueAccessor> { +public: + AttributeAdapter(std::shared_ptr& value); + + OPENVINO_RTTI("AttributeAdapter"); +}; + +} // namespace ov diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp new file mode 100644 index 00000000000000..1b51bfa07b7ee3 --- /dev/null +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/aligned_buffer.hpp" + +namespace ov { + +/// \brief SharedBuffer class to store pointer to pre-allocated buffer. 
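+/// The buffer does not own the memory: it keeps a copy of the shared object so that the underlying allocation stays alive for the buffer's lifetime. +/// For illustration only, a hypothetical sketch (assuming an ov::Tensor `tensor` in scope) of sharing tensor memory without copying: +/// auto buffer = std::make_shared<SharedBuffer<ov::Tensor>>( +/// static_cast<char*>(tensor.data()), tensor.get_byte_size(), tensor); 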
+template +class SharedBuffer : public ov::AlignedBuffer { +public: + SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) { + m_allocated_buffer = data; + m_aligned_buffer = data; + m_byte_size = size; + } + + virtual ~SharedBuffer() { + m_aligned_buffer = nullptr; + m_allocated_buffer = nullptr; + m_byte_size = 0; + } + +private: + T _shared_object; +}; + +} // namespace ov diff --git a/src/core/dev_api/validation_util.hpp b/src/core/dev_api/validation_util.hpp index e93fefd1411eb9..2495fd1029959a 100644 --- a/src/core/dev_api/validation_util.hpp +++ b/src/core/dev_api/validation_util.hpp @@ -34,7 +34,7 @@ OPENVINO_API bool are_unique(const std::vector& data); /// /// \param value Value to be clipped. /// \param min Minimum value bound. -/// \param max Maximum value boiund +/// \param max Maximum value bound. /// /// \return Value if between min, max otherwise min or max. OPENVINO_API int64_t clip(const int64_t& value, const int64_t& min, const int64_t& max); @@ -43,18 +43,21 @@ OPENVINO_API int64_t clip(const int64_t& value, const int64_t& min, const int64_ /// /// \param subgraph sink /// -/// \return Constant node or nullptr if unable to constantfold the subgraph +/// \return Constant node or nullptr if unable to constant fold the subgraph OPENVINO_API std::shared_ptr constantfold_subgraph(const Output& subgraph_sink); -/** - * @brief Runs an estimation of source tensor. If it succeeded to calculate both bounds and - * they are the same returns Constant operation from the resulting bound, otherwise nullptr. - * - * @param source Node output used to get its tensor data as constant. - * @return Shared pointer to constant data or nullptr. - */ +/// \brief Runs an estimation of the source tensor. If it succeeds in calculating both bounds and +/// they are the same, returns a Constant operation from the resulting bound, otherwise nullptr. +/// +/// \param source Node output used to get its tensor data as constant. +/// \return Shared pointer to constant data or nullptr. OPENVINO_API std::shared_ptr get_constant_from_source(const Output& source); +/// \brief Make a scalar tensor which stores the maximum value of ov::element::Type. +/// \param et Element type to get its maximum. +/// \return Tensor with maximum value. +Tensor make_tensor_of_max_value(const element::Type_t et); + /// \brief Apply auto padding to padding_above and padding_below inputs /// if all needed informations are known. /// diff --git a/src/core/include/ngraph/op/util/op_annotations.hpp b/src/core/include/ngraph/op/util/op_annotations.hpp index dad2aa3b43431a..dec2879f9c837f 100644 --- a/src/core/include/ngraph/op/util/op_annotations.hpp +++ b/src/core/include/ngraph/op/util/op_annotations.hpp @@ -30,7 +30,6 @@ struct NGRAPH_API_DEPRECATED oi_pair { }; /// \brief Base class for annotations added to graph ops - class NGRAPH_API_DEPRECATED NGRAPH_API OpAnnotations { NGRAPH_SUPPRESS_DEPRECATED_START public: diff --git a/src/core/include/ngraph/op/util/slice_plan.hpp b/src/core/include/ngraph/op/util/slice_plan.hpp index a0f99cccaed30c..e47e4ecd80e4f0 100644 --- a/src/core/include/ngraph/op/util/slice_plan.hpp +++ b/src/core/include/ngraph/op/util/slice_plan.hpp @@ -38,6 +38,7 @@ namespace ngraph { // // A SlicePlan is used to collect parameters for these ops. 
// +// This class has been moved to the dev API struct NGRAPH_API_DEPRECATED NGRAPH_API SlicePlan { // Parameters for the Slice std::vector begins; diff --git a/src/core/include/ngraph/opsets/opset.hpp b/src/core/include/ngraph/opsets/opset.hpp index 26c21e237b16c3..3f65437c6d3801 100644 --- a/src/core/include/ngraph/opsets/opset.hpp +++ b/src/core/include/ngraph/opsets/opset.hpp @@ -31,9 +31,7 @@ namespace ngraph { class NGRAPH_API OpSet : public ov::OpSet { public: explicit OpSet(const ov::OpSet& opset); - NGRAPH_SUPPRESS_DEPRECATED_START OpSet(const ngraph::OpSet& opset); - NGRAPH_SUPPRESS_DEPRECATED_END OpSet() = default; /// \brief Insert an op into the opset with a particular name and factory void insert(const std::string& name, const NodeTypeInfo& type_info, FactoryRegistry::Factory factory) { @@ -56,19 +54,20 @@ class NGRAPH_API OpSet : public ov::OpSet { } }; -const NGRAPH_API OpSet& get_opset1(); -const NGRAPH_API OpSet& get_opset2(); -const NGRAPH_API OpSet& get_opset3(); -const NGRAPH_API OpSet& get_opset4(); -const NGRAPH_API OpSet& get_opset5(); -const NGRAPH_API OpSet& get_opset6(); -const NGRAPH_API OpSet& get_opset7(); -const NGRAPH_API OpSet& get_opset8(); -const NGRAPH_API OpSet& get_opset9(); -const NGRAPH_API OpSet& get_opset10(); -const NGRAPH_API OpSet& get_opset11(); -const NGRAPH_API OpSet& get_opset12(); -const NGRAPH_API OpSet& get_opset13(); -const NGRAPH_API std::map>& get_available_opsets(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset1(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset2(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset3(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset4(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset5(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset6(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset7(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset8(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset9(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset10(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset11(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset12(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset13(); +NGRAPH_API_DEPRECATED const NGRAPH_API std::map>& +get_available_opsets(); } // namespace ngraph NGRAPH_SUPPRESS_DEPRECATED_END diff --git a/src/core/include/openvino/core/type/element_type.hpp b/src/core/include/openvino/core/type/element_type.hpp index 1534e9e0cc8fca..78e200d5035f79 100644 --- a/src/core/include/openvino/core/type/element_type.hpp +++ b/src/core/include/openvino/core/type/element_type.hpp @@ -20,7 +20,6 @@ #include "openvino/core/rtti.hpp" #include "openvino/core/type/bfloat16.hpp" #include "openvino/core/type/float16.hpp" -#include "openvino/core/type/nf4.hpp" /** * @defgroup ov_element_cpp_api Element types diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 14ee7b3313490e..6299dde459061c 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -12,7 +12,6 @@ # define WAS_OV_LIBRARY_DEFINED_CONSTANT #endif -#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/shared_buffer.hpp" @@ -21,11 +20,14 @@ # undef WAS_OV_LIBRARY_DEFINED_CONSTANT #endif #include "openvino/core/coordinate_diff.hpp" -#include "openvino/core/node.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/core/type/element_type_traits.hpp" 
+#include "openvino/op/op.hpp" namespace ov { + +class AlignedBuffer; + namespace op { namespace v0 { /// \brief Class for constants. @@ -177,13 +179,20 @@ class OPENVINO_API Constant : public Op { /// \param shape The shape of the tensor constant. /// \param data A pointer to pre-allocated shared data. template + OPENVINO_DEPRECATED("This constructor is deprecated and will be removed in 2024.0 release") Constant(const element::Type& type, const Shape& shape, std::shared_ptr> data) : m_element_type(type), m_shape(shape) { - m_data = data; + m_data = legacy_to_ov_aligned_buffer(data); constructor_validate_and_infer_types(); } OPENVINO_SUPPRESS_DEPRECATED_END + Constant(const element::Type& type, const Shape& shape, const std::shared_ptr& data) + : m_element_type(type), + m_shape(shape) { + m_data = data; + constructor_validate_and_infer_types(); + } Constant(const Constant& other); Constant(const Constant& other, const Shape& new_shape); @@ -241,11 +250,7 @@ class OPENVINO_API Constant : public Op { AxisSet get_axis_set_val() const; /// \brief Return data size in bytes - size_t get_byte_size() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return m_data->size(); - OPENVINO_SUPPRESS_DEPRECATED_END - } + size_t get_byte_size() const; /// \brief Wrapper around constructing a shared_ptr of a Constant /// @@ -370,11 +375,8 @@ class OPENVINO_API Constant : public Op { return rc; } - const void* get_data_ptr() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return (m_data ? m_data->get_ptr() : nullptr); - OPENVINO_SUPPRESS_DEPRECATED_END - } + const void* get_data_ptr() const; + template const T* get_data_ptr() const { OPENVINO_ASSERT(sizeof(T) <= m_element_type.size() || shape_size(m_shape) <= 0, "Buffer over-read"); @@ -406,6 +408,11 @@ class OPENVINO_API Constant : public Op { private: Constant(bool memset_allocation, const element::Type& type, const Shape& shape); + OPENVINO_SUPPRESS_DEPRECATED_START + std::shared_ptr legacy_to_ov_aligned_buffer( + const std::shared_ptr& buffer); + OPENVINO_SUPPRESS_DEPRECATED_END + template , typename std::enable_if, typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - return (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + return (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; } template , typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; return data; @@ -485,9 +492,19 @@ class OPENVINO_API Constant : public Op { if (!std::is_same::value) { OPENVINO_ASSERT( !std::numeric_limits::is_signed || std::numeric_limits::lowest() <= c, - "Cannot cast vector from constant. Some values are outside the range."); + "Cannot cast vector from ", + Type, + " constant to ", + element::from(), + ". Some values are outside the range. Example: ", + c); OPENVINO_ASSERT(std::numeric_limits::max() >= c, - "Cannot cast vector from constant. Some values are outside the range."); + "Cannot cast vector from ", + Type, + " constant to ", + element::from(), + ". Some values are outside the range. Example: ", + c); } #if defined(__clang__) # pragma clang diagnostic pop @@ -530,7 +547,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? 
element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here? std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t data = (c >> i) & 0x0F; output.push_back(data); } @@ -548,7 +565,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here? std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t i4data = (c >> i) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; @@ -627,11 +644,7 @@ class OPENVINO_API Constant : public Op { void allocate_buffer(bool memset_allocation); - void* get_data_ptr_nc() { - OPENVINO_SUPPRESS_DEPRECATED_START - return (m_data ? m_data->get_ptr() : nullptr); - OPENVINO_SUPPRESS_DEPRECATED_END - } + void* get_data_ptr_nc(); template typename element_type_traits::value_type* get_data_ptr_nc() { @@ -663,27 +676,9 @@ class OPENVINO_API Constant : public Op { template , - typename std::enable_if::type = true> - void write_buffer(const std::vector& source) { - auto p = get_data_ptr_nc(); - size_t i = 0; - for (; i < source.size() / 2; i++) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v2 = value_in_range(source[i * 2 + 1]) & 0x0F; - const auto v = (v1 << 4) | v2; - p[i] = static_cast(v); - } - if (source.size() % 2) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v = v1 << 4; - p[i] = static_cast(v); - } - } - - template , - typename std::enable_if::value, bool>::type = true> + typename std::enable_if::value), + bool>::type = true> void write_buffer(const std::vector& source) { auto p = get_data_ptr_nc(); size_t i = 0; @@ -710,15 +705,15 @@ class OPENVINO_API Constant : public Op { auto p = get_data_ptr_nc(); size_t i = 0; for (; i < source.size() / 2; i++) { - const auto idx1 = ConvertNF4::quantize(static_cast(source[i * 2])); - const auto idx2 = ConvertNF4::quantize(static_cast(source[i * 2 + 1])); + const auto idx1 = quantize_nf4(static_cast(source[i * 2])); + const auto idx2 = quantize_nf4(static_cast(source[i * 2 + 1])); const auto v1 = value_in_range(idx1) & 0x0F; const auto v2 = value_in_range(idx2) & 0x0F; const auto v = (v2 << 4) | v1; p[i] = static_cast(v); } if (source.size() % 2) { - const auto idx1 = ConvertNF4::quantize(static_cast(source[i * 2])); + const auto idx1 = quantize_nf4(static_cast(source[i * 2])); const auto v = value_in_range(idx1) & 0x0F; p[i] = static_cast(v); } @@ -858,12 +853,11 @@ class OPENVINO_API Constant : public Op { } return shape_size(m_shape) * m_element_type.size(); } + static uint8_t quantize_nf4(float x); element::Type m_element_type; Shape m_shape{}; - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr m_data; - OPENVINO_SUPPRESS_DEPRECATED_END + std::shared_ptr m_data; mutable std::atomic_bool m_all_elements_bitwise_identical{false}; mutable std::atomic_bool m_all_elements_bitwise_identical_checked{false}; bool m_alloc_buffer_on_visit_attributes = true; diff --git a/src/core/include/openvino/op/minimum.hpp b/src/core/include/openvino/op/minimum.hpp index c8cfc5c9d7c999..30819b2a72f849 100644 --- a/src/core/include/openvino/op/minimum.hpp +++ b/src/core/include/openvino/op/minimum.hpp @@ -29,9 +29,7 @@ class OPENVINO_API Minimum : public util::BinaryElementwiseArithmetic { 
std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/mod.hpp b/src/core/include/openvino/op/mod.hpp index 5e58a2ec03d733..defb1c65163898 100644 --- a/src/core/include/openvino/op/mod.hpp +++ b/src/core/include/openvino/op/mod.hpp @@ -29,6 +29,8 @@ class OPENVINO_API Mod : public util::BinaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override; + bool evaluate_lower(TensorVector& outputs) const override; + bool evaluate_upper(TensorVector& outputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/util/framework_node.hpp b/src/core/include/openvino/op/util/framework_node.hpp index 57a6be7a3a34fb..f8a63b38b823e5 100644 --- a/src/core/include/openvino/op/util/framework_node.hpp +++ b/src/core/include/openvino/op/util/framework_node.hpp @@ -81,7 +81,7 @@ class OPENVINO_API FrameworkNode : public MultiSubGraphOp { explicit FrameworkNode(const OutputVector& inputs, size_t output_size = 1, size_t num_subgraphs = 0); - void validate_and_infer_types() override; + virtual void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; diff --git a/src/core/reference/include/openvino/reference/convert.hpp b/src/core/reference/include/openvino/reference/convert.hpp index e943e548a8fa4e..3924ce690553b2 100644 --- a/src/core/reference/include/openvino/reference/convert.hpp +++ b/src/core/reference/include/openvino/reference/convert.hpp @@ -8,13 +8,14 @@ #include "openvino/core/type/element_type.hpp" #include "openvino/core/type/float16.hpp" +#include "openvino/core/type/nf4.hpp" namespace ov { namespace reference { namespace detail { inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits if (val) { buf[byte_idx] |= (1 << bit_idx); } else { @@ -24,33 +25,33 @@ inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { inline uint8_t get_u1(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits return (buf[byte_idx] & (1 << bit_idx)) ? 
1 : 0; } inline void set_u4(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline uint8_t get_u4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); return (buf[byte_idx] >> bit_shift) & 0xF; } inline void set_i4(uint8_t* buf, size_t idx, int8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline int8_t get_i4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); uint8_t val = (buf[byte_idx] >> bit_shift) & 0xF; if (val & 0x08) { // negative number val |= 0xF0; @@ -87,7 +88,7 @@ void lp_convert(const TI* arg, TO* out, size_t count, element::Type_t src_type, } else if (dst_type == element::i4) { detail::set_i4(output, i, detail::get_value(input, i, src_type)); } else if (src_type == element::nf4) { - ConvertNF4::unpack(out, input, i); + ov::ConvertNF4::unpack(out, input, i); } else { out[i] = detail::get_value(input, i, src_type); } diff --git a/src/core/reference/include/openvino/reference/minimum.hpp b/src/core/reference/include/openvino/reference/minimum.hpp index 4bfe8ff0c89c83..8d70ae0fc99ee0 100644 --- a/src/core/reference/include/openvino/reference/minimum.hpp +++ b/src/core/reference/include/openvino/reference/minimum.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include "openvino/core/shape.hpp" #include "openvino/op/util/attr_types.hpp" @@ -12,11 +12,16 @@ namespace ov { namespace reference { +namespace func { +template +T min(const T a, const T b) { + return std::min(a, b); +} +} // namespace func + template void minimum(const T* arg0, const T* arg1, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] < arg1[i] ? arg0[i] : arg1[i]; - } + std::transform(arg0, std::next(arg0, count), arg1, out, func::min); } template @@ -26,9 +31,7 @@ void minimum(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - return x < y ? x : y; - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::min); } } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/mod.hpp b/src/core/reference/include/openvino/reference/mod.hpp index 81ae69e32ebfb2..671ee012393641 100644 --- a/src/core/reference/include/openvino/reference/mod.hpp +++ b/src/core/reference/include/openvino/reference/mod.hpp @@ -6,6 +6,7 @@ #include #include +#include #include "openvino/reference/autobroadcast_binop.hpp" #include "openvino/reference/utils/type_util.hpp" @@ -22,6 +23,72 @@ template ()>::type* = T mod(const T x, const T y) { return x - (std::trunc(x / y) * y); } + +/** + * @brief Estimates the division remainder `[v1, v2] % m = [r0, r1]` as an interval. + * + * Assumes that `0 <= v1 <= v2` and `m != 0`; otherwise the behaviour is undefined. + * The result interval estimates the minimum and maximum, but it is not true that the + * remainder can take every value between min and max. + * e.g. + * - [4,6] % 5 = [0, 4], but in fact the accurate result is the set {0, 1, 4} + * If the value interval spans a full period of `m` (detected when `mod(v2, m) < mod(v1, m)` + * or `v2 - v1 >= m`), the bounds widen to the whole range `[0, m - 1]`. + + * @param v1 Minimum of value interval. + * @param v2 Maximum of value interval. + * @param m Modulo divisor. + * @return Remainder of division as interval range. + */ +template ::value>::type* = nullptr> +std::pair mod_interval_value(const T v1, const T v2, const T m) { + const auto v_diff = v2 - v1; + auto r = std::make_pair(func::mod(v1, m), func::mod(v2, m)); + + if ((r.second < r.first) || ((v_diff != T{0}) && (v_diff >= m))) { + r.first = T{0}; + r.second = m - T{1}; + } + return r; +} + +/** + * @brief Estimates the division remainder of `[v1, v2] % [m1, m2] = [r0, r1]` as an interval. + * + * Assumes that `0 <= v1 <= v2` and `0 < m1 <= m2`; otherwise the behaviour is undefined. + * + * @param v1 Minimum of value interval. + * @param v2 Maximum of value interval. + * @param m1 Minimum of modulo divisor. + * @param m2 Maximum of modulo divisor. + * @return Remainder of division as interval range. + */ +template ::value>::type* = nullptr> +std::pair mod_interval(const T v1, const T v2, const T m1, const T m2) { + auto r = mod_interval_value(v1, v2, m1); + if (v2 != 0) { + if (m1 != m2) { + const auto v_diff = v2 - v1; + const auto m_diff = m2 - m1; + + auto r2 = mod_interval_value(v1, v2, m2); + r.first = std::min(r.first, r2.first); + r.second = std::max(r.second, r2.second); + + if (v_diff == T{0} && m_diff != T{1}) { + const T v2_half = v2 / T{2}; + if ((m1 < v2_half) || ((m1 < v2) && (v2 < m2))) { + r.first = T{0}; + + if ((v2_half < m2) && (m2 < v2)) { + const T v2_half_next = v2_half + T{1}; + r.second = func::mod(v2, v2_half_next); + } else { + r.second = m2 - T{1}; + } + } + } + } + } + return r; +} } // namespace func /** @@ -42,7 +109,7 @@ void mod(InputIt arg0, const Shape& arg_shape1, const op::AutoBroadcastSpec& broadcast_spec) { using T = typename std::iterator_traits::value_type; - autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, &func::mod); + autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, func::mod); } } // namespace reference } // namespace ov diff --git a/src/core/reference/src/op/nms_rotated.cpp b/src/core/reference/src/op/nms_rotated.cpp index fd604acd5cc6c7..3b4f21d4431c31 100644 --- a/src/core/reference/src/op/nms_rotated.cpp +++ b/src/core/reference/src/op/nms_rotated.cpp @@ -127,7 +127,7 @@ void nms_rotated(const float* boxes_data, for (int64_t batch = 0; batch < num_batches; batch++) { const float* boxesPtr = boxes_data + batch * num_boxes * 5; - RotatedBox* r = reinterpret_cast(const_cast(boxesPtr)); + const RotatedBox* r = reinterpret_cast(boxesPtr); for (int64_t class_idx = 0; class_idx < num_classes; class_idx++) { const float* scoresPtr = scores_data + batch * (num_classes * num_boxes) + class_idx * num_boxes; @@ -137,11 +137,11 @@ void nms_rotated(const float* boxes_data, for (int64_t box_idx = 0; box_idx < num_boxes; box_idx++) { if (scoresPtr[box_idx] > score_threshold) { + candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx); // Convert counterclockwise to clockwise if (!clockwise) { - r[box_idx].a *= -1; + candidate_boxes.back().box.a *= -1.f; } - candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx); } } diff --git a/src/core/reference/src/op/strided_slice.cpp b/src/core/reference/src/op/strided_slice.cpp index 
6e83305e653059..cad9dee20d02be 100644 --- a/src/core/reference/src/op/strided_slice.cpp +++ b/src/core/reference/src/op/strided_slice.cpp @@ -8,10 +8,10 @@ #include -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/reference/reshape.hpp" #include "openvino/reference/reverse.hpp" #include "openvino/reference/slice.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { namespace reference { @@ -30,8 +30,7 @@ void strided_slice(const char* arg, return; } - OPENVINO_SUPPRESS_DEPRECATED_START - ngraph::runtime::AlignedBuffer slice_out_buffer(shape_size(sp.reshape_in_shape) * elem_type); + ov::AlignedBuffer slice_out_buffer(shape_size(sp.reshape_in_shape) * elem_type); slice(reinterpret_cast(arg), slice_out_buffer.get_ptr(), arg_shape, @@ -41,7 +40,7 @@ void strided_slice(const char* arg, sp.reshape_in_shape, elem_type); - ngraph::runtime::AlignedBuffer reshape_out_buffer(shape_size(sp.reshape_out_shape) * elem_type); + ov::AlignedBuffer reshape_out_buffer(shape_size(sp.reshape_out_shape) * elem_type); reshape(slice_out_buffer.get_ptr(), reshape_out_buffer.get_ptr(), sp.reshape_in_shape, elem_type); reverse(reshape_out_buffer.get_ptr(), @@ -50,7 +49,6 @@ void strided_slice(const char* arg, sp.reshape_out_shape, sp.reverse_axes, elem_type); - OPENVINO_SUPPRESS_DEPRECATED_END } } // namespace reference } // namespace ov diff --git a/src/core/shape_inference/include/utils.hpp b/src/core/shape_inference/include/utils.hpp index 32e53766ba0d60..cac12973a18179 100644 --- a/src/core/shape_inference/include/utils.hpp +++ b/src/core/shape_inference/include/utils.hpp @@ -419,6 +419,17 @@ ov::optional get_input_bounds(const ov::Node* op, size_t port, const IT * @return Result shape from inputs with applied broadcast specification. */ ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::Shape& first, const ov::Shape& second); + +/** + * @brief Infer the broadcast shape from input tensor shapes for an element-wise operator, + * according to the broadcast specification stored in the operator. + * + * @param op Pointer to operator. + * @param inputs Vector of tensors whose shapes are used. + * + * @return Result shape inferred from the input tensor shapes with the broadcast specification applied. 
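+ * + * @note With NUMPY auto-broadcast, e.g., input shapes {8, 1, 6, 1} and {7, 1, 5} + * produce the result shape {8, 7, 6, 5}. 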
+ */ +ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::TensorVector& inputs); } // namespace op /** diff --git a/src/core/shape_inference/src/utils.cpp b/src/core/shape_inference/src/utils.cpp index c89221d286ac69..74351e6fc1cfc0 100644 --- a/src/core/shape_inference/src/utils.cpp +++ b/src/core/shape_inference/src/utils.cpp @@ -5,6 +5,7 @@ #include "utils.hpp" #include "eltwise_shape_inference.hpp" +#include "openvino/core/validation_util.hpp" namespace ov { namespace op { @@ -12,5 +13,9 @@ namespace op { ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::Shape& first, const ov::Shape& second) { return eltwise_shape_infer(op, std::vector{first, second}).front().to_shape(); } + +ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::TensorVector& inputs) { + return eltwise_shape_infer(op, ov::util::get_tensors_partial_shapes(inputs)).front().to_shape(); +} } // namespace op } // namespace ov diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index 8001678dab2601..4c6a4d0f33e516 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -319,7 +319,8 @@ bool replace_output_update_name(Output output, const Output& replace bool replace_node_update_name(const std::shared_ptr& target, const std::shared_ptr& replacement) { for (auto& output : target->output(0).get_target_inputs()) { - if (ov::as_type(replacement->input_value(0).get_node()) && + if (replacement->get_input_size() > 0 && + ov::as_type(replacement->input_value(0).get_node()) && ov::as_type(output.get_node())) { return false; } diff --git a/src/core/src/node_input.cpp b/src/core/src/node_input.cpp index 7c6b8a9ff2102c..11a353cb765b49 100644 --- a/src/core/src/node_input.cpp +++ b/src/core/src/node_input.cpp @@ -60,12 +60,15 @@ bool Input::operator==(const Input& other) const { bool Input::operator!=(const Input& other) const { return !(*this == other); } + bool Input::operator<(const Input& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Input::operator>(const Input& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Input::operator<=(const Input& other) const { @@ -135,11 +138,13 @@ bool Input::operator!=(const Input& other) const { return !(*this == other); } bool Input::operator<(const Input& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Input::operator>(const Input& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Input::operator<=(const Input& other) const { diff --git a/src/core/src/node_output.cpp b/src/core/src/node_output.cpp index fbd7d3f172280c..4d5de39b75132a 100644 --- a/src/core/src/node_output.cpp +++ b/src/core/src/node_output.cpp @@ -137,10 +137,12 @@ bool Output::operator!=(const Output& other) const { return !(*this == other); } bool Output::operator<(const Output& other) const { - return 
m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Output::operator>(const Output& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Output::operator<=(const Output& other) const { return !(*this > other); @@ -211,10 +213,12 @@ bool Output::operator!=(const Output& other) const { return !(*this == other); } bool Output::operator<(const Output& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Output::operator>(const Output& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Output::operator<=(const Output& other) const { return !(*this > other); diff --git a/src/core/src/op/add.cpp b/src/core/src/op/add.cpp index 316f71b3ebca7c..0d09563b9ae201 100644 --- a/src/core/src/op/add.cpp +++ b/src/core/src/op/add.cpp @@ -19,14 +19,11 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::add(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::add(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; @@ -48,15 +45,16 @@ std::shared_ptr Add::clone_with_new_inputs(const OutputVector& new_args) c bool Add::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { OV_OP_SCOPE(v1_Add_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply( inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 27d9e000b64dec..34e97d73eeee30 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -10,8 +10,13 @@ #include #include "itt.hpp" +#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/tensor.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/core/type/float16.hpp" +#include "openvino/core/type/nf4.hpp" +#include "openvino/runtime/shared_buffer.hpp" template static inline std::string to_cpp_string(T value) { @@ -27,6 +32,14 @@ static inline std::string to_cpp_string(T value) { } return rc; } +OPENVINO_SUPPRESS_DEPRECATED_START +std::shared_ptr ov::op::v0::Constant::legacy_to_ov_aligned_buffer( + const std::shared_ptr& buffer) { + return std::make_shared>>(buffer->get_ptr(), + buffer->size(), + buffer); +} +OPENVINO_SUPPRESS_DEPRECATED_END OPENVINO_SUPPRESS_DEPRECATED_START 
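+// The legacy_to_ov_aligned_buffer helper above wraps the raw pointer of a deprecated +// ngraph::runtime::AlignedBuffer in a SharedBuffer that holds the legacy buffer as its +// shared object, so the old allocation stays alive while m_data references it. 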
ov::op::v0::Constant::Constant(const std::shared_ptr& tensor) { @@ -35,7 +48,7 @@ ov::op::v0::Constant::Constant(const std::shared_ptr& t // Share data from HostTensor if we work with it // And copy data in other cas if (auto hostTensor = std::dynamic_pointer_cast(tensor)) { - m_data = std::make_shared>>( + m_data = std::make_shared>>( static_cast(hostTensor->get_data_ptr()), tensor->get_size_in_bytes(), tensor); @@ -51,12 +64,10 @@ OPENVINO_SUPPRESS_DEPRECATED_END ov::op::v0::Constant::Constant(const ov::Tensor& tensor) { m_element_type = tensor.get_element_type(); m_shape = tensor.get_shape(); - OPENVINO_SUPPRESS_DEPRECATED_START // Share data from ov::Tensor - m_data = std::make_shared>(static_cast(tensor.data()), - tensor.get_byte_size(), - tensor); - OPENVINO_SUPPRESS_DEPRECATED_END + m_data = std::make_shared>(static_cast(tensor.data()), + tensor.get_byte_size(), + tensor); constructor_validate_and_infer_types(); } @@ -211,12 +222,10 @@ ov::op::v0::Constant::Constant(bool memset_allocation, const element::Type& type } void ov::op::v0::Constant::allocate_buffer(bool memset_allocation) { - OPENVINO_SUPPRESS_DEPRECATED_START - m_data = std::make_shared(mem_size(), host_alignment()); + m_data = std::make_shared(mem_size(), host_alignment()); if (memset_allocation) { std::memset(m_data->get_ptr(), 0, m_data->size()); } - OPENVINO_SUPPRESS_DEPRECATED_END } ov::op::v0::Constant::Constant(const element::Type& type, const ov::Shape& shape, const void* data) @@ -316,6 +325,18 @@ std::string ov::op::v0::Constant::convert_value_to_string(size_t index) const { return rc; } +size_t ov::op::v0::Constant::get_byte_size() const { + return m_data->size(); +} + +const void* ov::op::v0::Constant::get_data_ptr() const { + return (m_data ? m_data->get_ptr() : nullptr); +} + +void* ov::op::v0::Constant::get_data_ptr_nc() { + return (m_data ? 
m_data->get_ptr() : nullptr); +} + std::vector ov::op::v0::Constant::get_value_strings() const { std::vector rc; @@ -588,3 +609,7 @@ bool ov::op::v0::Constant::evaluate_lower(TensorVector& outputs) const { bool ov::op::v0::Constant::evaluate_upper(TensorVector& outputs) const { return evaluate(outputs, {}); } + +uint8_t ov::op::v0::Constant::quantize_nf4(float x) { + return ov::ConvertNF4::quantize(x); +} diff --git a/src/core/src/op/logical_and.cpp b/src/core/src/op/logical_and.cpp index fe8bd612ed2d85..91ff10dc15601c 100644 --- a/src/core/src/op/logical_and.cpp +++ b/src/core/src/op/logical_and.cpp @@ -25,19 +25,16 @@ std::shared_ptr LogicalAnd::clone_with_new_inputs(const OutputVector& new_ bool LogicalAnd::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalAnd_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - const auto& shape_0 = inputs[0].get_shape(); - const auto& shape_1 = inputs[1].get_shape(); - outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); if (inputs[0].get_element_type() == element::boolean) { using T = fundamental_type_for; reference::logical_and(inputs[0].data(), inputs[1].data(), outputs[0].data(), - shape_0, - shape_1, + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); return true; } else { diff --git a/src/core/src/op/logical_or.cpp b/src/core/src/op/logical_or.cpp index 403089318de314..5d9532b1358286 100644 --- a/src/core/src/op/logical_or.cpp +++ b/src/core/src/op/logical_or.cpp @@ -26,19 +26,16 @@ std::shared_ptr LogicalOr::clone_with_new_inputs(const OutputVector& new_a bool LogicalOr::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalOr_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - const auto& shape_0 = inputs[0].get_shape(); - const auto& shape_1 = inputs[1].get_shape(); - outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); if (inputs[0].get_element_type() == element::boolean) { using T = fundamental_type_for; reference::logical_or(inputs[0].data(), inputs[1].data(), outputs[0].data(), - shape_0, - shape_1, + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); return true; } else { diff --git a/src/core/src/op/minimum.cpp b/src/core/src/op/minimum.cpp index 83252519beeeac..1844c6e5b25e36 100644 --- a/src/core/src/op/minimum.cpp +++ b/src/core/src/op/minimum.cpp @@ -2,92 +2,78 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/minimum.hpp" - -#include +#include "openvino/op/minimum.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/convert.hpp" -#include "ngraph/op/less.hpp" -#include "ngraph/op/multiply.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/type/element_type.hpp" #include "openvino/reference/minimum.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace minimumop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::minimum(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} +namespace minimum { -bool evaluate_minimum(const 
HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_minimum, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::minimum(arg0.data(), arg1.data(), out.data(), shape0, shape1, broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace minimumop +}; +} // namespace minimum // ------------------------------ v1 ------------------------------------------- - -op::v1::Minimum::Minimum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Minimum::Minimum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Minimum::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Minimum::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Minimum_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Minimum::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Minimum::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Minimum_evaluate); - return minimumop::evaluate_minimum(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Minimum::has_evaluate() const { +bool Minimum::has_evaluate() const { OV_OP_SCOPE(v1_Minimum_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/mod.cpp b/src/core/src/op/mod.cpp index e8aa1a8a009cc2..69ac9493052d20 100644 --- a/src/core/src/op/mod.cpp +++ 
b/src/core/src/op/mod.cpp @@ -4,13 +4,30 @@ #include "openvino/op/mod.hpp" +#include "bound_evaluate.hpp" #include "element_visitor.hpp" #include "itt.hpp" -#include "openvino/core/shape_util.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/logical_or.hpp" +#include "openvino/op/select.hpp" #include "openvino/reference/mod.hpp" #include "utils.hpp" +#include "validation_util.hpp" namespace ov { +namespace util { +namespace { +Tensor make_tensor_of_value(const element::Type_t et, const int64_t value) { + auto c = op::v0::Constant(et, Shape{}, value); + auto t = Tensor(et, Shape{}); + std::memcpy(t.data(), c.get_data_ptr(), t.get_byte_size()); + return t; +} +} // namespace +} // namespace util + namespace op { namespace mod { struct Evaluate : ov::element::NoAction { @@ -20,17 +37,193 @@ struct Evaluate : ov::element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::mod(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::mod(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; + +struct EvaluateBound : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& v_lb, + const Tensor& v_ub, + const Tensor& m_lb, + const Tensor& m_ub, + Tensor& out, + const bool is_lower) { + auto v_lb_first = v_lb.data(); + auto v_lb_last = std::next(v_lb_first, v_lb.get_size()); + auto v_ub_first = v_ub.data(); + auto m_lb_first = m_lb.data(); + auto m_ub_first = m_ub.data(); + auto out_first = out.data(); + + if (is_lower) { + while (v_lb_first != v_lb_last) { + *out_first++ = + reference::func::mod_interval(*v_lb_first++, *v_ub_first++, *m_lb_first++, *m_ub_first++).first; + } + } else { + while (v_lb_first != v_lb_last) { + *out_first++ = + reference::func::mod_interval(*v_lb_first++, *v_ub_first++, *m_lb_first++, *m_ub_first++).second; + } + } + return true; + } +}; + +namespace { + +/** + * @brief Get the node input bounds as a TensorVector. + * + * The input bounds are stored as [lower0, upper0, lower1, upper1]. + * + * @param op Pointer to the node. + * @return Vector with input bounds tensors. + */ +TensorVector get_bounds(const Node* const op) { + auto&& v_bounds = ov::evaluate_both_bounds(op->input_value(0)); + auto&& m_bounds = ov::evaluate_both_bounds(op->input_value(1)); + return {std::move(v_bounds.first), + std::move(v_bounds.second), + std::move(m_bounds.first), + std::move(m_bounds.second)}; +} + +/** + * @brief Check if all bounds in the vector are valid (allocated). + * + * @param bounds TensorVector of bounds to check. + * @return True if the bounds are valid, otherwise false. + */ +bool are_bounds_valid(const TensorVector& bounds) { + return std::all_of(bounds.begin(), bounds.end(), [](const Tensor& t) { + return static_cast(t); + }); +} + +/** + * @brief Evaluate a binary mask of the values for which modulo cannot be calculated. + * + * @param bounds Modulo input bounds. + * @return Tensor with binary mask or empty tensor if evaluate failed. 
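+ * + * @note The mask is set where the divisor lower or upper bound equals zero, the divisor + * lower bound equals the type maximum, or the value upper bound equals the type maximum. 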
+ */ +Tensor evaluate_undefined_result_mask(const TensorVector& bounds) { + const auto eq_op = v1::Equal(); + const auto or_op = v1::LogicalOr(); + + const auto& in_et = bounds.front().get_element_type(); + + auto zero_t = ov::util::make_tensor_of_value(in_et, 0); + auto max_t = ov::util::make_tensor_of_max_value(in_et); + + const auto& v_ub = bounds[1]; + const auto& m_lb = bounds[2]; + const auto& m_ub = bounds[3]; + + auto m_mask = TensorVector{{element::boolean, m_ub.get_shape()}}; + if (!eq_op.evaluate(m_mask, {m_lb, zero_t})) { + return {}; + } + + auto out_masks = TensorVector{{element::boolean, m_lb.get_shape()}}; + if (!eq_op.evaluate(out_masks, {m_ub, zero_t})) { + return {}; + } + + auto m_or_inputs = TensorVector{out_masks[0], m_mask[0]}; + or_op.evaluate(m_mask, m_or_inputs); + if (!eq_op.evaluate(out_masks, {m_lb, max_t})) { + return {}; + } + + or_op.evaluate(m_mask, m_or_inputs); + auto v_mask = TensorVector{{element::boolean, v_ub.get_shape()}}; + if (!eq_op.evaluate(v_mask, {v_ub, max_t})) { + return {}; + } + + out_masks[0].set_shape(ov::op::infer_broadcast_shape(&or_op, v_mask[0].get_shape(), m_mask[0].get_shape())); + return or_op.evaluate(out_masks, {v_mask[0], m_mask[0]}) ? out_masks[0] : Tensor{}; +} + +/** + * @brief Get the input bounds with valid values only. + * + * Values for which modulo would give an undefined result are replaced by one. + * Auto broadcast is applied so that the inputs have the same shape. + * + * @param bounds Modulo operator inputs bounds. + * @param mask Mask of undefined result values. + * @return Vector of bounds tensors. + */ +TensorVector get_bounds_with_valid_values(const TensorVector& bounds, const Tensor& mask) { + const auto select_op = v1::Select(); + const auto one_t = ov::util::make_tensor_of_value(bounds.front().get_element_type(), 1); + + auto m_bounds = TensorVector(); + m_bounds.reserve(bounds.size()); + std::transform(bounds.cbegin(), bounds.cend(), std::back_inserter(m_bounds), [&](const Tensor& b) { + auto tmp = TensorVector{{b.get_element_type(), mask.get_shape()}}; + return select_op.evaluate(tmp, {mask, one_t, b}) ? tmp.front() : Tensor{}; + }); + return m_bounds; +} + +/** + * @brief Evaluate modulo upper or lower bound. + * + * @param op Pointer to the modulo node. + * @param outputs Tensor vector with one tensor to store the bound result. + * @param is_lower True to evaluate the lower bound, false for the upper. + * @return True if outputs hold valid data, otherwise false. + */ +bool evaluate_bound(const Node* const op, TensorVector& outputs, bool is_lower) { + const auto bounds = mod::get_bounds(op); + + if (mod::are_bounds_valid(bounds)) { + const auto& in_et = bounds[0].get_element_type(); + + const auto undefined_result_mask = mod::evaluate_undefined_result_mask(bounds); + if (!undefined_result_mask) { + return false; + } + + // Set input values to 1 where the mask marks undefined results (0, inf, etc.) + const auto m_bounds = mod::get_bounds_with_valid_values(bounds, undefined_result_mask); + if (!mod::are_bounds_valid(m_bounds)) { + return false; + } + + // Evaluate bound. + outputs[0].set_shape(undefined_result_mask.get_shape()); + using namespace ov::element; + if (!IfTypeOf::apply(in_et, + m_bounds[0], + m_bounds[1], + m_bounds[2], + m_bounds[3], + outputs[0], + is_lower)) { + return false; + } + // Set undefined bound value for results which cannot be calculated. + const auto select_op = v1::Select(); + const auto undefined_bound = + is_lower ? 
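For intuition about the EvaluateBound visitor above: reference::func::mod_interval(v_lb, v_ub, m_lb, m_ub) is expected to return the tightest pair {lo, hi} enclosing every v % m with v in [v_lb, v_ub] and m in [m_lb, m_ub]. That contract can be cross-checked by exhaustive enumeration; the following is a sketch assuming non-empty intervals and truncated (C++-style) modulo, not the shipped implementation:

#include <algorithm>
#include <cstdint>
#include <limits>
#include <utility>

std::pair<int64_t, int64_t> mod_interval_bruteforce(int64_t v_lb, int64_t v_ub,
                                                    int64_t m_lb, int64_t m_ub) {
    auto lo = std::numeric_limits<int64_t>::max();
    auto hi = std::numeric_limits<int64_t>::min();
    for (int64_t v = v_lb; v <= v_ub; ++v) {
        for (int64_t m = m_lb; m <= m_ub; ++m) {
            if (m == 0)
                continue;  // masked out as "undefined" by the code above
            lo = std::min(lo, v % m);
            hi = std::max(hi, v % m);
        }
    }
    return {lo, hi};
}

// For example v in [10, 10] and m in [6, 9] gives {1, 4}, which is exactly the
// v_static_m_interval case added to the type_prop tests later in this patch.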
ov::util::make_tensor_of_value(in_et, 0) : ov::util::make_tensor_of_max_value(in_et); + return select_op.evaluate(outputs, {undefined_result_mask, undefined_bound, outputs.front()}); + } else { + return false; + } +} +} // namespace } // namespace mod namespace v1 { @@ -48,17 +241,28 @@ std::shared_ptr Mod::clone_with_new_inputs(const OutputVector& new_args) c bool Mod::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { OV_OP_SCOPE(v1_Mod_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } +bool Mod::evaluate_lower(TensorVector& outputs) const { + OV_OP_SCOPE(v1_Mod_evaluate_lower); + return mod::evaluate_bound(this, outputs, true); +} + +bool Mod::evaluate_upper(TensorVector& outputs) const { + OV_OP_SCOPE(v1_Mod_evaluate_upper); + return mod::evaluate_bound(this, outputs, false); +} + bool Mod::has_evaluate() const { OV_OP_SCOPE(v1_Mod_has_evaluate); diff --git a/src/core/src/op/range.cpp b/src/core/src/op/range.cpp index 6285391ae56e06..204161ce10ac7b 100644 --- a/src/core/src/op/range.cpp +++ b/src/core/src/op/range.cpp @@ -144,37 +144,46 @@ bool evaluate(const HostTensorPtr& out, const HostTensorPtr& step, int version) { using T = typename element_type_traits::value_type; - T start_val; - T stop_val; - T step_val; + double start_val; + double stop_val; + double step_val; if (version < 4) { - start_val = *start->get_data_ptr(); - stop_val = *stop->get_data_ptr(); - step_val = *step->get_data_ptr(); + start_val = static_cast(*start->get_data_ptr()); + stop_val = static_cast(*stop->get_data_ptr()); + step_val = static_cast(*step->get_data_ptr()); if (!(check_value(start_val) && check_value(stop_val) && check_value(step_val) && (step_val != static_cast(0)))) { return false; } } else { - if (!(get_casted_value(start, &start_val) && get_casted_value(stop, &stop_val) && - get_casted_value(step, &step_val))) { + if (!(get_casted_value(start, &start_val) && get_casted_value(stop, &stop_val) && + get_casted_value(step, &step_val))) { return false; } } int64_t out_size = 0; + if (ov::element::Type(ET).is_integral_number()) { + start_val = std::trunc(start_val); + stop_val = std::trunc(stop_val); + step_val = std::trunc(step_val); + } + int64_t steps = static_cast(std::ceil(double(stop_val - start_val) / step_val)); if (steps > 0) { out_size = steps; } ov::Shape out_shape = ov::Shape({static_cast(out_size)}); out->set_shape(out_shape); - ov::reference::range(&start_val, &step_val, shape_size(out_shape), out->get_data_ptr()); + + T start_val_casted = static_cast(start_val); + T step_val_casted = static_cast(step_val); + ov::reference::range(&start_val_casted, &step_val_casted, shape_size(out_shape), out->get_data_ptr()); return true; } -bool evaluate_power(const HostTensorPtr& out, +bool evaluate_range(const HostTensorPtr& out, const HostTensorPtr& start, const HostTensorPtr& stop, const HostTensorPtr& step, @@ -209,7 +218,7 @@ bool op::v4::Range::evaluate(const HostTensorVector& outputs, const HostTensorVe HostTensorPtr start = inputs[0]; HostTensorPtr stop = inputs[1]; HostTensorPtr step = inputs[2]; - return rangeop::evaluate_power(out, start, stop, step, m_output_type, 4); + return 
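The range.cpp hunk above now reads start/stop/step into double before any casting, so the element count is computed at full precision even when the output type is narrow (f16 in the new eval test). The count rule in isolation, as a sketch that assumes step != 0 (already validated by the callers above):

#include <cmath>
#include <cstdint>

int64_t range_size(double start, double stop, double step, bool integral_output) {
    if (integral_output) {  // integral outputs truncate the scalars first
        start = std::trunc(start);
        stop = std::trunc(stop);
        step = std::trunc(step);
    }
    const auto steps = static_cast<int64_t>(std::ceil((stop - start) / step));
    return steps > 0 ? steps : 0;  // a non-positive count yields an empty range
}

// range_size(0, 3087, 1, false) == 3087, the shape expected by the
// evaluate_dynamic_range_fp16_out test added further down in this patch.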
rangeop::evaluate_range(out, start, stop, step, m_output_type, 4); } bool op::v4::Range::has_evaluate() const { @@ -381,7 +390,7 @@ bool op::v0::Range::evaluate(const HostTensorVector& outputs, const HostTensorVe HostTensorPtr start = inputs[0]; HostTensorPtr stop = inputs[1]; HostTensorPtr step = inputs[2]; - return rangeop::evaluate_power(out, start, stop, step, start->get_element_type(), 0); + return rangeop::evaluate_range(out, start, stop, step, start->get_element_type(), 0); } bool op::v0::Range::has_evaluate() const { diff --git a/src/core/src/op/subtract.cpp b/src/core/src/op/subtract.cpp index 6538918f9f14e2..6b21fa00483b78 100644 --- a/src/core/src/op/subtract.cpp +++ b/src/core/src/op/subtract.cpp @@ -19,14 +19,11 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::subtract(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::subtract(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; @@ -48,14 +45,15 @@ std::shared_ptr Subtract::clone_with_new_inputs(const OutputVector& new_ar bool Subtract::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Subtract_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } diff --git a/src/core/src/op/xor.cpp b/src/core/src/op/xor.cpp index eafe1fe465e315..c96599d9de3cef 100644 --- a/src/core/src/op/xor.cpp +++ b/src/core/src/op/xor.cpp @@ -21,13 +21,15 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& arg0, const Tensor& arg1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; reference::logical_xor(arg0.data(), arg1.data(), out.data(), - arg0.get_shape(), - arg1.get_shape(), + shape0, + shape1, broadcast_spec); return true; } @@ -40,14 +42,15 @@ bool input_supported_type(const element::Type& et) { bool evaluate(const Node* const op, TensorVector& outputs, const TensorVector& inputs) { OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(op, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(op, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), op->get_autob()); } } // namespace diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index dc1263d7991deb..c879e8780f1370 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -21,6 +21,7 @@ #include "openvino/opsets/opset1.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/reference/convert.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/file_util.hpp" #include "pugixml.hpp" #include "transformations/hash.hpp" @@ -532,6 +533,19 
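The subtract.cpp and xor.cpp hunks above move to infer_broadcast_shape(this, inputs), so the output shape is merged from both input shapes under the operator's autobroadcast spec before the reference kernel runs. For NUMPY broadcasting the merge is the usual right-aligned rule; a simplified standalone sketch (static shapes only):

#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

std::vector<std::size_t> numpy_broadcast(std::vector<std::size_t> a, std::vector<std::size_t> b) {
    if (a.size() < b.size())
        std::swap(a, b);
    b.insert(b.begin(), a.size() - b.size(), 1);  // left-pad the shorter rank with 1s
    std::vector<std::size_t> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i) {
        if (a[i] != b[i] && a[i] != 1 && b[i] != 1)
            throw std::invalid_argument("shapes are not broadcastable");
        out[i] = std::max(a[i], b[i]);
    }
    return out;
}

// numpy_broadcast({2, 3, 4}, {3, 1}) == {2, 3, 4}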
@@ class XmlSerializer : public ov::AttributeVisitor { m_xml_node.append_attribute("offset").set_value(static_cast(offset)); m_xml_node.append_attribute("size").set_value(static_cast(new_size)); } + } else if (const auto& a = ov::as_type>>(&adapter)) { + if (name == "value" && translate_type_name(m_node_type_name) == "Const") { + const int64_t size = a->get()->size(); + size_t new_size; + int64_t offset = m_constant_write_handler.write(static_cast(a->get()->get_ptr()), + size, + &new_size, + m_compress_to_fp16, + m_output_element_type); + + m_xml_node.append_attribute("offset").set_value(static_cast(offset)); + m_xml_node.append_attribute("size").set_value(static_cast(new_size)); + } } else if (const auto& a = ov::as_type>(&adapter)) { const auto& attrs = a->get(); diff --git a/src/core/src/runtime/aligned_buffer.cpp b/src/core/src/runtime/aligned_buffer.cpp index d7c5229fcc0efa..4207eefe5db9b7 100644 --- a/src/core/src/runtime/aligned_buffer.cpp +++ b/src/core/src/runtime/aligned_buffer.cpp @@ -8,6 +8,7 @@ #include #include "ngraph/util.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/log.hpp" NGRAPH_SUPPRESS_DEPRECATED_START @@ -64,3 +65,53 @@ AttributeAdapter>::AttributeAdap std::shared_ptr& value) : DirectValueAccessor>(value) {} } // namespace ov +NGRAPH_SUPPRESS_DEPRECATED_END + +namespace ov { +AlignedBuffer::AlignedBuffer() : m_allocated_buffer(nullptr), m_aligned_buffer(nullptr), m_byte_size(0) {} + +AlignedBuffer::AlignedBuffer(size_t byte_size, size_t alignment) : m_byte_size(byte_size) { + m_byte_size = std::max(1, byte_size); + size_t allocation_size = m_byte_size + alignment; + m_allocated_buffer = new char[allocation_size]; + m_aligned_buffer = m_allocated_buffer; + size_t mod = (alignment != 0) ? reinterpret_cast(m_aligned_buffer) % alignment : 0; + + if (mod != 0) { + m_aligned_buffer += (alignment - mod); + } +} + +AlignedBuffer::AlignedBuffer(AlignedBuffer&& other) + : m_allocated_buffer(other.m_allocated_buffer), + m_aligned_buffer(other.m_aligned_buffer), + m_byte_size(other.m_byte_size) { + other.m_allocated_buffer = nullptr; + other.m_aligned_buffer = nullptr; + other.m_byte_size = 0; +} + +AlignedBuffer::~AlignedBuffer() { + if (m_allocated_buffer != nullptr) { + delete[] m_allocated_buffer; + } +} + +AlignedBuffer& AlignedBuffer::operator=(AlignedBuffer&& other) { + if (this != &other) { + if (m_allocated_buffer != nullptr) { + delete[] m_allocated_buffer; + } + m_allocated_buffer = other.m_allocated_buffer; + m_aligned_buffer = other.m_aligned_buffer; + m_byte_size = other.m_byte_size; + other.m_allocated_buffer = nullptr; + other.m_aligned_buffer = nullptr; + other.m_byte_size = 0; + } + return *this; +} + +AttributeAdapter>::AttributeAdapter(std::shared_ptr& value) + : DirectValueAccessor>(value) {} +} // namespace ov diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 803364b289008d..4a7bd1958f1c53 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -910,32 +910,8 @@ void evaluate_nodes(std::map& value_map, } std::shared_ptr get_constant_max_of_type(element::Type_t t) { -#define OPENVINO_TYPE_TO_MAX_CONST(t) \ - case t: \ - return ov::op::v0::Constant::create( \ - t, \ - {}, \ - {std::numeric_limits::value_type>::max()}); \ - break - - switch (t) { - OPENVINO_TYPE_TO_MAX_CONST(element::boolean); - OPENVINO_TYPE_TO_MAX_CONST(element::bf16); - OPENVINO_TYPE_TO_MAX_CONST(element::f16); - OPENVINO_TYPE_TO_MAX_CONST(element::f32); - 
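The AlignedBuffer implementation relocated above keeps the classic over-allocation scheme: reserve byte_size + alignment bytes, then advance the returned pointer past the remainder of the raw address modulo the alignment. The pointer arithmetic in isolation, as a sketch:

#include <cstddef>
#include <cstdint>

char* align_ptr(char* raw, std::size_t alignment) {
    if (alignment == 0)
        return raw;  // mirrors the mod == 0 shortcut in the constructor above
    const std::size_t mod = reinterpret_cast<std::uintptr_t>(raw) % alignment;
    return mod == 0 ? raw : raw + (alignment - mod);
}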
OPENVINO_TYPE_TO_MAX_CONST(element::f64); - OPENVINO_TYPE_TO_MAX_CONST(element::i8); - OPENVINO_TYPE_TO_MAX_CONST(element::i16); - OPENVINO_TYPE_TO_MAX_CONST(element::i32); - OPENVINO_TYPE_TO_MAX_CONST(element::i64); - OPENVINO_TYPE_TO_MAX_CONST(element::u1); - OPENVINO_TYPE_TO_MAX_CONST(element::u8); - OPENVINO_TYPE_TO_MAX_CONST(element::u16); - OPENVINO_TYPE_TO_MAX_CONST(element::u32); - OPENVINO_TYPE_TO_MAX_CONST(element::u64); - default: - return nullptr; - } + auto tensor = ov::util::make_tensor_of_max_value(t); + return tensor ? std::make_shared(tensor) : nullptr; } std::shared_ptr get_constant_min_of_type(element::Type_t t) { @@ -1385,6 +1361,48 @@ std::shared_ptr get_constant_from_source(const Output& source) { } } +template +Tensor make_tensor_of_max_value(const element::Type_t et) { + Tensor t{et, Shape{}}; + *t.data() = std::numeric_limits::max(); + return t; +} + +Tensor make_tensor_of_max_value(const element::Type_t et) { + switch (et) { + case element::boolean: + return make_tensor_of_max_value>(et); + case element::bf16: + return make_tensor_of_max_value>(et); + case element::f16: + return make_tensor_of_max_value>(et); + case element::f32: + return make_tensor_of_max_value>(et); + case element::f64: + return make_tensor_of_max_value>(et); + case element::i8: + return make_tensor_of_max_value>(et); + case element::i16: + return make_tensor_of_max_value>(et); + case element::i32: + return make_tensor_of_max_value>(et); + case element::i64: + return make_tensor_of_max_value>(et); + case element::u1: + return make_tensor_of_max_value>(et); + case element::u8: + return make_tensor_of_max_value>(et); + case element::u16: + return make_tensor_of_max_value>(et); + case element::u32: + return make_tensor_of_max_value>(et); + case element::u64: + return make_tensor_of_max_value>(et); + default: + return {}; + } +} + std::vector get_tensors_partial_shapes(const TensorVector& tensors) { std::vector shapes; shapes.reserve(tensors.size()); diff --git a/src/core/tests/aligned_buffer.cpp b/src/core/tests/aligned_buffer.cpp index fae5929ba3db2a..604d153eeb5c36 100644 --- a/src/core/tests/aligned_buffer.cpp +++ b/src/core/tests/aligned_buffer.cpp @@ -2,31 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/runtime/aligned_buffer.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "gtest/gtest.h" -using namespace ngraph; +using namespace ov; -OPENVINO_SUPPRESS_DEPRECATED_START TEST(aligned_buffer, alignment) { - runtime::AlignedBuffer buffer(100, 64); + AlignedBuffer buffer(100, 64); size_t addr = reinterpret_cast(buffer.get_ptr()) % 64; EXPECT_EQ(addr, 0); } TEST(aligned_buffer, move) { { - runtime::AlignedBuffer buffer1(100, 64); - runtime::AlignedBuffer buffer2(std::move(buffer1)); + AlignedBuffer buffer1(100, 64); + AlignedBuffer buffer2(std::move(buffer1)); EXPECT_EQ(buffer1.size(), 0); EXPECT_EQ(buffer1.get_ptr(), nullptr); EXPECT_EQ(buffer2.size(), 100); EXPECT_NE(buffer2.get_ptr(), nullptr); } { - runtime::AlignedBuffer buffer1(100, 64); - runtime::AlignedBuffer buffer2; + AlignedBuffer buffer1(100, 64); + AlignedBuffer buffer2; buffer2 = std::move(buffer1); EXPECT_EQ(buffer1.size(), 0); EXPECT_EQ(buffer1.get_ptr(), nullptr); diff --git a/src/core/tests/bfloat16.cpp b/src/core/tests/bfloat16.cpp index bb3a35dc9953a3..f031d01226cf41 100644 --- a/src/core/tests/bfloat16.cpp +++ b/src/core/tests/bfloat16.cpp @@ -10,7 +10,7 @@ #include #include "common_test_utils/float_util.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" +#include 
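get_constant_max_of_type now delegates to the new make_tensor_of_max_value, replacing the macro table with one templated helper plus a plain switch. The pattern reduced to a standalone form, with illustrative names and a trimmed type list:

#include <cstdint>
#include <cstring>
#include <limits>
#include <vector>

enum class Et { i32, i64, f32 };

template <class T>
std::vector<char> scalar_max() {
    std::vector<char> buf(sizeof(T));
    const T v = std::numeric_limits<T>::max();
    std::memcpy(buf.data(), &v, sizeof(T));
    return buf;
}

std::vector<char> make_buffer_of_max_value(Et et) {
    switch (et) {
    case Et::i32: return scalar_max<int32_t>();
    case Et::i64: return scalar_max<int64_t>();
    case Et::f32: return scalar_max<float>();
    }
    return {};  // unknown type: empty buffer, like the empty Tensor{} above
}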
"openvino/runtime/aligned_buffer.hpp" #include "openvino/util/log.hpp" using namespace std; @@ -140,9 +140,8 @@ TEST(bfloat16, numeric_limits) { } TEST(benchmark, bfloat16) { - OPENVINO_SUPPRESS_DEPRECATED_START size_t buffer_size = 128 * 3 * 224 * 224; - ngraph::runtime::AlignedBuffer data(buffer_size * sizeof(float), 4096); + ov::AlignedBuffer data(buffer_size * sizeof(float), 4096); float* f = static_cast(data.get_ptr()); // vector data(buffer_size); std::mt19937 rng(2112); @@ -153,53 +152,36 @@ TEST(benchmark, bfloat16) { OPENVINO_INFO << "buffer size " << buffer_size << " floats or " << data.size() << " bytes"; { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 ctor " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::truncate(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 truncate " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::round_to_nearest(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 round to nearest " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::round_to_nearest_even(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 round to nearest even " << timer.get_milliseconds() << "ms"; } - OPENVINO_SUPPRESS_DEPRECATED_END } TEST(bfloat16, assigns) { diff --git a/src/core/tests/constant.cpp b/src/core/tests/constant.cpp index 45ad60d153627a..010bb83b3e75d9 100644 --- a/src/core/tests/constant.cpp +++ b/src/core/tests/constant.cpp @@ -10,6 +10,8 @@ #include "common_test_utils/type_prop.hpp" #include "openvino/core/except.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" using namespace ov; using namespace std; @@ -266,8 +268,8 @@ TEST(constant, int4_string) { EXPECT_EQ(v[2], -1); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x10, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0x01, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); EXPECT_EQ(input, c.get_value_strings()); @@ -318,8 +320,8 @@ TEST(constant, int4_vector_negative_number) { EXPECT_EQ(v[2], int8_t(-1)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0xFE, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0xEF, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); } TEST(constant, int4_vector_positive_number) { @@ -332,8 +334,8 @@ TEST(constant, int4_vector_positive_number) { EXPECT_EQ(v[2], int8_t(5)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x12, p[0]); - EXPECT_EQ(0x50, p[1] & 0xF0); + 
EXPECT_EQ(0x21, p[0]); + EXPECT_EQ(0x05, p[1] & 0x0F); } TEST(constant, int4_vector_broadcast_negative_number) { @@ -795,8 +797,8 @@ TEST(constant, uint4_string) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); EXPECT_EQ(input, c.get_value_strings()); @@ -831,8 +833,8 @@ TEST(constant, uint4_vector) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); } TEST(constant, uint4_vector_broadcast) { @@ -1726,14 +1728,12 @@ TEST(constant, lazy_bitwise_identical) { auto shape = Shape{10, 1000, 1000}; auto type = element::i32; auto byte_size = shape_size(shape) * sizeof(int32_t); - OPENVINO_SUPPRESS_DEPRECATED_START - auto aligned_weights_buffer = std::make_shared(byte_size); + auto aligned_weights_buffer = std::make_shared(byte_size); std::memset(aligned_weights_buffer->get_ptr(), 1, byte_size); - auto weights = std::make_shared>>( - aligned_weights_buffer->get_ptr(), - aligned_weights_buffer->size(), - aligned_weights_buffer); - OPENVINO_SUPPRESS_DEPRECATED_END + auto weights = + std::make_shared>>(aligned_weights_buffer->get_ptr(), + aligned_weights_buffer->size(), + aligned_weights_buffer); using namespace std::chrono; auto create_constant = [&]() { diff --git a/src/core/tests/eval.cpp b/src/core/tests/eval.cpp index 86b3cc2ecf82ce..fabf47f0f2f248 100644 --- a/src/core/tests/eval.cpp +++ b/src/core/tests/eval.cpp @@ -178,6 +178,27 @@ TEST(eval, evaluate_dynamic_range_sum) { ASSERT_EQ(cval, seq); } +TEST(eval, evaluate_dynamic_range_fp16_out) { + auto p_start = make_shared(element::i32, PartialShape{}); + auto p_stop = make_shared(element::i32, PartialShape{}); + auto p_step = make_shared(element::i32, PartialShape{}); + auto range = make_shared(p_start, p_stop, p_step, ov::element::f16); + auto model = make_shared(OutputVector{range}, ParameterVector{p_start, p_stop, p_step}); + auto result_tensor = ov::Tensor(); + auto out_vector = ov::TensorVector{result_tensor}; + auto in_vector = ov::TensorVector{make_tensor({}, {0}), + make_tensor({}, {3087}), + make_tensor({}, {1})}; + ASSERT_TRUE(model->evaluate(out_vector, in_vector)); + result_tensor = out_vector.at(0); + EXPECT_EQ(result_tensor.get_element_type(), element::f16); + EXPECT_EQ(result_tensor.get_shape(), (Shape{3087})); + auto cval = read_vector(result_tensor); + for (size_t i = 0; i < 3087; i++) { + ASSERT_EQ(cval[i], ov::float16(i)); + } +} + TEST(eval, evaluate_broadcast_v3_bidirectional) { Shape shape_a{4, 1}; auto A = make_shared(element::f32, shape_a); diff --git a/src/core/tests/int4.cpp b/src/core/tests/int4.cpp index 2edb82dda0183c..d9a20fbf3649b2 100644 --- a/src/core/tests/int4.cpp +++ b/src/core/tests/int4.cpp @@ -15,9 +15,9 @@ TEST(int4, convert_i4_to_string) { vector values{171, 16}; auto constant = make_shared(element::i4, Shape{3}, &values[0]); - vector ref{"-6", "-5", "1"}; + vector ref{"-5", "-6", "0"}; for (size_t i = 0; i < 3; ++i) { - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/core/tests/type_prop/mod.cpp b/src/core/tests/type_prop/mod.cpp index b1dbab11eea61f..0e5af52401b412 100644 --- a/src/core/tests/type_prop/mod.cpp +++ b/src/core/tests/type_prop/mod.cpp @@ -5,7 +5,171 @@ #include "openvino/op/mod.hpp" #include "arithmetic_ops.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/broadcast.hpp" 
+#include "openvino/op/constant.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" using Type = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_mod, ArithmeticOperator, Type); + +using ov::op::v0::Constant; +using ov::op::v0::Parameter; +using ov::op::v0::Squeeze; +using ov::op::v3::Broadcast; +using ov::op::v3::ShapeOf; + +class TypePropModV1Test : public TypePropOpTest {}; + +TEST_F(TypePropModV1Test, preserve_constant_data_on_inputs) { + const auto a = Constant::create(ov::element::i32, ov::Shape{4}, {4, 10, 22, 5}); + const auto b = Constant::create(ov::element::i32, ov::Shape{4}, {3, 4, 8, 3}); + const auto op = make_op(a, b); + + const auto param = std::make_shared(ov::element::i32, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({1, 2, 6, 2})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_on_inputs) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{5, 6}, 22, {3, 7}, -1, {7, 9}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{3, {12, 18}, {4, 6}, -1, {0, 4}}); + const auto op = make_op(std::make_shared(a), std::make_shared(b)); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{0, 2}, {4, 10}, {0, 5}, -1, -1})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_when_m_is_interval_scalar) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{5, 6}, 22, {3, 7}, -1, {7, 9}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{{12, 18}}); + const auto b_scalar = std::make_shared(std::make_shared(b)); + const auto op = make_op(std::make_shared(a), b_scalar); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{5, 6}, {4, 10}, {3, 7}, -1, {7, 9}})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_when_value_is_interval_scalar) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{3, 7}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{3, {12, 18}, {4, 6}, -1, {0, 4}}); + const auto a_scalar = std::make_shared(std::make_shared(a)); + const auto op = make_op(a_scalar, std::make_shared(b)); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{0, 2}, {3, 7}, {0, 5}, -1, -1})); +} + +// test params as {a, b, exp_result} +using IntervalModuloParams = std::tuple; + +class SingleDimModV1Test : public TypePropModV1Test, public testing::WithParamInterface { +protected: + void SetUp() override { + std::tie(a_dim, b_dim, exp_dim) = GetParam(); + } + + ov::Dimension a_dim, b_dim, exp_dim; +}; + +const auto v_and_m_static = testing::Values(IntervalModuloParams{{0, 0}, {1, 1}, {0, 0}}, + IntervalModuloParams{{0, 0}, {9, 9}, {0, 0}}, + IntervalModuloParams{{0, 0}, {1000, 1000}, {0, 0}}, + IntervalModuloParams{{10, 10}, {3, 3}, {1, 1}}, + 
IntervalModuloParams{{10, 10}, {6, 6}, {4, 4}}, + IntervalModuloParams{{10, 10}, {5, 5}, {0, 0}}, + IntervalModuloParams{{10, 10}, {15, 15}, {10, 10}}); + +const auto v_interval_m_static = testing::Values(IntervalModuloParams{{6, 7}, {4, 4}, {2, 3}}, + IntervalModuloParams{{6, 8}, {4, 4}, {0, 3}}, // Result [0,2,3] + IntervalModuloParams{{6, 8}, {10, 10}, {6, 8}}, + IntervalModuloParams{{6, 8}, {7, 7}, {0, 6}}, + IntervalModuloParams{{4, 8}, {7, 7}, {0, 6}}, + IntervalModuloParams{{15, 16}, {7, 7}, {1, 2}}, + IntervalModuloParams{{5, 20}, {5, 5}, {0, 4}}, + + IntervalModuloParams{{5, 10}, {7, 7}, {0, 6}}); + +const auto v_static_m_interval = testing::Values(IntervalModuloParams{{0, 0}, {3, 13}, {0, 0}}, + IntervalModuloParams{{10, 10}, {2, 4}, {0, 3}}, + IntervalModuloParams{{10, 10}, {2, 6}, {0, 4}}, + IntervalModuloParams{{10, 10}, {6, 9}, {1, 4}}, + IntervalModuloParams{{10, 10}, {9, 11}, {0, 10}}, + IntervalModuloParams{{10, 10}, {3, 11}, {0, 10}}, + IntervalModuloParams{{10, 10}, {3, 10}, {0, 9}}, + IntervalModuloParams{{10, 10}, {7, 8}, {2, 3}}, + IntervalModuloParams{{100, 100}, {2, 20}, {0, 19}}, + // can be estimated accurate as only two results are possible + IntervalModuloParams{{100, 100}, {15, 16}, {4, 10}}, + // can not be estimated accurate as there are three results [10,4,15] + // Requires to calculate all possibilities and pick min, max + IntervalModuloParams{{100, 100}, {15, 17}, {0, 16}}); + +const auto v_and_m_intervals = testing::Values(IntervalModuloParams{{1, 10}, {2, 9}, {0, 8}}, + IntervalModuloParams{{1, 10}, {6, 9}, {0, 8}}, + IntervalModuloParams{{1, 10}, {2, 12}, {0, 10}}, + IntervalModuloParams{{1, 10}, {6, 12}, {0, 10}}, + IntervalModuloParams{{1, 10}, {11, 12}, {1, 10}}, + IntervalModuloParams{{1, 10}, {11, 15}, {1, 10}}, + IntervalModuloParams{{4, 10}, {10, 13}, {0, 10}}, + IntervalModuloParams{{10, 20}, {3, 5}, {0, 4}}, + IntervalModuloParams{{10, 10}, {3, 10}, {0, 9}}, + IntervalModuloParams{{5, 20}, {5, 10}, {0, 9}}, + IntervalModuloParams{{10, 100}, {3, 20}, {0, 19}}, + IntervalModuloParams{{10, 100}, {2, 20}, {0, 19}}, + IntervalModuloParams{{10, 100}, {51, 60}, {0, 59}}); + +// If input is infinite or m has 0 then output is undefined. 
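Making that comment concrete: in the mask built by evaluate_undefined_result_mask, a result counts as undefined when the modulus interval touches zero or when either interval is unbounded, and such elements are reported as the widest possible interval, written {0, -1} in the dimension notation of these tests. An elementwise restatement (sketch only):

#include <cstdint>
#include <limits>

bool mod_result_is_undefined(int64_t v_ub, int64_t m_lb, int64_t m_ub) {
    const auto int_max = std::numeric_limits<int64_t>::max();
    return m_lb == 0 || m_ub == 0  // modulus bounds touch zero
           || m_lb == int_max      // modulus is unbounded ("infinite")
           || v_ub == int_max;     // value is unbounded
}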
+const auto v_and_m_special_values = testing::Values(IntervalModuloParams{{0, -1}, {5, 5}, {0, -1}}, + IntervalModuloParams{{10, -1}, {4, 4}, {0, -1}}, + // Evaluate low/up return [0, max] + // but evaluate both bounds return [0] as `m` has same bounds + IntervalModuloParams{{11, 11}, {0, 0}, {0, 0}}, + IntervalModuloParams{{11, 11}, {0, 5}, {0, -1}}, + IntervalModuloParams{{11, 20}, {0, 5}, {0, -1}}, + IntervalModuloParams{{11, 20}, {0, -1}, {0, -1}}, + IntervalModuloParams{{0, -1}, {0, -1}, {0, -1}}); + +INSTANTIATE_TEST_SUITE_P(v_and_m_static, SingleDimModV1Test, v_and_m_static); +INSTANTIATE_TEST_SUITE_P(value_interval_m_static, SingleDimModV1Test, v_interval_m_static); +INSTANTIATE_TEST_SUITE_P(value_static_m_interval, SingleDimModV1Test, v_static_m_interval); +INSTANTIATE_TEST_SUITE_P(value_and_m_as_intervals, SingleDimModV1Test, v_and_m_intervals); +INSTANTIATE_TEST_SUITE_P(value_and_m_special_values, SingleDimModV1Test, v_and_m_special_values); + +TEST_P(SingleDimModV1Test, preserve_value_on_inputs_i64) { + constexpr auto et = ov::element::i64; + const auto a = std::make_shared(et, ov::PartialShape{a_dim}); + const auto b = std::make_shared(et, ov::PartialShape{b_dim}); + const auto op = make_op(std::make_shared(a), std::make_shared(b)); + + const auto param = std::make_shared(et, ov::Shape{1}); + const auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + + EXPECT_EQ(output_shape, ov::PartialShape({exp_dim})); +} + +TEST_P(SingleDimModV1Test, preserve_value_on_inputs_i32) { + constexpr auto et = ov::element::i32; + const auto a = std::make_shared(et, ov::PartialShape{a_dim}); + const auto b = std::make_shared(et, ov::PartialShape{b_dim}); + const auto op = make_op(std::make_shared(a, et), std::make_shared(b, et)); + + const auto param = std::make_shared(et, ov::Shape{1}); + const auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + + EXPECT_EQ(output_shape, ov::PartialShape({exp_dim})); +} diff --git a/src/core/tests/uint4.cpp b/src/core/tests/uint4.cpp index 5c3b0a5e06af20..8285fdb3cd5e1c 100644 --- a/src/core/tests/uint4.cpp +++ b/src/core/tests/uint4.cpp @@ -13,9 +13,9 @@ TEST(uint4, convert_u4_to_string) { vector values{171, 16}; auto constant = make_shared(element::u4, Shape{3}, &values[0]); - vector ref{"10", "11", "1"}; + vector ref{"11", "10", "0"}; for (size_t i = 0; i < 3; ++i) { - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/core/tests/visitors/visitors.hpp b/src/core/tests/visitors/visitors.hpp index 838eade854181b..893d982a59b3eb 100644 --- a/src/core/tests/visitors/visitors.hpp +++ b/src/core/tests/visitors/visitors.hpp @@ -10,12 +10,13 @@ #include #include "ngraph/factory.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/core/attribute_visitor.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/util/variable.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/tensor.hpp" namespace ov { namespace test { @@ -217,10 +218,9 @@ class DeserializeAttributeVisitor : public AttributeVisitor { } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto a = 
::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { - auto& data = m_values.get(name); - data->read(a->get()->get_ptr(), a->get()->size()); + if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { + auto& data = m_values.get(name); + std::memcpy(a->get()->get_ptr(), data.data(), a->get()->size()); } else if (auto a = ov::as_type< ov::AttributeAdapter>>>( &adapter)) { @@ -240,7 +240,6 @@ class DeserializeAttributeVisitor : public AttributeVisitor { } else { OPENVINO_THROW("Attribute \"", name, "\" cannot be unmarshalled"); } - OPENVINO_SUPPRESS_DEPRECATED_END } // The remaining adapter methods fall back on the void adapter if not implemented void on_adapter(const std::string& name, ValueAccessor& adapter) override { @@ -309,10 +308,9 @@ class SerializeAttributeVisitor : public AttributeVisitor { } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { - ngraph::HostTensorPtr data = std::make_shared(element::u8, Shape{a->get()->size()}); - data->write(a->get()->get_ptr(), a->get()->size()); + if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { + ov::Tensor data(element::u8, Shape{a->get()->size()}); + std::memcpy(data.data(), a->get()->get_ptr(), a->get()->size()); m_values.insert(name, data); } else if (auto a = ov::as_type< ov::AttributeAdapter>>>( @@ -333,7 +331,6 @@ class SerializeAttributeVisitor : public AttributeVisitor { } else { OPENVINO_THROW("Attribute \"", name, "\" cannot be marshalled"); } - OPENVINO_SUPPRESS_DEPRECATED_END } // The remaining adapter methods fall back on the void adapter if not implemented void on_adapter(const std::string& name, ValueAccessor& adapter) override { diff --git a/src/frontends/common/src/manager.cpp b/src/frontends/common/src/manager.cpp index 35df484c2cab26..6194fca7583937 100644 --- a/src/frontends/common/src/manager.cpp +++ b/src/frontends/common/src/manager.cpp @@ -20,21 +20,6 @@ class FrontEndManager::Impl { std::mutex m_loading_mutex; std::vector m_plugins; - // Note, static methods below are required to create an order of initialization of static variables - // e.g. 
if users (not encouraged) created ov::Model globally, we need to ensure proper order of initialization - - /// \return map of shared object per frontend - static std::unordered_map>& get_shared_objects_map() { - static std::unordered_map> shared_objects_map; - return shared_objects_map; - } - - /// \return Mutex to guard access the shared object map - static std::mutex& get_shared_objects_mutex() { - static std::mutex shared_objects_map_mutex; - return shared_objects_map_mutex; - } - public: Impl() { search_all_plugins(); @@ -46,10 +31,6 @@ class FrontEndManager::Impl { auto fe_obj = std::make_shared(); fe_obj->m_shared_object = std::make_shared(plugin.get_so_pointer()); fe_obj->m_actual = plugin.get_creator().m_creator(); - - std::lock_guard guard(get_shared_objects_mutex()); - get_shared_objects_map().emplace(plugin.get_creator().m_name, fe_obj->m_shared_object); - return fe_obj; } @@ -164,6 +145,7 @@ class FrontEndManager::Impl { {".xml", {"ir", "ir"}}, {".onnx", {"onnx", "onnx"}}, {".pb", {"tf", "tensorflow"}}, + {".pbtxt", {"tf", "tensorflow"}}, {".tflite", {"tflite", "tensorflow_lite"}}, {".pdmodel", {"paddle", "paddle"}}, // {".ts", {"pytorch", "pytorch"}}, diff --git a/src/frontends/common/src/plugin_loader.cpp b/src/frontends/common/src/plugin_loader.cpp index a044152d8d590d..a98eff766bbc0d 100644 --- a/src/frontends/common/src/plugin_loader.cpp +++ b/src/frontends/common/src/plugin_loader.cpp @@ -16,17 +16,32 @@ #include -#include #include #include #include "openvino/util/file_util.hpp" +#include "openvino/util/log.hpp" #include "openvino/util/shared_object.hpp" #include "plugin_loader.hpp" using namespace ov; using namespace ov::frontend; +// Note, static methods below are required to create an order of initialization of static variables +// e.g. if users (not encouraged) created ov::Model globally, we need to ensure proper order of initialization + +/// \return map of shared object per frontend +std::unordered_map>& ov::frontend::get_shared_objects_map() { + static std::unordered_map> shared_objects_map; + return shared_objects_map; +} + +/// \return Mutex to guard access the shared object map +std::mutex& ov::frontend::get_shared_objects_mutex() { + static std::mutex shared_objects_map_mutex; + return shared_objects_map_mutex; +} + #ifdef OPENVINO_STATIC_LIBRARY # include "ov_frontends.hpp" @@ -131,6 +146,10 @@ bool PluginInfo::load() { m_load_failed = true; return false; } + + std::lock_guard guard(get_shared_objects_mutex()); + get_shared_objects_map().emplace(get_creator().m_name, get_so_pointer()); + return true; } diff --git a/src/frontends/common/src/plugin_loader.hpp b/src/frontends/common/src/plugin_loader.hpp index 93e6a5cc2eb5a3..dccf8ddf7a39f3 100644 --- a/src/frontends/common/src/plugin_loader.hpp +++ b/src/frontends/common/src/plugin_loader.hpp @@ -4,7 +4,12 @@ #pragma once -#include +#include +#include +#include +#include + +#include "openvino/frontend/manager.hpp" #ifdef _WIN32 static const char PathSeparator[] = ";"; @@ -15,6 +20,9 @@ static const char PathSeparator[] = ":"; namespace ov { namespace frontend { +std::unordered_map>& get_shared_objects_map(); +std::mutex& get_shared_objects_mutex(); + /// \brief Internal data structure holding by each frontend. Includes library handle and extensions. 
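The registry that used to live in FrontEndManager::Impl is now owned by plugin_loader and populated in PluginInfo::load, turning it into a process-wide keep-alive: the shared object of every successfully loaded frontend stays referenced, so the library cannot be unloaded while objects created from it are still alive. The shape of that pattern, standalone:

#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

// Function-local statics give a well-defined initialization order on first use.
std::unordered_map<std::string, std::shared_ptr<void>>& shared_objects() {
    static std::unordered_map<std::string, std::shared_ptr<void>> map;
    return map;
}

std::mutex& shared_objects_mutex() {
    static std::mutex m;
    return m;
}

void register_shared_object(const std::string& name, std::shared_ptr<void> so) {
    std::lock_guard<std::mutex> guard(shared_objects_mutex());
    shared_objects().emplace(name, std::move(so));  // keeps the library mapped
}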
class FrontEndSharedData { friend inline void add_extension_to_shared_data(std::shared_ptr& obj, diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index 8b8dca4d995ffb..ba515b5560641f 100644 --- a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -9,10 +9,10 @@ #include #include "input_model.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" -#include "ngraph/runtime/shared_buffer.hpp" #include "openvino/core/any.hpp" #include "openvino/core/so_extension.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/mmap_object.hpp" #include "transformations/resolve_names_collisions.hpp" @@ -116,8 +116,7 @@ void FrontEnd::add_extension(const ov::Extension::Ptr& ext) { InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const { std::ifstream local_model_stream; std::istream* provided_model_stream = nullptr; - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr weights; + std::shared_ptr weights; auto create_extensions_map = [&]() -> std::unordered_map { std::unordered_map exts; @@ -180,8 +179,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const } else if (variant.is()) { weights_path = variant.as(); #endif - } else if (variant.is>()) { - weights = variant.as>(); + } else if (variant.is>()) { + weights = variant.as>(); } } bool enable_mmap = variants[variants.size() - 1].is() ? variants[variants.size() - 1].as() : false; @@ -204,10 +203,9 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const if (!weights_path.empty()) { if (enable_mmap) { auto mapped_memory = ov::load_mmap_object(weights_path); - weights = - std::make_shared>>(mapped_memory->data(), - mapped_memory->size(), - mapped_memory); + weights = std::make_shared>>(mapped_memory->data(), + mapped_memory->size(), + mapped_memory); } else { std::ifstream bin_stream; bin_stream.open(weights_path.c_str(), std::ios::binary); @@ -222,17 +220,16 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const size_t file_size = bin_stream.tellg(); bin_stream.seekg(0, std::ios::beg); - auto aligned_weights_buffer = std::make_shared(file_size); + auto aligned_weights_buffer = std::make_shared(file_size); bin_stream.read(aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size()); bin_stream.close(); - weights = std::make_shared>>( + weights = std::make_shared>>( aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size(), aligned_weights_buffer); } } - OPENVINO_SUPPRESS_DEPRECATED_END return create_input_model(); } diff --git a/src/frontends/ir/src/input_model.cpp b/src/frontends/ir/src/input_model.cpp index 2f58a68c94f89b..6a32b22f786b52 100644 --- a/src/frontends/ir/src/input_model.cpp +++ b/src/frontends/ir/src/input_model.cpp @@ -18,10 +18,9 @@ #include "openvino/util/common_util.hpp" #include "utils.hpp" -OPENVINO_SUPPRESS_DEPRECATED_START namespace { void parse_pre_process(pugi::xml_node& root, - std::shared_ptr weights, + std::shared_ptr weights, std::shared_ptr model) { /* Preprocessing block can have two preprocessing types: * @@ -183,7 +182,9 @@ void parse_pre_process(pugi::xml_node& root, const char* data = weights->get_ptr() + offset; per_channel_values[item.first] = ov::op::v0::Constant::create(input_type, mean_shape, data); } + OPENVINO_SUPPRESS_DEPRECATED_START auto const_node = get_constant_from_source(std::make_shared(per_channel_values, 0)); + OPENVINO_SUPPRESS_DEPRECATED_END 
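Both weight-loading paths in the IR frontend above end in an ov::SharedBuffer: a raw (pointer, size) view bundled with an owner object (the mmap handle, or the AlignedBuffer just read from disk) whose lifetime it pins. Its essence, with simplified types rather than the real class:

#include <cstddef>
#include <utility>

template <class Owner>
class SharedView {
public:
    SharedView(char* data, std::size_t size, Owner owner)
        : m_data(data),
          m_size(size),
          m_owner(std::move(owner)) {}

    char* data() const { return m_data; }
    std::size_t size() const { return m_size; }

private:
    char* m_data;
    std::size_t m_size;
    Owner m_owner;  // keeps the mapping/allocation alive as long as the view exists
};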
OPENVINO_ASSERT(const_node); const auto& consumers = input_node->output(0).get_target_inputs(); auto add = std::make_shared(input_node, const_node); @@ -193,15 +194,13 @@ void parse_pre_process(pugi::xml_node& root, } } } // namespace -OPENVINO_SUPPRESS_DEPRECATED_END namespace ov { namespace frontend { namespace ir { -OPENVINO_SUPPRESS_DEPRECATED_START class InputModel::InputModelIRImpl { - std::shared_ptr m_weights; + std::shared_ptr m_weights; std::unordered_map m_extensions; std::unordered_map m_opsets; pugi::xml_node m_root; @@ -209,7 +208,7 @@ class InputModel::InputModelIRImpl { public: InputModelIRImpl(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions) : m_weights(weights), m_extensions(extensions) { @@ -227,11 +226,10 @@ class InputModel::InputModelIRImpl { }; InputModel::InputModel(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions) { _impl = std::make_shared(stream, weights, extensions); } -OPENVINO_SUPPRESS_DEPRECATED_END std::shared_ptr InputModel::convert() { return _impl->convert(); diff --git a/src/frontends/ir/src/input_model.hpp b/src/frontends/ir/src/input_model.hpp index 1b4da95f098b64..d5a9b64abaf0f8 100644 --- a/src/frontends/ir/src/input_model.hpp +++ b/src/frontends/ir/src/input_model.hpp @@ -7,9 +7,9 @@ #include #include -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/frontend/manager.hpp" #include "openvino/frontend/visibility.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { namespace frontend { @@ -20,11 +20,9 @@ class InputModel : public ov::frontend::InputModel { std::shared_ptr _impl; public: - OPENVINO_SUPPRESS_DEPRECATED_START InputModel(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions); - OPENVINO_SUPPRESS_DEPRECATED_END std::shared_ptr convert(); }; diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 42be66281d5d24..d245301633e4e3 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -20,6 +20,8 @@ #include "openvino/op/util/read_value_base.hpp" #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/util/variable.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "rt_info_deserializer.hpp" #include "transformations/rt_info/attributes.hpp" #include "utils.hpp" @@ -258,7 +260,6 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< if (skip_names.count(name) && !getStrAttribute(m_node.child("data"), name, val)) return; - OPENVINO_SUPPRESS_DEPRECATED_START if (auto a = ov::as_type>(&adapter)) { static_cast(*a) = ov::element::Type(val); } else if (auto a = ov::as_type>(&adapter)) { @@ -322,7 +323,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_id}); } a->set(m_variables[variable_id]); - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { std::string value; pugi::xml_node dn = m_node.child("data"); auto type = pugixml::utils::get_str_attr(m_node, "type"); @@ -331,7 +332,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< OPENVINO_THROW("No attrtibutes defined for ", type, " op!"); if 
(getStrAttribute(dn, name, value)) { - auto buffer = std::make_shared(value.size()); + auto buffer = std::make_shared(value.size()); auto data = static_cast(buffer->get_ptr()); value.copy(data, value.size()); a->set(buffer); @@ -356,11 +357,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< OPENVINO_THROW("Attribute and shape size are inconsistent for ", type, " op!"); char* data = m_weights->get_ptr() + offset; - auto buffer = - std::make_shared>>( - data, - size, - m_weights); + auto buffer = std::make_shared>>(data, size, m_weights); a->set(buffer); } } else if (auto a = ov::as_type>(&adapter)) { @@ -388,7 +385,6 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< } else { OPENVINO_THROW("Error IR reading. Attribute adapter can not be found for ", name, " parameter"); } - OPENVINO_SUPPRESS_DEPRECATED_END } void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor>& adapter) { @@ -409,10 +405,8 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< adapter.set(model); } -OPENVINO_SUPPRESS_DEPRECATED_START -std::shared_ptr ov::XmlDeserializer::parse_function( - const pugi::xml_node& root, - const std::shared_ptr& weights) { +std::shared_ptr ov::XmlDeserializer::parse_function(const pugi::xml_node& root, + const std::shared_ptr& weights) { // OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse"); struct FunctionNodes { @@ -553,7 +547,6 @@ std::shared_ptr ov::XmlDeserializer::parse_function( return function; } -OPENVINO_SUPPRESS_DEPRECATED_END class MetaDataParser : public ov::Meta { public: @@ -751,12 +744,10 @@ static const std::string& translate_type_name(const std::string& name) { return name; } -OPENVINO_SUPPRESS_DEPRECATED_START -std::shared_ptr ov::XmlDeserializer::create_node( - const std::vector>& inputs, - const pugi::xml_node& node, - const std::shared_ptr& weights, - const GenericLayerParams& params) { +std::shared_ptr ov::XmlDeserializer::create_node(const std::vector>& inputs, + const pugi::xml_node& node, + const std::shared_ptr& weights, + const GenericLayerParams& params) { // Check that inputs are correctly defined for (size_t i = 0; i < inputs.size(); i++) { if (!inputs[i].get_node()) @@ -959,4 +950,3 @@ std::shared_ptr ov::XmlDeserializer::create_node( return ovNode; } -OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/frontends/ir/src/ir_deserializer.hpp b/src/frontends/ir/src/ir_deserializer.hpp index f2062393f2986a..0b0d606ea4170b 100644 --- a/src/frontends/ir/src/ir_deserializer.hpp +++ b/src/frontends/ir/src/ir_deserializer.hpp @@ -10,11 +10,11 @@ #include #include "input_model.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/core/attribute_visitor.hpp" #include "openvino/core/op_extension.hpp" #include "openvino/op/loop.hpp" #include "openvino/op/util/sub_graph_base.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "utils.hpp" namespace ov { @@ -58,9 +58,8 @@ struct GenericLayerParams { class XmlDeserializer : public ov::AttributeVisitor { public: - OPENVINO_SUPPRESS_DEPRECATED_START explicit XmlDeserializer(const pugi::xml_node& node, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& opsets, const std::unordered_map& extensions, std::unordered_map>& variables, @@ -71,7 +70,6 @@ class XmlDeserializer : public ov::AttributeVisitor { m_extensions(extensions), m_variables(variables), m_version(version) {} - 
OPENVINO_SUPPRESS_DEPRECATED_END void on_adapter(const std::string& name, ov::ValueAccessor& value) override { std::string val; @@ -164,14 +162,12 @@ class XmlDeserializer : public ov::AttributeVisitor { // TODO consider to call only once per layer/TI-Loop node IoMap updated_io_map(const pugi::xml_node& node, const pugi::xml_node& body_node); - OPENVINO_SUPPRESS_DEPRECATED_START /// \brief Traverses xml node representation in order to create ov function for it. /// \param node xml node representation /// \param weights weights attached to current node /// \return shared pointer to function representing input node std::shared_ptr parse_function(const pugi::xml_node& root, - const std::shared_ptr& weights); - OPENVINO_SUPPRESS_DEPRECATED_END + const std::shared_ptr& weights); /// \brief Traverses xml node representation in order to get the purpose attribute of /// inputs/outputs in the body of Loop op. \param node xml node representation \return struct /// with value of purpuse attribute @@ -179,12 +175,10 @@ class XmlDeserializer : public ov::AttributeVisitor { GenericLayerParams parse_generic_params(const pugi::xml_node& node); - OPENVINO_SUPPRESS_DEPRECATED_START std::shared_ptr create_node(const ov::OutputVector& inputs, const pugi::xml_node& node, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const GenericLayerParams& params); - OPENVINO_SUPPRESS_DEPRECATED_END void read_meta_data(const std::shared_ptr& model, const pugi::xml_node& meta_section); @@ -194,9 +188,7 @@ class XmlDeserializer : public ov::AttributeVisitor { // -- DATA -- const pugi::xml_node m_node; - OPENVINO_SUPPRESS_DEPRECATED_START - const std::shared_ptr& m_weights; - OPENVINO_SUPPRESS_DEPRECATED_END + const std::shared_ptr& m_weights; const std::unordered_map& m_opsets; const std::unordered_map& m_extensions; std::unordered_map>& m_variables; diff --git a/src/frontends/onnx/frontend/src/core/tensor.hpp b/src/frontends/onnx/frontend/src/core/tensor.hpp index cb54edf8e95e22..76a97b057f2a61 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.hpp +++ b/src/frontends/onnx/frontend/src/core/tensor.hpp @@ -15,6 +15,7 @@ #include "ngraph/shape.hpp" #include "ngraph/type/element_type.hpp" #include "onnx_common/utils.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "utils/common.hpp" #include "utils/tensor_external_data.hpp" @@ -302,15 +303,13 @@ class Tensor { template std::vector get_external_data() const { const auto ext_data = detail::TensorExternalData(*m_tensor_proto); - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr buffer = nullptr; + std::shared_ptr buffer = nullptr; if (m_mmap_cache) { buffer = ext_data.load_external_mmap_data(m_model_dir, m_mmap_cache); } else { buffer = ext_data.load_external_data(m_model_dir); } return std::vector(buffer->get_ptr(), buffer->get_ptr() + buffer->size()); - OPENVINO_SUPPRESS_DEPRECATED_END } const void* get_data_ptr() const { diff --git a/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp b/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp new file mode 100644 index 00000000000000..8ebca88b32f4cf --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp @@ -0,0 +1,86 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/blackmanwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { 
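The graph assembled by blackmanwindow below computes, for each n in [0, size), w[n] = 0.42 - 0.50*cos(2*pi*n/N) + 0.08*cos(4*pi*n/N), where N = size for a periodic window and N = size - 1 otherwise. The same computation in scalar form, a sketch for cross-checking (assumes size > 1 in the symmetric case):

#define _USE_MATH_DEFINES
#include <cmath>
#include <vector>

std::vector<float> blackman_window(int size, bool periodic) {
    const double n = periodic ? size : size - 1;  // the divisor N
    std::vector<float> w(size);
    for (int i = 0; i < size; ++i) {
        w[i] = static_cast<float>(0.42 - 0.50 * std::cos(2.0 * M_PI * i / n) +
                                  0.08 * std::cos(4.0 * M_PI * i / n));
    }
    return w;
}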
+OutputVector blackmanwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX BlackmanWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#blackmanwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.42f}); + const auto a_1 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{-0.50f}); + const auto a_2 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.08f}); + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto four_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{4.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor_1, factor_2; + if (periodic) { + factor_1 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + factor_2 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, four_const), + float_size)); + } else { + factor_1 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + factor_2 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, four_const), + std::make_shared(float_size, one_const))); + } + + const auto cos_1 = std::make_shared(factor_1); + const auto cos_2 = std::make_shared(factor_2); + const auto scaled_cos_1 = std::make_shared(cos_1, a_1); + const auto scaled_cos_2 = std::make_shared(cos_2, a_2); + const auto y_values = + std::make_shared(std::make_shared(a_0, scaled_cos_1), scaled_cos_2); + + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp b/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp new file mode 100644 index 00000000000000..ccff09c84817af --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector blackmanwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hammingwindow.cpp b/src/frontends/onnx/frontend/src/op/hammingwindow.cpp new file mode 
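hammingwindow and hannwindow (the next two files) build the same cosine graph with different coefficients: Hamming uses a0 = 25/46 and a1 = 1 - 25/46, Hann uses a0 = a1 = 0.5, and both compute w[n] = a0 - a1*cos(2*pi*n/N). One parameterized scalar sketch covers both (same size > 1 caveat as above):

#define _USE_MATH_DEFINES
#include <cmath>
#include <vector>

std::vector<float> cosine_window(int size, bool periodic, float a0, float a1) {
    const double n = periodic ? size : size - 1;  // the divisor N
    std::vector<float> w(size);
    for (int i = 0; i < size; ++i)
        w[i] = static_cast<float>(a0 - a1 * std::cos(2.0 * M_PI * i / n));
    return w;
}

// cosine_window(N, p, 25.0f / 46.0f, 1.0f - 25.0f / 46.0f)  -> Hamming
// cosine_window(N, p, 0.5f, 0.5f)                           -> Hann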
100644 index 00000000000000..25d557f7de6bdc --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hammingwindow.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/hammingwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector hammingwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX HammingWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#hammingwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = std::make_shared( + std::make_shared(ov::element::f32, ov::Shape(), std::vector{25.0f}), + std::make_shared(ov::element::f32, ov::Shape(), std::vector{46.0f})); + const auto a_1 = std::make_shared( + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}), + a_0); + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor; + if (periodic) { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + } else { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + } + + const auto cos = std::make_shared(factor); + const auto scaled_cos = std::make_shared(cos, a_1); + const auto y_values = std::make_shared(a_0, scaled_cos); + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hammingwindow.hpp b/src/frontends/onnx/frontend/src/op/hammingwindow.hpp new file mode 100644 index 00000000000000..d088b4105abc3a --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hammingwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector hammingwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hannwindow.cpp b/src/frontends/onnx/frontend/src/op/hannwindow.cpp new file mode 100644 index 
00000000000000..b0e28afd2e5570 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hannwindow.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/hannwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector hannwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX HannWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#hannwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.5f}); + const auto a_1 = a_0; + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor; + if (periodic) { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + } else { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + } + + const auto cos = std::make_shared(factor); + const auto scaled_cos = std::make_shared(cos, a_1); + const auto y_values = std::make_shared(a_0, scaled_cos); + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hannwindow.hpp b/src/frontends/onnx/frontend/src/op/hannwindow.hpp new file mode 100644 index 00000000000000..0c9e6993048ef3 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hannwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector hannwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index e6707335afd0b8..c4d9a50c4ca637 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -29,6 +29,7 @@ #include "op/average_pool.hpp" #include "op/batch_norm.hpp" #include "op/bitshift.hpp" 
+#include "op/blackmanwindow.hpp" #include "op/cast.hpp" #include "op/cast_like.hpp" #include "op/ceil.hpp" @@ -75,6 +76,8 @@ #include "op/greater.hpp" #include "op/grid_sample.hpp" #include "op/gru.hpp" +#include "op/hammingwindow.hpp" +#include "op/hannwindow.hpp" #include "op/hard_sigmoid.hpp" #include "op/hard_swish.hpp" #include "op/hardmax.hpp" @@ -345,6 +348,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("BatchNormalization", 1, batch_norm); REGISTER_OPERATOR("BatchNormalization", 7, batch_norm); REGISTER_OPERATOR("BitShift", 1, bitshift); + REGISTER_OPERATOR("BlackmanWindow", 1, blackmanwindow); REGISTER_OPERATOR("Cast", 1, cast); REGISTER_OPERATOR("CastLike", 1, cast_like); REGISTER_OPERATOR("Ceil", 1, ceil); @@ -392,6 +396,8 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("Greater", 1, greater); REGISTER_OPERATOR("GridSample", 1, grid_sample); REGISTER_OPERATOR("GRU", 1, gru); + REGISTER_OPERATOR("HannWindow", 1, hannwindow); + REGISTER_OPERATOR("HammingWindow", 1, hammingwindow); REGISTER_OPERATOR("Hardmax", 1, hardmax); REGISTER_OPERATOR("Hardmax", 13, hardmax); REGISTER_OPERATOR("HardSigmoid", 1, hard_sigmoid); diff --git a/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp b/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp index 53e83e5d714101..9a40d1fc6d7595 100644 --- a/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp +++ b/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp @@ -12,7 +12,6 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/log.hpp" -OPENVINO_SUPPRESS_DEPRECATED_START namespace ngraph { namespace onnx_import { namespace detail { @@ -51,13 +50,13 @@ Buffer TensorExternalData::load_external_mmap_data(const std:: if (m_data_length > mapped_memory->size() || mapped_memory->size() == 0) { throw error::invalid_external_data{*this}; } - return std::make_shared>>( + return std::make_shared>>( mapped_memory->data() + m_offset, m_data_length > 0 ? 
m_data_length : static_cast(file_size) - m_offset, mapped_memory); } -Buffer TensorExternalData::load_external_data(const std::string& model_dir) const { +Buffer TensorExternalData::load_external_data(const std::string& model_dir) const { auto full_path = ov::util::path_join({model_dir, m_data_location}); #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) NGRAPH_SUPPRESS_DEPRECATED_START @@ -82,14 +81,13 @@ Buffer TensorExternalData::load_external_data(co // default value of m_offset is 0 external_data_stream.seekg(m_offset, std::ios::beg); - auto read_data = std::make_shared(read_data_length); + auto read_data = std::make_shared(read_data_length); external_data_stream.read(read_data->get_ptr(), read_data_length); external_data_stream.close(); - auto buffer = std::make_shared>>( - read_data->get_ptr(), - read_data->size(), - read_data); + auto buffer = std::make_shared>>(read_data->get_ptr(), + read_data->size(), + read_data); return buffer; } diff --git a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp index a13ccd457f485c..eb04e001e7ed4c 100644 --- a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp +++ b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp @@ -6,15 +6,15 @@ #include -#include "ngraph/runtime/shared_buffer.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" namespace ngraph { namespace onnx_import { namespace detail { -OPENVINO_SUPPRESS_DEPRECATED_START template -using Buffer = std::shared_ptr>>; +using Buffer = std::shared_ptr>>; using MappedMemoryHandles = std::shared_ptr>>; /// \brief Helper class used to load tensor data from external files class TensorExternalData { @@ -28,7 +28,7 @@ class TensorExternalData { /// the invalid_external_data exception is thrown. 
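As background on the buffer types used in this hunk: the deprecated `ngraph` shared buffer is replaced with `ov::AlignedBuffer` owned through `ov::SharedBuffer`. Below is a minimal sketch of that ownership pattern, using only the constructor and accessor calls visible in this diff; `load_file` is a hypothetical helper, not part of the frontend:

```cpp
#include <fstream>
#include <memory>
#include <string>

#include "openvino/runtime/aligned_buffer.hpp"
#include "openvino/runtime/shared_buffer.hpp"

// Hypothetical helper: read `length` bytes from `path` into an owning
// AlignedBuffer, then expose it as a SharedBuffer view. The third
// constructor argument of SharedBuffer keeps the owner alive for as
// long as the returned view exists.
std::shared_ptr<ov::AlignedBuffer> load_file(const std::string& path, size_t length) {
    auto owner = std::make_shared<ov::AlignedBuffer>(length);  // allocates `length` bytes
    std::ifstream stream(path, std::ios::binary);
    stream.read(owner->get_ptr<char>(), length);  // fill the owning buffer
    return std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(owner->get_ptr<char>(),
                                                                                  owner->size(),
                                                                                  owner);
}
```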
/// /// \return External binary data loaded into the SharedBuffer - Buffer load_external_data(const std::string& model_dir) const; + Buffer load_external_data(const std::string& model_dir) const; /// \brief Map (mmap for lin, MapViewOfFile for win) external data from tensor passed to constructor /// @@ -50,7 +50,6 @@ class TensorExternalData { uint64_t m_data_length = 0; std::string m_sha1_digest{}; }; -OPENVINO_SUPPRESS_DEPRECATED_END } // namespace detail } // namespace onnx_import } // namespace ngraph diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index 857c3853cf8fd2..87220792d2d349 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -127,6 +127,7 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): "Not equal to tolerance") xfail_issue_58033 = xfail_test(reason="Einsum operation misses support for complex ellipsis equations") xfail_issue_58676 = xfail_test(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07") +skip_issue_58676 = pytest.mark.skip(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07") xfail_issue_onnx_models_140 = xfail_test(reason="https://github.com/onnx/models/issues/140") xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") diff --git a/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt b/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt new file mode 100644 index 00000000000000..f8759ce921028a --- /dev/null +++ b/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "BlackmanWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_blackmanwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt new file mode 100644 index 00000000000000..1d60e783ead99a --- /dev/null +++ b/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "BlackmanWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_blackmanwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt b/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt new file mode 100644 index 00000000000000..2bf75ed29fe7f6 --- /dev/null +++ 
b/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HammingWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_hammingwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt new file mode 100644 index 00000000000000..1c9a9019829383 --- /dev/null +++ b/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HammingWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 0 for symmetric, 1 for periodic + type: INT + } + } + name: "test_hammingwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt b/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt new file mode 100644 index 00000000000000..2895bf5ad9b4d9 --- /dev/null +++ b/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HannWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_hannwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt new file mode 100644 index 00000000000000..ec2bc2b8e42bef --- /dev/null +++ b/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HannWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 0 for symmetric, 1 for periodic + type: INT + } + } + name: "test_hannwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + 
} + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index a442160ed2379c..361805e45cf0d4 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -6716,3 +6716,171 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unique_3d_with_duplicates_and_axis_2) test_case.run(); } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_blackmanwindow_periodic) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/blackmanwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {-0.000000014901161f, + 0.040212844f, + 0.20077012f, + 0.50978714f, + 0.8492299f, + 0.99999994f, + 0.84922975f, + 0.5097869f, + 0.20077008f, + 0.040212862f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_blackmanwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/blackmanwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {-0.00000001f, + 0.05086961f, + 0.25800052f, + 0.63000000f, + 0.95112991f, + 0.95112979f, + 0.62999994f, + 0.25800028f, + 0.05086958f, + -0.00000001f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hammingwindow_periodic) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hammingwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.08695650f, + 0.17414439f, + 0.40240526f, + 0.68455124f, + 0.91281211f, + 1.00000000f, + 0.91281211f, + 0.68455112f, + 0.40240520f, + 0.17414442f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hammingwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hammingwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.08695650f, + 0.19376230f, + 0.46420413f, + 0.77173913f, + 0.97246838f, + 0.97246838f, + 0.77173907f, + 0.46420389f, + 0.19376221f, + 0.08695650f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hannwindow_periodic) { 
+ auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hannwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.00000000f, + 0.09549150f, + 0.34549153f, + 0.65450853f, + 0.90450847f, + 1.00000000f, + 0.90450847f, + 0.65450835f, + 0.34549144f, + 0.09549153f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hannwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hannwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.00000000f, + 0.11697778f, + 0.41317594f, + 0.75000000f, + 0.96984637f, + 0.96984625f, + 0.74999994f, + 0.41317570f, + 0.11697769f, + 0.00000000f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} diff --git a/src/frontends/onnx/tests/skip_tests_config.cpp b/src/frontends/onnx/tests/skip_tests_config.cpp index 234cb99dfe9257..99d6bc297a1110 100644 --- a/src/frontends/onnx/tests/skip_tests_config.cpp +++ b/src/frontends/onnx/tests/skip_tests_config.cpp @@ -9,9 +9,11 @@ std::vector disabledTestPatterns() { return { -#ifndef BUILD_SHARED_LIBS +#ifdef OPENVINO_STATIC_LIBRARY // Disable tests for static libraries - ".*FrontendLibCloseTest.*" + ".*FrontendLibCloseTest.*", #endif + // CVS-123201 + ".*testUnloadLibBeforeDeletingDependentObject.*", }; } diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index d1ef686bdd4124..779444658d1e28 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -2,6 +2,7 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform import logging import onnx.backend.test @@ -24,6 +25,7 @@ xfail_issue_38735, skip_issue_39658, skip_issue_39658, + skip_issue_58676, xfail_issue_44858, xfail_issue_44965, xfail_issue_45180, @@ -376,12 +378,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ( xfail_issue_90649, - "OnnxBackendNodeModelTest.test_blackmanwindow_cpu", - "OnnxBackendNodeModelTest.test_blackmanwindow_symmetric_cpu", - "OnnxBackendNodeModelTest.test_hammingwindow_cpu", - "OnnxBackendNodeModelTest.test_hammingwindow_symmetric_cpu", - "OnnxBackendNodeModelTest.test_hannwindow_cpu", - "OnnxBackendNodeModelTest.test_hannwindow_symmetric_cpu", "OnnxBackendNodeModelTest.test_melweightmatrix_cpu", "OnnxBackendNodeModelTest.test_sequence_map_add_1_sequence_1_tensor_cpu", "OnnxBackendNodeModelTest.test_sequence_map_add_2_sequences_cpu", @@ -683,6 +679,22 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ] +if platform.system() == 'Darwin': + tests_expected_to_fail.extend([ + ( + skip_issue_58676, + "OnnxBackendNodeModelTest.test_mish_expanded_cpu" + ), + ( + skip_issue_58676, + 
"OnnxBackendNodeModelTest.test_resize_downsample_scales_linear_cpu" + ), + ( + skip_issue_58676, + "OnnxBackendNodeModelTest.test_div_uint8_cpu" + )] + ) + for test_group in tests_expected_to_fail: for test_case in test_group[1:]: expect_fail(f"{test_case}", test_group[0]) diff --git a/src/frontends/paddle/tests/skip_tests_config.cpp b/src/frontends/paddle/tests/skip_tests_config.cpp index 234cb99dfe9257..144e9d001ae276 100644 --- a/src/frontends/paddle/tests/skip_tests_config.cpp +++ b/src/frontends/paddle/tests/skip_tests_config.cpp @@ -9,7 +9,7 @@ std::vector disabledTestPatterns() { return { -#ifndef BUILD_SHARED_LIBS +#ifdef OPENVINO_STATIC_LIBRARY // Disable tests for static libraries ".*FrontendLibCloseTest.*" #endif diff --git a/src/frontends/pytorch/README.md b/src/frontends/pytorch/README.md new file mode 100644 index 00000000000000..92a38d693d9b21 --- /dev/null +++ b/src/frontends/pytorch/README.md @@ -0,0 +1,141 @@ +# OpenVINO PyTorch Frontend + +The PyTorch Frontend (PT FE) is a C++ based OpenVINO Frontend component that is +responsible for reading and converting a PyTorch model to an `ov::Model` object +that can be further serialized into the Intermediate Representation (IR) format. + +## Key Contacts + +People from the [openvino-pytorch-frontend-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-pytorch-frontend-maintainers) +have the rights to approve and merge PRs to the PyTorch Frontend component. +They can assist with any questions about the component. + +## Components + +The structure of OpenVINO PyTorch Frontend sources includes the following +directories: + +* [include](./include) is a public frontend API. +* [src](./src/) folder contains the sources of the component. + +## Architecture + +OpenVINO PyTorch Frontend is a C++ component that uses [TorchScriptPythonDecoder](../../bindings/python/src/openvino/frontend/pytorch/ts_decoder.py) +in Python code to parse a PyTorch model from a Python object. Usually, the frontend is +used inside [openvino.convert_model](../../../tools/ovc) in Python code or inside +openvino backend in `torch.compile_model`, in which case `TorchFXPythonDecoder` +is used to decode `torch.fx.graph`. The entire model conversion workflow can be +represented by the following diagram. + +```mermaid +flowchart TD + A[(torch.nn.Module)] --> torch.compile + subgraph torch.compile + subgraph TorchFXPythonDecoder + torch.fx.graph_module.GraphModule + end + TorchFXPythonDecoder --> E("pytorch::FrontEnd::load()") + E -->|ov::InputModel| F("pytorch::FrontEnd::convert()") + F --> G[(ov::Model)] + end + A[(torch.nn.Module)] --> openvino.convert_model + subgraph openvino.convert_model + subgraph TorchScriptPythonDecoder + torch.jit.trace ~~~ torch.jit.script + end + TorchScriptPythonDecoder --> B("pytorch::FrontEnd::load()") + B -->|ov::InputModel| C("pytorch::FrontEnd::convert()") + end + openvino.convert_model --> D[(ov::Model)] +``` + +OpenVINO PyTorch Frontend supports extensions. To add an extension, use +`ov::frontend::pytorch::Frontend::add_extension()` API. +The following extension types are supported: + +* `ov::frontend::tensorflow::ConversionExtension` or `ov::frontend::ConversionExtension` - add a new Loader into the conversion pipeline. +* `ov::TelemetryExtension` - enable telemetry for the frontend. +* `ov::BaseOpExtension` - enable support for a custom operation. +* `ov::detail::SOExtension` - allow support for `ov::BaseOpExtension` extensions loaded from an external library. 
+ +## How to Implement Support for a New PyTorch Operation + +PyTorch conversion into the OpenVINO opset operations consists of two stages: +1. Conversion of PyTorch operations to OpenVINO opset using [translators](./src/op/), + which directly transform a PyTorch operation into a sub-graph of the OpenVINO + opset. This is a 1->N conversion. +2. [Internal Transformations](./src/transforms) that transform a sub-graph of + operations into a sub-graph of the OpenVINO opset. This is an N->N conversion. + +### Operation Translation + +Most PyTorch operations can be converted by a single `translator`. The +dictionary of `translators` is placed in the [op_table.cpp](./src/op_table.cpp) +file and each translator is located in the [op](./src/op/) +directory: + +https://github.com/openvinotoolkit/openvino/blob/491454103ea2f29b242587c6084c19868a879a82/src/frontends/pytorch/src/op_table.cpp#L222-L227 + +The main rules for translator implementation: +1. Support dynamic shapes and ranks, undefined types, including future support of new types, such as strings and complex numbers. +2. Try to maintain the same algorithmic complexity of the decomposition. Fewer operations are usually better. +3. Use the latest OpenVINO opset version for the translation. +4. Use helper routines for operation checks and graph construction from `utils.hpp`. +5. Call `NodeContext::mark_node()` for each created node (a minimal illustrative translator sketch is shown below, after the layer-tests introduction). + +#### Inplace and Mutable Operations + +Some PyTorch operations modify the input tensor rather than the output. For example, +`aten::add` writes the result of addition to the output, but `aten::add_` writes the result +to its first input. To correctly convert such an operation: +* Ensure that the output tensor produced by the translation has the same type and shape as the initial input. +* Call `NodeContext::mutate_input()` to change the input tensor with the new value. + +#### PtFrameworkNode Primitive + +`PtFrameworkNode` is used to represent an unconverted operation from the original +model. You can use `FrontEnd::convert_partially()` instead of `FrontEnd::convert()` +to get an `ov::Model` containing unconverted operations. + +#### Operations Accepting Strings + +At the moment, OpenVINO core does not support strings. However, since strings in models are usually constants, you can extract them as `std::string` directly from Python using `NodeContext::const_input()`. + +#### Operations with Lists, Tuples, and Dicts + +These types are also not supported by OpenVINO core and generally require +implementing a transformation for N->N conversion. However, in some simple cases, lists +and tuples can be processed. Helpers for working with lists can be found in `utils.hpp`. +For example, `get_list_as_outputs` enables you to get list elements to work with them +in the translator or transformation. + +### Internal Transformations + +In rare cases, converting PyTorch operations requires a transformation. The main +difference between a transformation and a translation is that a transformation works on the graph rather +than on the `NodeContext` of a single operation. This means that some functionality +provided by `NodeContext` is not accessible in a transformation and usually +requires working with `PtFrameworkNode` directly. [General rules](https://docs.openvino.ai/2023.1/openvino_docs_transformations.html) +for writing transformations also apply to PT FE transformations. + +### PyTorch Frontend Layer Tests + +The layer tests are Python-based tests that check if a PyTorch operation is +supported by PT FE.
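Before the testing-pipeline steps, here is the minimal translator sketch referenced in the "Operation Translation" rules above. It is hypothetical (`translate_my_relu` is not a real entry in `op_table.cpp`); the helper names are the ones visible elsewhere in this diff:

```cpp
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/op/relu.hpp"
#include "utils.hpp"

namespace ov {
namespace frontend {
namespace pytorch {
namespace op {

// Hypothetical 1->1 translator: maps a single-input PyTorch op onto one
// node from the latest OpenVINO opset.
OutputVector translate_my_relu(const NodeContext& context) {
    num_inputs_check(context, 1, 1);  // validate the expected input count
    auto x = context.get_input(0);
    // mark_node associates the created node with the source op (rule 5 above)
    return {context.mark_node(std::make_shared<ov::op::v0::Relu>(x))};
}

}  // namespace op
}  // namespace pytorch
}  // namespace frontend
}  // namespace ov
```

A matching `{"aten::my_relu", op::translate_my_relu}` entry in the supported-ops map of `op_table.cpp` would wire such a translator in, following the map entries visible in this diff.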
The testing pipeline of the layer tests consists of four +steps: +1. Create a simple model containing the PyTorch operation to be tested. +2. Convert this model into an OpenVINO Model. +3. Infer the original model using PyTorch and infer the OpenVINO Model. +4. Compare the inference results between both frameworks. + +To set up the environment for running the layer tests, follow these [instructions](../../../tests/layer_tests/README.md). + +To test the entire suite of the PyTorch operation set support, run the following command: +```bash +python -m pytest layer_tests/pytorch_tests +``` + +## See Also + * [OpenVINO README](../../../README.md) + * [OpenVINO Core Components](../../README.md) + * [Developer documentation](../../../docs/dev/index.md) diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp index 066c203e3a1938..d5878783c314af 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp @@ -40,6 +40,9 @@ class TorchDecoder : public IDecoder { // Return shape if inputs has torch::Tensor type in the original model, otherwise returns the shape [] of a scalar virtual PartialShape get_input_shape(size_t index) const = 0; + // Return strides if inputs has torch::Tensor type in original model, otherwise return []. + virtual const std::vector& get_input_strides(size_t index) const = 0; + // Return element::Type when it the original type can be represented, otherwise returns PT-specific data type object // (see custom_type.hpp) virtual Any get_input_type(size_t index) const = 0; diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 0910aa3e057e72..36d4027dcc426f 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -20,6 +20,7 @@ #include "transformations/op_conversions/convert_convertlike.hpp" #include "transformations/resolve_names_collisions.hpp" #include "transforms.hpp" +#include "transforms/align_types_removal.hpp" #include "transforms/append_list_unpack_replacer.hpp" #include "transforms/aten_cat_replacer.hpp" #include "transforms/aten_getitem_replacer.hpp" @@ -41,6 +42,7 @@ #include "transforms/softmax_reshape_elimination.hpp" #include "transforms/string_equality_replacer.hpp" #include "transforms/tuple_unpack_replacer.hpp" +#include "transforms/u4_block_repack.hpp" #include "translate_session.hpp" namespace ov { @@ -176,6 +178,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass( element::TypeVector{element::u8, element::i8, element::u4, element::i4}); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -200,8 +203,11 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); + // Second pass of AlignTypesRemoval after all converting transformations + manager.register_pass(); manager.register_pass(); manager.run_passes(model); diff --git a/src/frontends/pytorch/src/helper_ops/align_types.hpp b/src/frontends/pytorch/src/helper_ops/align_types.hpp new file mode 100644 index 00000000000000..cd69af250fa30d --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/align_types.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation 
+// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "internal_op.hpp" +#include "openvino/frontend/decoder.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class AlignTypes : public InternalOperation { +public: + AlignTypes(const Output& lhs, const Output& rhs, bool align_scalars) + : InternalOperation("ov::align_types", + {lhs, rhs}, + 2, + "This is internal operation for type alignment and should be removed " + "at normalization step. It can't be removed if types can't be resolved."), + m_align_scalars(align_scalars) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + auto lhs = input_value(0); + auto rhs = input_value(1); + auto out_type = infer_types(lhs, rhs, m_align_scalars); + set_output_type(0, out_type, get_input_partial_shape(0)); + set_output_type(1, out_type, get_input_partial_shape(1)); + } + +private: + const bool m_align_scalars; +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/helper_ops/internal_op.hpp b/src/frontends/pytorch/src/helper_ops/internal_op.hpp new file mode 100644 index 00000000000000..510654dce8620a --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/internal_op.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/frontend/decoder.hpp" +#include "pt_framework_node.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class InternalOpDecoder : public DummyDecoder { +public: + explicit InternalOpDecoder(const std::string& op_type, const size_t num_outputs) + : m_op_type(op_type), + m_num_outputs(num_outputs) {} + const std::string& get_op_type() const override { + return m_op_type; + } + size_t num_of_outputs() const override { + return m_num_outputs; + } + size_t get_subgraph_size() const override { + return 0; + } + const std::string& decoder_type_name() const override { + return m_decoder_type; + } + +private: + const std::string m_op_type; + const std::string m_decoder_type = "internal_op"; + const size_t m_num_outputs; +}; + +class InternalOperation : public PtFrameworkNode { +protected: + InternalOperation(const std::string& op_type, + const OutputVector& inputs, + size_t num_outputs, + const std::string& no_conversion_reason) + : PtFrameworkNode(std::make_shared(op_type, num_outputs), inputs) { + auto attrs = get_attrs(); + attrs[PtFrameworkNode::failed_conversion_key] = no_conversion_reason; + set_attrs(attrs); + } +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/add.cpp b/src/frontends/pytorch/src/op/add.cpp index f0a997b6f8fa3a..33699ad90fa39c 100644 --- a/src/frontends/pytorch/src/op/add.cpp +++ b/src/frontends/pytorch/src/op/add.cpp @@ -15,7 +15,9 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_add(const NodeContext& context) { +using namespace ov::op; + +OutputVector translate_add_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto lhs = context.get_input(0); auto rhs = context.get_input(1); @@ -26,12 +28,28 @@ OutputVector translate_add(const NodeContext& context) { // Case when two lists gets concatenated FRONT_END_OP_CONVERSION_CHECK(false, "aten::add is used for concatenation of lists, not possible to convert"); } - align_eltwise_input_types(context, lhs, rhs, true); + if 
(inplace) { + if (lhs.get_element_type().is_dynamic() || lhs.get_element_type() != rhs.get_element_type()) + rhs = context.mark_node(std::make_shared(rhs, lhs)); + } else { + align_eltwise_input_types(context, lhs, rhs, true); + } if (!context.input_is_none(2)) { - auto converted_alpha = context.mark_node(std::make_shared(context.get_input(2), rhs)); - rhs = context.mark_node(std::make_shared(converted_alpha, rhs)); + auto converted_alpha = context.mark_node(std::make_shared(context.get_input(2), rhs)); + rhs = context.mark_node(std::make_shared(converted_alpha, rhs)); } - return {context.mark_node(std::make_shared(lhs, rhs))}; + auto add = context.mark_node(std::make_shared(lhs, rhs)); + if (inplace) + context.mutate_input(0, add); + return {add}; +}; + +OutputVector translate_add(const NodeContext& context) { + return translate_add_common(context, false); +}; + +OutputVector translate_add_(const NodeContext& context) { + return translate_add_common(context, true); }; } // namespace op diff --git a/src/frontends/pytorch/src/op/as_strided.cpp b/src/frontends/pytorch/src/op/as_strided.cpp new file mode 100644 index 00000000000000..5d1dfe38bdaa17 --- /dev/null +++ b/src/frontends/pytorch/src/op/as_strided.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_update.hpp" +#include "openvino/op/tile.hpp" +#include "openvino/op/transpose.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; +bool compare_strides(const std::tuple& a, const std::tuple& b) { + return std::get<0>(a) > std::get<0>(b); +} +OutputVector translate_as_strided(const NodeContext& context) { + // "aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? 
storage_offset=None) -> Tensor(a)" + num_inputs_check(context, 3, 4); + auto decoder = context.get_decoder(); + auto input = context.get_input(0); + auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); + auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); + auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1})); + auto input_strides = decoder->get_input_strides(0); + FRONT_END_OP_CONVERSION_CHECK(input_strides.size() != 0, + "aten::as_strided: Couldn't retrieve input stride information from TorchScript."); + + std::vector idxs(input_strides.size()); + iota(idxs.begin(), idxs.end(), 0); + std::vector> stride_idxs(idxs.size()); + std::for_each(idxs.rbegin(), idxs.rend(), [&](size_t& idx) { + stride_idxs[idx] = {input_strides[idx], idx}; + }); + + std::sort(stride_idxs.begin(), stride_idxs.end(), compare_strides); + std::vector transpose_idx(idxs.size()); + int transpose_counter = 0; + std::for_each(stride_idxs.begin(), stride_idxs.end(), [&](std::tuple& pair) { + transpose_idx[transpose_counter] = uint64_t(std::get<1>(pair)); + transpose_counter++; + }); + auto transpose_idx_const = + context.mark_node(v0::Constant::create(element::i32, Shape{transpose_idx.size()}, transpose_idx)); + auto transposed_input = context.mark_node(std::make_shared(input, transpose_idx_const)); + auto flat_input = context.mark_node(std::make_shared(transposed_input, const_neg_1, false)); + std::deque> sizes; + std::deque> strides; + if (std::dynamic_pointer_cast(context.get_input_from_visible_context(1).get_node_shared_ptr())) { + auto input_vector = context.const_input>(1); + std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) { + auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val})); + sizes.push_front(const_input); + }); + } else { + sizes = get_list_as_outputs(context.get_input(1)); + } + if (std::dynamic_pointer_cast(context.get_input_from_visible_context(2).get_node_shared_ptr())) { + auto input_vector = context.const_input>(2); + std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) { + auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val})); + strides.push_front(const_input); + }); + } else { + strides = get_list_as_outputs(context.get_input(2)); + } + auto offset = const_0->output(0); + if (!context.input_is_none(3)) { + offset = context.get_input(3); + } + FRONT_END_OP_CONVERSION_CHECK(sizes.size() == strides.size(), + "aten::as_strided: Vectors for strides and sizes need to have equal length."); + auto strides_size = strides.size() - 1; + auto i = 0; + auto strides_length_const = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {strides.size()})); + auto ones_strides_len = context.mark_node(std::make_shared(const_1, strides_length_const)); + auto indices = const_0; + std::for_each(strides.rbegin(), strides.rend(), [&](Output& stride) { + auto const_num_iter = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {strides_size - i})); + stride = context.mark_node(std::make_shared(stride, element::i32)); + auto size = sizes.at(strides_size - i); + auto range = context.mark_node(std::make_shared(const_0, size, const_1, element::i32)); + range = context.mark_node(std::make_shared(range, stride)); + auto iteration_shape = context.mark_node( + std::make_shared(ones_strides_len, const_num_iter, const_neg_1, const_0)); + range =
context.mark_node(std::make_shared(range, iteration_shape, false)); + indices = context.mark_node(std::make_shared(indices, range)); + i++; + }); + indices = context.mark_node(std::make_shared(indices, offset)); + auto gather = context.mark_node(std::make_shared(flat_input, indices, const_0)); + return {gather}; +}; +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/bitwise.cpp b/src/frontends/pytorch/src/op/bitwise.cpp index 6e3b1fe5f49ee4..8cbae192ca6bef 100644 --- a/src/frontends/pytorch/src/op/bitwise.cpp +++ b/src/frontends/pytorch/src/op/bitwise.cpp @@ -17,7 +17,7 @@ OutputVector translate_bitwise_not(const NodeContext& context) { num_inputs_check(context, 1, 2); auto x = context.get_input(0); FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not suppored only for boolean input"); + "aten::bitwise_not supported only for boolean input"); auto not_x = context.mark_node(std::make_shared(x)); if (!context.input_is_none(1)) { context.mutate_input(1, not_x); @@ -30,7 +30,7 @@ OutputVector translate_bitwise_and(const NodeContext& context) { auto x = context.get_input(0); auto y = context.get_input(1); FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not suppored only for boolean input"); + "aten::bitwise_not supported only for boolean input"); auto and_x = context.mark_node(std::make_shared(x, y)); return {and_x}; }; @@ -40,7 +40,7 @@ OutputVector translate_bitwise_or(const NodeContext& context) { auto x = context.get_input(0); auto y = context.get_input(1); FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not suppored only for boolean input"); + "aten::bitwise_not supported only for boolean input"); auto or_x = context.mark_node(std::make_shared(x, y)); return {or_x}; }; diff --git a/src/frontends/pytorch/src/op/div.cpp b/src/frontends/pytorch/src/op/div.cpp index 7fb12ec253413a..dbbb6c89af7e6c 100644 --- a/src/frontends/pytorch/src/op/div.cpp +++ b/src/frontends/pytorch/src/op/div.cpp @@ -17,7 +17,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_div(const NodeContext& context) { +OutputVector translate_div_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); @@ -34,7 +34,12 @@ OutputVector translate_div(const NodeContext& context) { y = context.mark_node(std::make_shared(y, element::f32)); } } - align_eltwise_input_types(context, x, y, true); + if (inplace) { + if (x.get_element_type().is_dynamic() || x.get_element_type() != y.get_element_type()) + y = context.mark_node(std::make_shared(x, y)); + } else { + align_eltwise_input_types(context, x, y, true); + } auto res = context.mark_node(std::make_shared(x, y, true)); // TODO: ticket 103296; Temporarily disable ConvertDivide transformation disable_divide_conversion(res); @@ -44,9 +49,19 @@ OutputVector translate_div(const NodeContext& context) { const auto convert = context.mark_node(std::make_shared(res, element::i32)); res = context.mark_node(std::make_shared(convert, x)); } + if (inplace) + context.mutate_input(0, res); return {res}; }; +OutputVector translate_div(const NodeContext& context) { + return translate_div_common(context, false); +}; + +OutputVector translate_div_(const NodeContext& context) { + return translate_div_common(context, true); +}; + } // namespace op } // namespace 
pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp index 735324405d1f11..82231472e401be 100644 --- a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp +++ b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp @@ -15,6 +15,7 @@ #include "openvino/op/matmul.hpp" #include "openvino/op/multiply.hpp" #include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" #include "openvino/op/softmax.hpp" @@ -22,6 +23,7 @@ #include "openvino/op/squeeze.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "openvino/op/util/framework_node.hpp" #include "utils.hpp" namespace ov { @@ -31,10 +33,7 @@ namespace op { using namespace ov::op; -OutputVector translate_scaled_dot_product_attention(const NodeContext& context) { - // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float - // dropout_p=0., bool is_causal=False) - num_inputs_check(context, 6, 6); +std::shared_ptr translate_scaled_dot_product_attention_common(const NodeContext& context) { auto query = context.get_input(0); auto key = context.get_input(1); auto value = context.get_input(2); @@ -68,7 +67,10 @@ OutputVector translate_scaled_dot_product_attention(const NodeContext& context) minus_inf = context.mark_node(std::make_shared(minus_inf, scaled_atten)); // two types of masks are supported. A boolean mask where a value of True indicates that the element should take // part in attention. A float mask of the same type as query, key, value that is added to the attention score. - auto is_causal = context.const_input(5); + auto is_causal = false; + if (!context.input_is_none(5)) { + is_causal = context.const_input(5); + } if (is_causal || !context.input_is_none(3)) { Output mask; Output atten_mask; @@ -100,10 +102,30 @@ OutputVector translate_scaled_dot_product_attention(const NodeContext& context) scaled_atten = context.mark_node(std::make_shared(scaled_atten, atten_mask)); } scaled_atten = context.mark_node(std::make_shared(scaled_atten, -1)); - return {context.mark_node(std::make_shared(scaled_atten, value))}; + return context.mark_node(std::make_shared(scaled_atten, value)); +}; + +OutputVector translate_scaled_dot_product_attention(const NodeContext& context) { + // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float + // dropout_p=0., bool is_causal=False) + num_inputs_check(context, 6, 6); + return {translate_scaled_dot_product_attention_common(context)}; +}; + +OutputVector translate_scaled_dot_product_attention_fx(const NodeContext& context) { + // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float + // dropout_p=0., bool is_causal=False) + num_inputs_check(context, 3, 6); + auto output = translate_scaled_dot_product_attention_common(context); + // TODO: scaled_dot_product_flash_attention has 9 outputs but for most cases only + // the first output is used. The rest of the outputs should be returned properly as + // needed.
+ ov::OutputVector out_vec; + out_vec.push_back(output); + return {context.mark_node(make_list_construct(out_vec))}; }; } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/sub.cpp b/src/frontends/pytorch/src/op/sub.cpp index 94963ed9bdb61f..62534aee53864b 100644 --- a/src/frontends/pytorch/src/op/sub.cpp +++ b/src/frontends/pytorch/src/op/sub.cpp @@ -15,18 +15,34 @@ namespace op { using namespace ov::op; -OutputVector translate_sub(const NodeContext& context) { +OutputVector translate_sub_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); - align_eltwise_input_types(context, x, y); + if (inplace) { + if (x.get_element_type().is_dynamic() || x.get_element_type() != y.get_element_type()) + y = context.mark_node(std::make_shared(x, y)); + } else { + align_eltwise_input_types(context, x, y); + } // default alpha is 1 so no need to multiply if alpha is not provided if (!context.input_is_none(2)) { auto alpha = context.get_input(2); auto casted_alpha = context.mark_node(std::make_shared(alpha, y)); y = context.mark_node(std::make_shared(casted_alpha, y)); } - return {context.mark_node(std::make_shared(x, y))}; + auto sub = context.mark_node(std::make_shared(x, y)); + if (inplace) + context.mutate_input(0, sub); + return {sub}; +}; + +OutputVector translate_sub(const NodeContext& context) { + return translate_sub_common(context, false); +}; + +OutputVector translate_sub_(const NodeContext& context) { + return translate_sub_common(context, true); }; } // namespace op diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index eaf66fa0cd8094..1124c9f7ec6de9 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -23,6 +23,7 @@ OP_CONVERTER(translate_adaptive_max_pool3d); OP_CONVERTER(translate_adaptive_max_pool2d); OP_CONVERTER(translate_adaptive_max_pool1d); OP_CONVERTER(translate_add); +OP_CONVERTER(translate_add_); OP_CONVERTER(translate_addcmul); OP_CONVERTER(translate_addmm); OP_CONVERTER(translate_all); @@ -34,6 +35,7 @@ OP_CONVERTER(translate_argmax); OP_CONVERTER(translate_argsort); OP_CONVERTER(translate_argmax); OP_CONVERTER(translate_argmin); +OP_CONVERTER(translate_as_strided); OP_CONVERTER(translate_as_tensor); OP_CONVERTER(translate_avg_poolnd); OP_CONVERTER(translate_bool); @@ -56,6 +58,7 @@ OP_CONVERTER(translate_deform_conv); OP_CONVERTER(translate_derive_index); OP_CONVERTER(translate_dim); OP_CONVERTER(translate_div); +OP_CONVERTER(translate_div_); OP_CONVERTER(translate_elu); OP_CONVERTER(translate_embedding); OP_CONVERTER(translate_embedding_bag); @@ -175,6 +178,7 @@ OP_CONVERTER(translate_squeeze); OP_CONVERTER(translate_std); OP_CONVERTER(translate_std_mean); OP_CONVERTER(translate_sub); +OP_CONVERTER(translate_sub_); OP_CONVERTER(translate_sum); OP_CONVERTER(translate_t); OP_CONVERTER(translate_to); @@ -214,6 +218,7 @@ OP_CONVERTER(translate_group_norm_fx); OP_CONVERTER(translate_index_fx); OP_CONVERTER(translate_layer_norm_fx); OP_CONVERTER(translate_max_poolnd_fx); +OP_CONVERTER(translate_scaled_dot_product_attention_fx); OP_CONVERTER(translate_slice_fx); OP_CONVERTER(translate_softmax_fx); OP_CONVERTER(translate_transpose_fx); @@ -246,7 +251,7 @@ const std::map get_supported_ops_ts() { {"aten::adaptive_max_pool2d", op::quantizable_op}, {"aten::adaptive_max_pool3d", 
op::quantizable_op}, {"aten::add", op::translate_add}, - {"aten::add_", op::inplace_op}, + {"aten::add_", op::translate_add_}, {"aten::addcmul", op::translate_addcmul}, {"aten::addmm", op::translate_addmm}, {"aten::all", op::translate_all}, @@ -256,6 +261,7 @@ const std::map get_supported_ops_ts() { {"aten::argmax", op::translate_argmax}, {"aten::argmin", op::translate_argmin}, {"aten::argsort", op::translate_argsort}, + {"aten::as_strided", op::translate_as_strided}, {"aten::as_tensor", op::translate_as_tensor}, {"aten::asin", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten::asin_", op::inplace_op>}, @@ -307,7 +313,7 @@ const std::map get_supported_ops_ts() { {"aten::dequantize", op::skip_node}, // we convert model to fp32 using FQ, so dequantization is not needed {"aten::dim", op::translate_dim}, {"aten::div", op::translate_div}, - {"aten::div_", op::inplace_op}, + {"aten::div_", op::translate_div_}, {"aten::dropout", op::skip_node}, {"aten::dropout_", op::skip_node}, {"aten::elu", op::translate_elu}, @@ -403,9 +409,9 @@ const std::map get_supported_ops_ts() { {"aten::minimum", op::translate_minimum}, {"aten::mm", op::translate_1to1_match_2_inputs}, {"aten::mul", op::translate_1to1_match_2_inputs_align_types}, - {"aten::mul_", op::inplace_op>}, + {"aten::mul_", op::inplace_translate_1to1_match_2_inputs_align_types}, {"aten::multiply", op::translate_1to1_match_2_inputs_align_types}, - {"aten::multiply_", op::inplace_op>}, + {"aten::multiply_", op::inplace_translate_1to1_match_2_inputs_align_types}, {"aten::narrow", op::translate_narrow}, {"aten::ne", op::translate_1to1_match_2_inputs_align_types}, {"aten::neg", op::translate_neg}, @@ -476,7 +482,7 @@ const std::map get_supported_ops_ts() { {"aten::std", op::translate_std}, {"aten::std_mean", op::translate_std_mean}, {"aten::sub", op::translate_sub}, - {"aten::sub_", op::inplace_op}, + {"aten::sub_", op::translate_sub_}, {"aten::sum", op::translate_sum}, {"aten::swapaxes", op::quantizable_op}, {"aten::t", op::translate_t}, @@ -557,6 +563,7 @@ const std::map get_supported_ops_fx() { {"aten.arange.default", op::translate_arange_fx}, {"aten.argmax.default", op::translate_argmax}, {"aten.avg_pool2d.default", op::translate_avg_poolnd}, + {"aten.baddbmm.default", op::translate_addmm}, {"aten.bitwise_and.Tensor", op::translate_bitwise_and}, {"aten.bmm.default", op::translate_1to1_match_2_inputs_align_types}, {"aten.cat.default", op::translate_cat_fx}, @@ -583,6 +590,7 @@ const std::map get_supported_ops_fx() { {"aten.hardswish_.default", op::inplace_op>}, {"aten.hardtanh_.default", op::inplace_op}, {"aten.index.Tensor", op::translate_index_fx}, + {"aten.leaky_relu_.default", op::inplace_op>}, {"aten.lift_fresh_copy.default", op::skip_node}, {"aten.linalg_vector_norm.default", op::translate_linalg_vector_norm}, {"aten.log.default", op::translate_log}, @@ -605,6 +613,7 @@ const std::map get_supported_ops_fx() { {"aten.relu.default", op::translate_1to1_match_1_inputs}, {"aten.relu_.default", op::inplace_op>}, {"aten.rsub.Scalar", op::translate_rsub}, + {"aten._scaled_dot_product_flash_attention.default", op::translate_scaled_dot_product_attention_fx}, {"aten.select.int", op::translate_select}, {"aten.sigmoid.default", op::translate_1to1_match_1_inputs}, {"aten.silu.default", op::translate_1to1_match_1_inputs}, diff --git a/src/frontends/pytorch/src/pt_framework_node.hpp b/src/frontends/pytorch/src/pt_framework_node.hpp index 04b71d1169ae81..00d967200405cb 100644 --- a/src/frontends/pytorch/src/pt_framework_node.hpp +++ 
b/src/frontends/pytorch/src/pt_framework_node.hpp @@ -20,14 +20,17 @@ class PtFrameworkNode : public ov::op::util::FrameworkNode { PtFrameworkNode(const std::shared_ptr<TorchDecoder>& decoder, const OutputVector& inputs, size_t output_size, - bool is_backprop = false) + bool is_reverseprop = false) : ov::op::util::FrameworkNode(inputs, output_size, decoder->get_subgraph_size()), m_decoder(decoder) { ov::op::util::FrameworkNodeAttrs attrs; attrs.set_type_name("PTFrameworkNode"); - if (is_backprop) { - attrs[op_type_key] = m_decoder->get_op_type() + "_backprop"; + if (is_reverseprop) { + attrs[op_type_key] = m_decoder->get_op_type() + "_reverseprop"; attrs[schema_key] = "None"; + attrs[failed_conversion_key] = + "This is an internal OpenVINO operation representing reverse data propagation. It should not appear in " + "the graph in a normal conversion flow and might be the result of other failures."; } else { attrs[op_type_key] = m_decoder->get_op_type(); attrs[schema_key] = m_decoder->get_schema();
diff --git a/src/frontends/pytorch/src/transforms/align_types_removal.cpp b/src/frontends/pytorch/src/transforms/align_types_removal.cpp new file mode 100644 index 00000000000000..c5e43d8af13004 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/align_types_removal.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "align_types_removal.hpp" + +#include <memory> +#include <utility> + +#include "helper_ops/align_types.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/split.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/util/framework_node.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::op; + +AlignTypesRemoval::AlignTypesRemoval() { + auto align_types_pattern = ov::pass::pattern::wrap_type<AlignTypes>(); + + ov::matcher_pass_callback callback = [](ov::pass::pattern::Matcher& m) { + auto align_types = std::dynamic_pointer_cast<AlignTypes>(m.get_match_root()); + if (!align_types) + return false; + auto lhs_itype = align_types->get_input_element_type(0); + auto rhs_itype = align_types->get_input_element_type(1); + auto lhs_otype = align_types->get_output_element_type(0); + auto rhs_otype = align_types->get_output_element_type(1); + if (lhs_otype.is_static() && rhs_otype.is_static()) { + auto out1 = align_types->input_value(0); + auto out2 = align_types->input_value(1); + if (lhs_itype != lhs_otype) + out1 = std::make_shared<v0::Convert>(align_types->input_value(0), lhs_otype); + if (rhs_itype != rhs_otype) + out2 = std::make_shared<v0::Convert>(align_types->input_value(1), rhs_otype); + align_types->output(0).replace(out1); + align_types->output(1).replace(out2); + return true; + } + return false; + }; + + auto m = std::make_shared<ov::pass::pattern::Matcher>(align_types_pattern, + "ov::frontend::pytorch::pass::AlignTypesRemoval"); + this->register_matcher(m, callback); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov
diff --git a/src/frontends/pytorch/src/transforms/align_types_removal.hpp b/src/frontends/pytorch/src/transforms/align_types_removal.hpp new file mode 100644 index 00000000000000..bba81df9e0e086 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/align_types_removal.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp"
+#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +class AlignTypesRemoval : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::AlignTypesRemoval"); + AlignTypesRemoval(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp index f8de5275b69ae8..67ea5f4f9e1ff9 100644 --- a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp @@ -12,6 +12,7 @@ #include "openvino/pass/pattern/matcher.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "utils.hpp" +#include "utils_quantize.hpp" namespace ov { namespace frontend { @@ -38,22 +39,31 @@ AtenStackListConstructReplacer::AtenStackListConstructReplacer() { auto axis_node = pattern_map.at(axis).get_node_shared_ptr(); auto axis_const = std::dynamic_pointer_cast(axis_node); auto axis = axis_const->cast_vector(); + if (axis.size() != 1) { + add_exception_to_fw_node(stack, "aten::stack has multiple axes, only one is supported."); + return false; + } // Check if ListConstruct is an input if (auto list_construct_node = cast_fw_node(input_node, "prim::ListConstruct")) { const auto& list_inputs = list_construct_node->input_values(); - OutputVector node_vector; - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - // Iterate over values in ListConstruct - for (const auto& list_input : list_inputs) { - auto node = concat_list_construct(list_input); - auto unsqueezed_node = std::make_shared(node, axis_const); - node_vector.push_back(unsqueezed_node); + std::shared_ptr node; + if (auto compression = u4_compression_stack(list_inputs, axis[0])) { + node = compression; + } else { + OutputVector node_vector; + auto zero = v0::Constant::create(element::i32, Shape{}, {0}); + // Iterate over values in ListConstruct + for (const auto& list_input : list_inputs) { + auto node = concat_list_construct(list_input); + auto unsqueezed_node = std::make_shared(node, axis_const); + node_vector.push_back(unsqueezed_node); + } + // Concat vectors on provided axis + node = std::make_shared(node_vector, axis[0]); } - // Concat vectors on provided axis - auto concat = std::make_shared(node_vector, axis[0]); - copy_runtime_info_and_name(stack, {concat}, {input_node}); - replace_node(stack, concat); + copy_runtime_info_and_name(stack, {node}, {input_node}); + replace_node(stack, node); return true; } add_exception_to_fw_node(stack, "Unsupported case of aten::stack."); diff --git a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp index 0219600799a3c0..f7e5e80b604a76 100644 --- a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp @@ -26,16 +26,8 @@ using namespace ov::op; StringEqualityReplacer::StringEqualityReplacer() { auto framework_node_lhs = pattern::wrap_type(); auto framework_node_rhs = pattern::wrap_type(); - auto convert_lhs = pattern::wrap_type({framework_node_lhs}); - auto convert_like_lhs = pattern::wrap_type({framework_node_lhs, framework_node_rhs}); - auto convert_rhs = pattern::wrap_type({framework_node_rhs}); - auto convert_like_rhs = 
pattern::wrap_type<v1::ConvertLike>({framework_node_rhs, framework_node_lhs}); - auto lhs_pattern = - std::make_shared<pattern::op::Or>(OutputVector{framework_node_lhs, convert_lhs, convert_like_lhs}); - auto rhs_pattern = - std::make_shared<pattern::op::Or>(OutputVector{framework_node_rhs, convert_rhs, convert_like_rhs}); - auto equal_op = pattern::wrap_type<v1::Equal>({lhs_pattern, rhs_pattern}); - auto not_equal_op = pattern::wrap_type<v1::NotEqual>({lhs_pattern, rhs_pattern}); + auto equal_op = pattern::wrap_type<v1::Equal>({framework_node_lhs, framework_node_rhs}); + auto not_equal_op = pattern::wrap_type<v1::NotEqual>({framework_node_lhs, framework_node_rhs}); auto string_equality_pattern = std::make_shared<pattern::op::Or>(OutputVector{equal_op, not_equal_op});
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp new file mode 100644 index 00000000000000..e08ebd728b050e --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "u4_block_repack.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" +#include "utils_quantize.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::op; +using namespace ov::pass::pattern; + +U4BlockRepack::U4BlockRepack() { + const auto m_constant = ov::pass::pattern::wrap_type<v0::Constant>(); + const auto m_reshape1 = ov::pass::pattern::wrap_type<v1::Reshape>({m_constant, any_input()}); + const auto m_transpose = ov::pass::pattern::wrap_type<v1::Transpose>({m_reshape1, any_input()}); + const auto m_reshape2 = ov::pass::pattern::wrap_type<v1::Reshape>({m_transpose, any_input()}); + + auto pack_byte = [](uint8_t lo, uint8_t hi) { + return (hi << 4) | (lo & 0x0F); + }; // swap halves because the Convert op assumes this layout + + auto get_u4 = [](const uint8_t* src, size_t idx) { + const size_t byte_idx = idx / 2; + const uint8_t bit_shift = 4 * (idx % 2); + return (src[byte_idx] >> bit_shift) & 0xF; + }; + + register_matcher( + std::make_shared<Matcher>(m_reshape2, "ov::frontend::pytorch::pass::U4BlockRepack"), + [=](ov::pass::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto constant = + std::dynamic_pointer_cast<v0::Constant>(pattern_to_output[m_constant].get_node_shared_ptr()); + if (!constant) + return false; + auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr(); + auto transpose = pattern_to_output[m_transpose].get_node_shared_ptr(); + auto reshape2 = pattern_to_output[m_reshape2].get_node_shared_ptr(); + + if (constant->get_element_type() != element::u4) + return false; + + // FIXME: Check reshape/transpose/reshape target shapes and axes permutation; for now they are assumed to + // always be in the expected form + + auto source_shape = reshape1->get_output_shape(0); + + if (source_shape.size() != 3) + return false; + + auto destination_shape = reshape2->get_output_shape(0); + + size_t n_blocks = source_shape[0]; + size_t block_height = source_shape[1]; + size_t lane_size = source_shape[2]; // size in u4 units + size_t block_size = block_height * lane_size / 2; // size in bytes + + auto src = constant->get_data_ptr<uint8_t>(); + + auto new_const = std::make_shared<v0::Constant>(element::u4, destination_shape); + auto dst = const_cast<uint8_t*>( // const_cast? + reinterpret_cast<const uint8_t*>(new_const->get_data_ptr())); // TODO: How to better access u4 data?
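+
+                // get_u4(src, n) extracts the n-th u4 element (stored low nibble first) and
+                // pack_byte(lo, hi) packs two u4 elements back into one byte. The loop below
+                // writes source elements (row 2*j, lane i) and (row 2*j + 1, lane i) of each
+                // block into destination byte i * block_height / 2 + j, i.e. it transposes the
+                // block to a lane-major layout while keeping each vertical pair of elements
+                // packed in a single byte.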
+ + for (size_t iblock = 0; iblock < n_blocks; ++iblock) { + auto src_block = src + iblock * block_size; + auto dst_block = dst + iblock * block_size; + for (size_t i = 0; i < lane_size; ++i) { + for (size_t j = 0; j < block_height / 2; ++j) { // /2 because we pack two u4 elements into one byte per iteration + uint8_t lo = get_u4(src_block, 2 * j * lane_size + i); + uint8_t hi = get_u4(src_block, (2 * j + 1) * lane_size + i); + dst_block[i * block_height / 2 + j] = pack_byte(lo, hi); + } + } + } + + copy_runtime_info(NodeVector{constant, reshape1, transpose, reshape2}, new_const); + replace_node(reshape2, new_const); + + return true; + }); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp new file mode 100644 index 00000000000000..aa6e00f70e564c --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +class U4BlockRepack : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::U4BlockRepack"); + U4BlockRepack(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov
diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index afd9b5ebf85216..7decae35b30bbb 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -4,6 +4,7 @@ #include "utils.hpp" +#include "helper_ops/align_types.hpp" #include "op_table.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/frontend/pytorch/decoder.hpp" @@ -381,33 +382,17 @@ std::unordered_map bit_to_int{ }; } // namespace -void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Output<Node>& rhs, bool align_scalars) { +element::Type infer_types(const Output<Node>& lhs, const Output<Node>& rhs, bool align_scalars) { const auto& lhs_type = lhs.get_element_type(); const auto& rhs_type = rhs.get_element_type(); - auto out_type = context.get_output_type(0); - if (out_type.is<element::Type>()) { - auto otype = out_type.as<element::Type>(); - if (otype.is_real()) { - if (otype != lhs_type) { - lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, otype)); - } - if (otype != rhs_type) { - rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, otype)); - } - return; - } - } if (lhs_type.is_dynamic() || rhs_type.is_dynamic()) { - // if any of types is not known, align to lhs type. - // TODO: can be fixed with special operation? - rhs = context.mark_node(std::make_shared<v1::ConvertLike>(rhs, lhs)); - return; + return element::dynamic; } // Both types are static, align types.
If float and int types are used, convert the int type to f32, after that align // to the largest bitness; if both are float or both are int, just align bitness if (lhs_type == rhs_type) - return; + return lhs_type; // if one of the operands is scalar, the resulting type is taken from the other operand except when the scalar is // a float and the other operand is an int; in that case BOTH operands get fp32 type @@ -429,11 +414,9 @@ void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Ou if (!align_scalars) rhs_dst_type = element::f32; } else if (is_lhs_scalar && rhs_type != element::boolean) { - lhs = context.mark_node(std::make_shared<v1::ConvertLike>(lhs, rhs)); - return; + return rhs_type; } else if (is_rhs_scalar && lhs_type != element::boolean) { - rhs = context.mark_node(std::make_shared<v1::ConvertLike>(rhs, lhs)); - return; + return lhs_type; } if (!lhs_dst_type.is_real() && rhs_dst_type.is_real()) { @@ -470,13 +453,39 @@ void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Ou } } } + return lhs_dst_type; +} - // Cast to destination types - if (lhs_dst_type != lhs_type) { - lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, lhs_dst_type)); +void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Output<Node>& rhs, bool align_scalars) { + const auto& lhs_type = lhs.get_element_type(); + const auto& rhs_type = rhs.get_element_type(); + auto out_type = context.get_output_type(0); + if (out_type.is<element::Type>()) { + auto otype = out_type.as<element::Type>(); + if (otype.is_real()) { + if (otype != lhs_type) { + lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, otype)); + } + if (otype != rhs_type) { + rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, otype)); + } + return; + } + } + auto dst_type = infer_types(lhs, rhs, align_scalars); + if (dst_type.is_dynamic()) { + // We can't decide the type at this point, create a special operation + auto at = std::make_shared<AlignTypes>(lhs, rhs, align_scalars); + lhs = at->output(0); + rhs = at->output(1); + return; + } + // Cast to destination type + if (dst_type != lhs_type) { + lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, dst_type)); } - if (rhs_dst_type != rhs_type) { - rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, rhs_dst_type)); + if (dst_type != rhs_type) { + rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, dst_type)); } }
diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index 1635296e612dff..20bae6fa62f5c3 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -7,6 +7,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" +#include "openvino/op/convert_like.hpp" namespace ov { @@ -65,11 +66,11 @@ Any simplified_type_interpret(Any type); void add_exception_to_fw_node(std::shared_ptr<Node> node, const std::string& msg); +element::Type infer_types(const Output<Node>& lhs, const Output<Node>& rhs, bool align_scalars); void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Output<Node>& rhs, bool align_scalars = false); - void align_output_types(const NodeContext& context, OutputVector& outputs); std::deque<Output<Node>> get_list_as_outputs(const Output<Node>& start); @@ -125,12 +126,31 @@ OutputVector translate_1to1_match_2_inputs_align_types(const NodeContext& contex FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); auto lhs = context.get_input(0); auto rhs = context.get_input(1); - align_eltwise_input_types(context, lhs, rhs, true); + auto lhs_type = context.get_input_type(0); + auto rhs_type =
context.get_input_type(1); + // If type is string or None, we shouldn't align + if (!lhs_type.is<type::Str>() && !rhs_type.is<type::Str>() && !lhs_type.is<type::PyNone>() && + !rhs_type.is<type::PyNone>()) + align_eltwise_input_types(context, lhs, rhs, true); OutputVector res = {context.mark_node(std::make_shared<T>(lhs, rhs))}; align_output_types(context, res); return res; } +template <typename T, size_t idx = 0> +OutputVector inplace_translate_1to1_match_2_inputs_align_types(const NodeContext& context) { + num_inputs_check(context, 2, 2); + FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); + auto lhs = context.get_input(0); + auto rhs = context.get_input(1); + // For an inplace op we know the direction of type alignment + if (lhs.get_element_type().is_dynamic() || lhs.get_element_type() != rhs.get_element_type()) + rhs = context.mark_node(std::make_shared<ov::op::v1::ConvertLike>(rhs, lhs)); + OutputVector res = {context.mark_node(std::make_shared<T>(lhs, rhs))}; + context.mutate_input(idx, res[0]); + return res; +} + inline OutputVector return_false_scalar(const NodeContext& context) { return {context.mark_node(ov::op::v0::Constant::create(element::boolean, Shape{}, {false}))}; } @@ -158,6 +178,9 @@ class DummyDecoder : public TorchDecoder { virtual PartialShape get_input_shape(size_t index) const override { FRONT_END_NOT_IMPLEMENTED(get_input_shape); } + virtual const std::vector<size_t>& get_input_strides(size_t index) const override { + FRONT_END_NOT_IMPLEMENTED(get_input_strides); + } virtual Any get_input_type(size_t index) const override { FRONT_END_NOT_IMPLEMENTED(get_input_type); } @@ -165,7 +188,7 @@ class DummyDecoder : public TorchDecoder { FRONT_END_NOT_IMPLEMENTED(get_output_debug_name); } virtual PartialShape get_output_shape(size_t index) const override { - FRONT_END_NOT_IMPLEMENTED(get_output_shape); + return PartialShape::dynamic(); } virtual Any get_output_type(size_t index) const override { FRONT_END_NOT_IMPLEMENTED(get_output_type); @@ -186,7 +209,7 @@ class DummyDecoder : public TorchDecoder { FRONT_END_NOT_IMPLEMENTED(get_op_type); } virtual const std::string& get_schema() const override { - FRONT_END_NOT_IMPLEMENTED(get_schema); + return m_schema; } virtual size_t num_of_outputs() const override { FRONT_END_NOT_IMPLEMENTED(num_of_outputs); @@ -215,6 +238,9 @@ class DummyDecoder : public TorchDecoder { virtual OutputVector inlined_inputs(size_t start_index) const override { FRONT_END_NOT_IMPLEMENTED(inlined_inputs); } + +private: + const std::string m_schema = "NONE"; }; } // namespace pytorch
diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index 5af546f3d5be5d..1346fd76971fcc 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -6,6 +6,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/convert_like.hpp" #include "openvino/op/fake_quantize.hpp" @@ -13,6 +14,7 @@ #include "openvino/op/reshape.hpp" #include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/subtract.hpp" +#include "transformations/utils/utils.hpp" namespace ov { namespace frontend { @@ -168,6 +170,53 @@ std::shared_ptr<QuantizedPtNode> cast_quantized_fw_node(std::shared_ptr<Node> no return quant_node; } +std::shared_ptr<ov::Node> u4_compression_stack(const OutputVector& list_elems, int64_t axis) { + // Part 1: Detect pattern + + if (list_elems.size() != 2) + return nullptr; + auto bitwise_and =
cast_fw_node(list_elems[0].get_node_shared_ptr(), "aten::bitwise_and"); + if (!bitwise_and) + return nullptr; + auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(), "aten::bitwise_right_shift"); + if (!bitwise_shift) + return nullptr; + + auto weights_u8 = std::dynamic_pointer_cast<v0::Constant>(bitwise_and->get_input_node_shared_ptr(0)); + if (weights_u8 != std::dynamic_pointer_cast<v0::Constant>(bitwise_shift->get_input_node_shared_ptr(0))) + return nullptr; + + if (weights_u8->get_output_element_type(0) != element::u8) + return nullptr; + + if (axis != -1 && static_cast<size_t>(axis) != weights_u8->get_shape().size() - 1) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_and->get_input_node_shared_ptr(1), 0x0F)) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_shift->get_input_node_shared_ptr(1), 4)) + return nullptr; + + // Pattern detected: weights_u8 is the target u8 constant with packed weights + + // Part 2: Form the u4 constant by repacking the original weights_u8 + // Repacking transforms half of the lanes into an interleaved representation. + + auto u8_shape = weights_u8->get_shape(); + size_t full_size = shape_size(u8_shape); + auto src = weights_u8->get_data_ptr<uint8_t>(); + + auto u4_shape = u8_shape; + u4_shape.push_back(2); + auto new_const = std::make_shared<v0::Constant>(element::u4, u4_shape); + auto dst = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(new_const->get_data_ptr())); + + std::copy(src, src + full_size, dst); // TODO: Avoid copying, reuse the same constant + copy_runtime_info_and_name(weights_u8, {new_const}, {weights_u8, bitwise_and, bitwise_shift}); + return new_const; +} + } // namespace pytorch } // namespace frontend } // namespace ov
diff --git a/src/frontends/pytorch/src/utils_quantize.hpp b/src/frontends/pytorch/src/utils_quantize.hpp index 69917e7b8bce3e..e02bce880d2480 100644 --- a/src/frontends/pytorch/src/utils_quantize.hpp +++ b/src/frontends/pytorch/src/utils_quantize.hpp @@ -166,6 +166,12 @@ OutputVector quantizable_op(const NodeContext& context) { } } // namespace op +/** + * Captures aten::stack([aten::bitwise_and(Constant(u8)), aten::bitwise_right_shift(Constant(u8))], dim=-1). + * This pattern is transformed into a single Constant with element_type=u4. + */ +std::shared_ptr<ov::Node> u4_compression_stack(const OutputVector& list_elems, int64_t axis); + } // namespace pytorch } // namespace frontend } // namespace ov
diff --git a/src/frontends/tensorflow/README.md b/src/frontends/tensorflow/README.md index 7fc421cd92c7f4..4a48203a2b41dc 100644 --- a/src/frontends/tensorflow/README.md +++ b/src/frontends/tensorflow/README.md @@ -205,6 +205,7 @@ py.test tensorflow_tests/test_tf_Unique.py --use_new_frontend ``` ## See also + * [Supported Operations](./docs/supported_ops.md) * [OpenVINO README](../../../README.md) * [OpenVINO Core Components](../../README.md) * [Developer documentation](../../../docs/dev/index.md)
diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md new file mode 100644 index 00000000000000..5794e3f16653fd --- /dev/null +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -0,0 +1,1406 @@ +# TensorFlow Operations Supported by OpenVINO TensorFlow Frontend + +The table below lists the operations from [tf.raw_ops](https://www.tensorflow.org/api_docs/python/tf/raw_ops) and shows which of them the TensorFlow Frontend supports. +A "supported" operation is one that the TensorFlow Frontend can convert to the OpenVINO representation.
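+
+For example, a frozen TensorFlow model that consists only of supported operations can be read and compiled directly. A minimal C++ sketch (the file name `model.pb` and the `"CPU"` device below are illustrative placeholders):
+
+```cpp
+#include <openvino/openvino.hpp>
+
+int main() {
+    ov::Core core;
+    // read_model() dispatches to the TensorFlow Frontend for *.pb files;
+    // conversion is expected to fail if the graph contains unsupported operations
+    std::shared_ptr<ov::Model> model = core.read_model("model.pb");
+    ov::CompiledModel compiled = core.compile_model(model, "CPU");
+    return 0;
+}
+```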
+ +| Operation Name | Supported | Limitation | +|---------------------------------------------------------|-------------------------------|-------------------------------| +| Abort | NO | | +| Abs | YES | | +| AccumulateNV2 | NO | | +| AccumulatorApplyGradient | NO | | +| AccumulatorNumAccumulated | NO | | +| AccumulatorSetGlobalStep | NO | | +| AccumulatorTakeGradient | NO | | +| Acos | YES | | +| Acosh | YES | | +| Add | YES | | +| AddManySparseToTensorsMap | NO | | +| AddN | YES | | +| AddSparseToTensorsMap | NO | | +| AddV2 | YES | | +| AdjustContrast | NO | | +| AdjustContrastv2 | YES | | +| AdjustHue | NO | | +| AdjustSaturation | NO | | +| All | YES | | +| AllCandidateSampler | NO | | +| AllToAll | NO | | +| Angle | NO | | +| AnonymousHashTable | NO | | +| AnonymousIterator | NO | | +| AnonymousIteratorV2 | NO | | +| AnonymousIteratorV3 | NO | | +| AnonymousMemoryCache | NO | | +| AnonymousMultiDeviceIterator | NO | | +| AnonymousMultiDeviceIteratorV3 | NO | | +| AnonymousMutableDenseHashTable | NO | | +| AnonymousMutableHashTable | NO | | +| AnonymousMutableHashTableOfTensors | NO | | +| AnonymousRandomSeedGenerator | NO | | +| AnonymousSeedGenerator | NO | | +| Any | YES | | +| ApplyAdaMax | NO | | +| ApplyAdadelta | NO | | +| ApplyAdagrad | NO | | +| ApplyAdagradDA | NO | | +| ApplyAdagradV2 | NO | | +| ApplyAdam | NO | | +| ApplyAddSign | NO | | +| ApplyCenteredRMSProp | NO | | +| ApplyFtrl | NO | | +| ApplyFtrlV2 | NO | | +| ApplyGradientDescent | NO | | +| ApplyMomentum | NO | | +| ApplyPowerSign | NO | | +| ApplyProximalAdagrad | NO | | +| ApplyProximalGradientDescent | NO | | +| ApplyRMSProp | NO | | +| ApproxTopK | NO | | +| ApproximateEqual | NO | | +| ArgMax | YES | | +| ArgMin | YES | | +| AsString | NO | | +| Asin | YES | | +| Asinh | YES | | +| Assert | YES | | +| AssertCardinalityDataset | NO | | +| AssertNextDataset | NO | | +| AssertPrevDataset | NO | | +| Assign | YES | | +| AssignAdd | NO | | +| AssignAddVariableOp | NO | | +| AssignSub | NO | | +| AssignSubVariableOp | NO | | +| AssignVariableOp | YES | | +| AssignVariableXlaConcatND | NO | | +| Atan | YES | | +| Atan2 | NO | | +| Atanh | YES | | +| AudioSpectrogram | NO | | +| AudioSummary | NO | | +| AudioSummaryV2 | NO | | +| AutoShardDataset | NO | | +| AvgPool | YES | | +| AvgPool3D | YES | | +| AvgPool3DGrad | NO | | +| AvgPoolGrad | NO | | +| BandedTriangularSolve | NO | | +| Barrier | NO | | +| BarrierClose | NO | | +| BarrierIncompleteSize | NO | | +| BarrierInsertMany | NO | | +| BarrierReadySize | NO | | +| BarrierTakeMany | NO | | +| Batch | NO | | +| BatchCholesky | NO | | +| BatchCholeskyGrad | NO | | +| BatchDataset | NO | | +| BatchDatasetV2 | NO | | +| BatchFFT | NO | | +| BatchFFT2D | NO | | +| BatchFFT3D | NO | | +| BatchFunction | NO | | +| BatchIFFT | NO | | +| BatchIFFT2D | NO | | +| BatchIFFT3D | NO | | +| BatchMatMul | YES | | +| BatchMatMulV2 | YES | | +| BatchMatMulV3 | YES | | +| BatchMatrixBandPart | NO | | +| BatchMatrixDeterminant | NO | | +| BatchMatrixDiag | NO | | +| BatchMatrixDiagPart | NO | | +| BatchMatrixInverse | NO | | +| BatchMatrixSetDiag | NO | | +| BatchMatrixSolve | NO | | +| BatchMatrixSolveLs | NO | | +| BatchMatrixTriangularSolve | NO | | +| BatchNormWithGlobalNormalization | NO | | +| BatchNormWithGlobalNormalizationGrad | NO | | +| BatchSelfAdjointEig | NO | | +| BatchSelfAdjointEigV2 | NO | | +| BatchSvd | NO | | +| BatchToSpace | NO | | +| BatchToSpaceND | YES | | +| BesselI0 | NO | | +| BesselI0e | NO | | +| BesselI1 | NO | | +| BesselI1e | NO | | +| BesselJ0 | 
NO | | +| BesselJ1 | NO | | +| BesselK0 | NO | | +| BesselK0e | NO | | +| BesselK1 | NO | | +| BesselK1e | NO | | +| BesselY0 | NO | | +| BesselY1 | NO | | +| Betainc | NO | | +| BiasAdd | YES | | +| BiasAddGrad | NO | | +| BiasAddV1 | NO | | +| Bincount | NO | | +| Bitcast | NO | | +| BitwiseAnd | NO | | +| BitwiseOr | NO | | +| BitwiseXor | NO | | +| BlockLSTM | YES | | +| BlockLSTMGrad | NO | | +| BlockLSTMGradV2 | NO | | +| BlockLSTMV2 | NO | | +| BoostedTreesAggregateStats | NO | | +| BoostedTreesBucketize | NO | | +| BoostedTreesCalculateBestFeatureSplit | NO | | +| BoostedTreesCalculateBestFeatureSplitV2 | NO | | +| BoostedTreesCalculateBestGainsPerFeature | NO | | +| BoostedTreesCenterBias | NO | | +| BoostedTreesCreateEnsemble | NO | | +| BoostedTreesCreateQuantileStreamResource | NO | | +| BoostedTreesDeserializeEnsemble | NO | | +| BoostedTreesEnsembleResourceHandleOp | NO | | +| BoostedTreesExampleDebugOutputs | NO | | +| BoostedTreesFlushQuantileSummaries | NO | | +| BoostedTreesGetEnsembleStates | NO | | +| BoostedTreesMakeQuantileSummaries | NO | | +| BoostedTreesMakeStatsSummary | NO | | +| BoostedTreesPredict | NO | | +| BoostedTreesQuantileStreamResourceAddSummaries | NO | | +| BoostedTreesQuantileStreamResourceDeserialize | NO | | +| BoostedTreesQuantileStreamResourceFlush | NO | | +| BoostedTreesQuantileStreamResourceGetBucketBoundaries | NO | | +| BoostedTreesQuantileStreamResourceHandleOp | NO | | +| BoostedTreesSerializeEnsemble | NO | | +| BoostedTreesSparseAggregateStats | NO | | +| BoostedTreesSparseCalculateBestFeatureSplit | NO | | +| BoostedTreesTrainingPredict | NO | | +| BoostedTreesUpdateEnsemble | NO | | +| BoostedTreesUpdateEnsembleV2 | NO | | +| BroadcastArgs | YES | | +| BroadcastGradientArgs | NO | | +| BroadcastTo | YES | | +| Bucketize | YES | | +| BytesProducedStatsDataset | NO | | +| CSRSparseMatrixComponents | NO | | +| CSRSparseMatrixToDense | NO | | +| CSRSparseMatrixToSparseTensor | NO | | +| CSVDataset | NO | | +| CSVDatasetV2 | NO | | +| CTCBeamSearchDecoder | NO | | +| CTCGreedyDecoder | YES | | +| CTCLoss | YES | | +| CTCLossV2 | NO | | +| CacheDataset | NO | | +| CacheDatasetV2 | NO | | +| Case | NO | | +| Cast | YES | | +| Ceil | YES | | +| CheckNumerics | YES | | +| CheckNumericsV2 | YES | | +| Cholesky | NO | | +| CholeskyGrad | NO | | +| ChooseFastestBranchDataset | NO | | +| ChooseFastestDataset | NO | | +| ClipByValue | YES | | +| CloseSummaryWriter | NO | | +| CollectiveAllToAllV2 | NO | | +| CollectiveAllToAllV3 | NO | | +| CollectiveAssignGroupV2 | NO | | +| CollectiveBcastRecv | NO | | +| CollectiveBcastRecvV2 | NO | | +| CollectiveBcastSend | NO | | +| CollectiveBcastSendV2 | NO | | +| CollectiveGather | NO | | +| CollectiveGatherV2 | NO | | +| CollectiveInitializeCommunicator | NO | | +| CollectivePermute | NO | | +| CollectiveReduce | NO | | +| CollectiveReduceScatterV2 | NO | | +| CollectiveReduceV2 | NO | | +| CollectiveReduceV3 | NO | | +| CombinedNonMaxSuppression | NO | | +| Complex | NO | | +| ComplexAbs | NO | | +| CompositeTensorVariantFromComponents | NO | | +| CompositeTensorVariantToComponents | NO | | +| CompressElement | NO | | +| ComputeAccidentalHits | NO | | +| ComputeBatchSize | NO | | +| Concat | YES | | +| ConcatOffset | NO | | +| ConcatV2 | YES | | +| ConcatenateDataset | NO | | +| ConditionalAccumulator | NO | | +| ConfigureDistributedTPU | NO | | +| ConfigureTPUEmbedding | NO | | +| Conj | NO | | +| ConjugateTranspose | NO | | +| Const | YES | | +| ConsumeMutexLock | NO | | +| ControlTrigger | NO | | +| 
Conv | NO | | +| Conv2D | YES | | +| Conv2DBackpropFilter | NO | | +| Conv2DBackpropFilterV2 | NO | | +| Conv2DBackpropInput | YES | | +| Conv2DBackpropInputV2 | NO | | +| Conv3D | YES | | +| Conv3DBackpropFilter | NO | | +| Conv3DBackpropFilterV2 | NO | | +| Conv3DBackpropInput | NO | | +| Conv3DBackpropInputV2 | YES | | +| Copy | NO | | +| CopyHost | NO | | +| Cos | YES | | +| Cosh | YES | | +| CountUpTo | NO | | +| CreateSummaryDbWriter | NO | | +| CreateSummaryFileWriter | NO | | +| CropAndResize | YES | | +| CropAndResizeGradBoxes | NO | | +| CropAndResizeGradImage | NO | | +| Cross | NO | | +| CrossReplicaSum | NO | | +| CudnnRNN | NO | | +| CudnnRNNBackprop | NO | | +| CudnnRNNBackpropV2 | NO | | +| CudnnRNNBackpropV3 | NO | | +| CudnnRNNCanonicalToParams | NO | | +| CudnnRNNCanonicalToParamsV2 | NO | | +| CudnnRNNParamsSize | NO | | +| CudnnRNNParamsToCanonical | NO | | +| CudnnRNNParamsToCanonicalV2 | NO | | +| CudnnRNNV2 | NO | | +| CudnnRNNV3 | NO | | +| Cumprod | NO | | +| Cumsum | YES | | +| CumulativeLogsumexp | NO | | +| DataFormatDimMap | NO | | +| DataFormatVecPermute | NO | | +| DataServiceDataset | NO | | +| DataServiceDatasetV2 | NO | | +| DataServiceDatasetV3 | NO | | +| DataServiceDatasetV4 | NO | | +| DatasetCardinality | NO | | +| DatasetFromGraph | NO | | +| DatasetToGraph | NO | | +| DatasetToGraphV2 | NO | | +| DatasetToSingleElement | NO | | +| DatasetToTFRecord | NO | | +| Dawsn | NO | | +| DebugGradientIdentity | NO | | +| DebugGradientRefIdentity | NO | | +| DebugIdentity | NO | | +| DebugIdentityV2 | NO | | +| DebugIdentityV3 | NO | | +| DebugNanCount | NO | | +| DebugNumericSummary | NO | | +| DebugNumericSummaryV2 | NO | | +| DecodeAndCropJpeg | NO | | +| DecodeBase64 | NO | | +| DecodeBmp | NO | | +| DecodeCSV | NO | | +| DecodeCompressed | NO | | +| DecodeGif | NO | | +| DecodeImage | NO | | +| DecodeJSONExample | NO | | +| DecodeJpeg | NO | | +| DecodePaddedRaw | NO | | +| DecodePng | NO | | +| DecodeProtoV2 | NO | | +| DecodeRaw | NO | | +| DecodeWav | NO | | +| DeepCopy | NO | | +| DeleteIterator | NO | | +| DeleteMemoryCache | NO | | +| DeleteMultiDeviceIterator | NO | | +| DeleteRandomSeedGenerator | NO | | +| DeleteSeedGenerator | NO | | +| DeleteSessionTensor | NO | | +| DenseBincount | NO | | +| DenseCountSparseOutput | NO | | +| DenseToCSRSparseMatrix | NO | | +| DenseToDenseSetOperation | NO | | +| DenseToSparseBatchDataset | NO | | +| DenseToSparseSetOperation | NO | | +| DepthToSpace | YES | | +| DepthwiseConv2dNative | YES | | +| DepthwiseConv2dNativeBackpropFilter | NO | | +| DepthwiseConv2dNativeBackpropInput | NO | | +| Dequantize | NO | | +| DeserializeIterator | NO | | +| DeserializeManySparse | NO | | +| DeserializeSparse | NO | | +| DestroyResourceOp | NO | | +| DestroyTemporaryVariable | NO | | +| DeviceIndex | NO | | +| Diag | NO | | +| DiagPart | NO | | +| Digamma | NO | | +| Dilation2D | NO | | +| Dilation2DBackpropFilter | NO | | +| Dilation2DBackpropInput | NO | | +| DirectedInterleaveDataset | NO | | +| DisableCopyOnRead | NO | | +| DistributedSave | NO | | +| Div | NO | | +| DivNoNan | YES | | +| DrawBoundingBoxes | NO | | +| DrawBoundingBoxesV2 | NO | | +| DummyIterationCounter | NO | | +| DummyMemoryCache | NO | | +| DummySeedGenerator | NO | | +| DynamicEnqueueTPUEmbeddingArbitraryTensorBatch | NO | | +| DynamicPartition | YES | | +| DynamicStitch | YES | | +| EagerPyFunc | NO | | +| EditDistance | NO | | +| Eig | NO | | +| Einsum | YES | | +| Elu | YES | | +| EluGrad | NO | | +| Empty | NO | | +| EmptyTensorList | YES | | 
+| EncodeBase64 | NO | | +| EncodeJpeg | NO | | +| EncodeJpegVariableQuality | NO | | +| EncodePng | NO | | +| EncodeProto | NO | | +| EncodeWav | NO | | +| EnqueueTPUEmbeddingArbitraryTensorBatch | NO | | +| EnqueueTPUEmbeddingIntegerBatch | NO | | +| EnqueueTPUEmbeddingRaggedTensorBatch | NO | | +| EnqueueTPUEmbeddingSparseBatch | NO | | +| EnqueueTPUEmbeddingSparseTensorBatch | NO | | +| EnsureShape | YES | | +| Enter | YES | | +| Equal | YES | | +| Erf | YES | | +| Erfc | NO | | +| Erfinv | NO | | +| EuclideanNorm | YES | | +| Exit | YES | | +| Exp | YES | | +| ExpandDims | YES | | +| ExperimentalAssertNextDataset | NO | | +| ExperimentalAutoShardDataset | NO | | +| ExperimentalBytesProducedStatsDataset | NO | | +| ExperimentalCSVDataset | NO | | +| ExperimentalChooseFastestDataset | NO | | +| ExperimentalDatasetCardinality | NO | | +| ExperimentalDatasetToTFRecord | NO | | +| ExperimentalDenseToSparseBatchDataset | NO | | +| ExperimentalDirectedInterleaveDataset | NO | | +| ExperimentalGroupByReducerDataset | NO | | +| ExperimentalGroupByWindowDataset | NO | | +| ExperimentalIgnoreErrorsDataset | NO | | +| ExperimentalIteratorGetDevice | NO | | +| ExperimentalLMDBDataset | NO | | +| ExperimentalLatencyStatsDataset | NO | | +| ExperimentalMapAndBatchDataset | NO | | +| ExperimentalMapDataset | NO | | +| ExperimentalMatchingFilesDataset | NO | | +| ExperimentalMaxIntraOpParallelismDataset | NO | | +| ExperimentalNonSerializableDataset | NO | | +| ExperimentalParallelInterleaveDataset | NO | | +| ExperimentalParseExampleDataset | NO | | +| ExperimentalPrivateThreadPoolDataset | NO | | +| ExperimentalRandomDataset | NO | | +| ExperimentalRebatchDataset | NO | | +| ExperimentalScanDataset | NO | | +| ExperimentalSetStatsAggregatorDataset | NO | | +| ExperimentalSleepDataset | NO | | +| ExperimentalSlidingWindowDataset | NO | | +| ExperimentalSqlDataset | NO | | +| ExperimentalStatsAggregatorHandle | NO | | +| ExperimentalStatsAggregatorSummary | NO | | +| ExperimentalTakeWhileDataset | NO | | +| ExperimentalThreadPoolDataset | NO | | +| ExperimentalThreadPoolHandle | NO | | +| ExperimentalUnbatchDataset | NO | | +| ExperimentalUniqueDataset | NO | | +| Expint | NO | | +| Expm1 | NO | | +| ExtractGlimpse | NO | | +| ExtractGlimpseV2 | NO | | +| ExtractImagePatches | YES | | +| ExtractJpegShape | NO | | +| ExtractVolumePatches | NO | | +| FFT | NO | | +| FFT2D | NO | | +| FFT3D | NO | | +| FIFOQueue | YES | | +| FIFOQueueV2 | YES | | +| Fact | NO | | +| FakeParam | NO | | +| FakeQuantWithMinMaxArgs | YES | | +| FakeQuantWithMinMaxArgsGradient | NO | | +| FakeQuantWithMinMaxVars | YES | | +| FakeQuantWithMinMaxVarsGradient | NO | | +| FakeQuantWithMinMaxVarsPerChannel | YES | | +| FakeQuantWithMinMaxVarsPerChannelGradient | NO | | +| FakeQueue | NO | | +| Fill | YES | | +| FilterByLastComponentDataset | NO | | +| FilterDataset | NO | | +| FinalizeDataset | NO | | +| Fingerprint | NO | | +| FixedLengthRecordDataset | NO | | +| FixedLengthRecordDatasetV2 | NO | | +| FixedLengthRecordReader | NO | | +| FixedLengthRecordReaderV2 | NO | | +| FixedUnigramCandidateSampler | NO | | +| FlatMapDataset | NO | | +| Floor | YES | | +| FloorDiv | YES | | +| FloorMod | YES | | +| FlushSummaryWriter | NO | | +| For | NO | | +| FractionalAvgPool | NO | | +| FractionalAvgPoolGrad | NO | | +| FractionalMaxPool | NO | | +| FractionalMaxPoolGrad | NO | | +| FresnelCos | NO | | +| FresnelSin | NO | | +| FusedBatchNorm | YES | | +| FusedBatchNormGrad | NO | | +| FusedBatchNormGradV2 | NO | | +| FusedBatchNormGradV3 
| NO | | +| FusedBatchNormV2 | YES | | +| FusedBatchNormV3 | YES | | +| FusedPadConv2D | NO | | +| FusedResizeAndPadConv2D | NO | | +| GRUBlockCell | YES | | +| GRUBlockCellGrad | NO | | +| Gather | YES | | +| GatherNd | YES | | +| GatherV2 | YES | | +| GenerateBoundingBoxProposals | NO | | +| GenerateVocabRemapping | NO | | +| GeneratorDataset | NO | | +| GetElementAtIndex | NO | | +| GetOptions | NO | | +| GetSessionHandle | NO | | +| GetSessionHandleV2 | NO | | +| GetSessionTensor | NO | | +| Greater | YES | | +| GreaterEqual | YES | | +| GroupByReducerDataset | NO | | +| GroupByWindowDataset | NO | | +| GuaranteeConst | NO | | +| HSVToRGB | NO | | +| HashTable | YES | | +| HashTableV2 | YES | | +| HistogramFixedWidth | NO | | +| HistogramSummary | NO | | +| IFFT | NO | | +| IFFT2D | NO | | +| IFFT3D | NO | | +| IRFFT | NO | | +| IRFFT2D | NO | | +| IRFFT3D | NO | | +| Identity | YES | | +| IdentityN | YES | | +| IdentityReader | NO | | +| IdentityReaderV2 | NO | | +| If | YES | | +| Igamma | NO | | +| IgammaGradA | NO | | +| Igammac | NO | | +| IgnoreErrorsDataset | NO | | +| Imag | NO | | +| ImageProjectiveTransformV2 | NO | | +| ImageProjectiveTransformV3 | NO | | +| ImageSummary | NO | | +| ImmutableConst | NO | | +| ImportEvent | NO | | +| InTopK | NO | | +| InTopKV2 | NO | | +| InfeedDequeue | NO | | +| InfeedDequeueTuple | NO | | +| InfeedEnqueue | NO | | +| InfeedEnqueuePrelinearizedBuffer | NO | | +| InfeedEnqueueTuple | NO | | +| InitializeTable | NO | | +| InitializeTableFromDataset | NO | | +| InitializeTableFromTextFile | NO | | +| InitializeTableFromTextFileV2 | NO | | +| InitializeTableV2 | NO | | +| InplaceAdd | NO | | +| InplaceSub | NO | | +| InplaceUpdate | NO | | +| InterleaveDataset | NO | | +| Inv | NO | | +| InvGrad | NO | | +| Invert | NO | | +| InvertPermutation | YES | | +| IsBoostedTreesEnsembleInitialized | NO | | +| IsBoostedTreesQuantileStreamResourceInitialized | NO | | +| IsFinite | YES | | +| IsInf | YES | | +| IsNan | YES | | +| IsTPUEmbeddingInitialized | NO | | +| IsVariableInitialized | YES | | +| IsotonicRegression | NO | | +| Iterator | YES | | +| IteratorFromStringHandle | NO | | +| IteratorFromStringHandleV2 | NO | | +| IteratorGetDevice | NO | | +| IteratorGetNext | YES | | +| IteratorGetNextAsOptional | NO | | +| IteratorGetNextSync | NO | | +| IteratorToStringHandle | NO | | +| IteratorV2 | YES | | +| L2Loss | YES | | +| LMDBDataset | NO | | +| LMDBReader | NO | | +| LRN | YES | | +| LRNGrad | NO | | +| LSTMBlockCell | NO | | +| LSTMBlockCellGrad | NO | | +| LatencyStatsDataset | NO | | +| LeakyRelu | YES | | +| LeakyReluGrad | NO | | +| LearnedUnigramCandidateSampler | NO | | +| LeftShift | NO | | +| LegacyParallelInterleaveDatasetV2 | NO | | +| Less | YES | | +| LessEqual | YES | | +| Lgamma | NO | | +| LinSpace | YES | | +| ListDataset | NO | | +| ListDiff | YES | | +| LoadAndRemapMatrix | NO | | +| LoadDataset | NO | | +| LoadTPUEmbeddingADAMParameters | NO | | +| LoadTPUEmbeddingAdadeltaParameters | NO | | +| LoadTPUEmbeddingAdagradMomentumParameters | NO | | +| LoadTPUEmbeddingAdagradParameters | NO | | +| LoadTPUEmbeddingCenteredRMSPropParameters | NO | | +| LoadTPUEmbeddingFTRLParameters | NO | | +| LoadTPUEmbeddingFrequencyEstimatorParameters | NO | | +| LoadTPUEmbeddingMDLAdagradLightParameters | NO | | +| LoadTPUEmbeddingMomentumParameters | NO | | +| LoadTPUEmbeddingProximalAdagradParameters | NO | | +| LoadTPUEmbeddingProximalYogiParameters | NO | | +| LoadTPUEmbeddingRMSPropParameters | NO | | +| 
LoadTPUEmbeddingStochasticGradientDescentParameters | NO | | +| Log | YES | | +| Log1p | YES | | +| LogMatrixDeterminant | NO | | +| LogSoftmax | YES | | +| LogUniformCandidateSampler | NO | | +| LogicalAnd | YES | | +| LogicalNot | YES | | +| LogicalOr | YES | | +| LookupTableExport | NO | | +| LookupTableExportV2 | NO | | +| LookupTableFind | NO | | +| LookupTableFindV2 | NO | | +| LookupTableImport | NO | | +| LookupTableImportV2 | NO | | +| LookupTableInsert | YES | | +| LookupTableInsertV2 | YES | | +| LookupTableRemoveV2 | NO | | +| LookupTableSize | NO | | +| LookupTableSizeV2 | NO | | +| LoopCond | YES | | +| LowerBound | NO | | +| Lu | NO | | +| MakeIterator | NO | | +| MapAndBatchDataset | NO | | +| MapClear | NO | | +| MapDataset | NO | | +| MapDefun | NO | | +| MapIncompleteSize | NO | | +| MapPeek | NO | | +| MapSize | NO | | +| MapStage | NO | | +| MapUnstage | NO | | +| MapUnstageNoKey | NO | | +| MatMul | YES | | +| MatchingFiles | NO | | +| MatchingFilesDataset | NO | | +| MatrixBandPart | NO | | +| MatrixDeterminant | NO | | +| MatrixDiag | YES | | +| MatrixDiagPart | NO | | +| MatrixDiagPartV2 | NO | | +| MatrixDiagPartV3 | NO | | +| MatrixDiagV2 | NO | | +| MatrixDiagV3 | NO | | +| MatrixExponential | NO | | +| MatrixInverse | NO | | +| MatrixLogarithm | NO | | +| MatrixSetDiag | NO | | +| MatrixSetDiagV2 | NO | | +| MatrixSetDiagV3 | NO | | +| MatrixSolve | NO | | +| MatrixSolveLs | NO | | +| MatrixSquareRoot | NO | | +| MatrixTriangularSolve | NO | | +| Max | YES | | +| MaxIntraOpParallelismDataset | NO | | +| MaxPool | YES | | +| MaxPool3D | YES | | +| MaxPool3DGrad | NO | | +| MaxPool3DGradGrad | NO | | +| MaxPoolGrad | NO | | +| MaxPoolGradGrad | NO | | +| MaxPoolGradGradV2 | NO | | +| MaxPoolGradGradWithArgmax | NO | | +| MaxPoolGradV2 | NO | | +| MaxPoolGradWithArgmax | NO | | +| MaxPoolV2 | YES | | +| MaxPoolWithArgmax | YES | | +| Maximum | YES | | +| Mean | YES | | +| Merge | YES | | +| MergeSummary | NO | | +| MergeV2Checkpoints | YES | | +| Mfcc | NO | | +| Min | YES | | +| Minimum | YES | | +| MirrorPad | YES | | +| MirrorPadGrad | NO | | +| Mod | YES | | +| ModelDataset | NO | | +| Mul | YES | | +| MulNoNan | NO | | +| MultiDeviceIterator | NO | | +| MultiDeviceIteratorFromStringHandle | NO | | +| MultiDeviceIteratorGetNextFromShard | NO | | +| MultiDeviceIteratorInit | NO | | +| MultiDeviceIteratorToStringHandle | NO | | +| Multinomial | NO | | +| MutableDenseHashTable | NO | | +| MutableDenseHashTableV2 | NO | | +| MutableHashTable | YES | | +| MutableHashTableOfTensors | NO | | +| MutableHashTableOfTensorsV2 | NO | | +| MutableHashTableV2 | YES | | +| MutexLock | NO | | +| MutexV2 | NO | | +| NcclAllReduce | NO | | +| NcclBroadcast | NO | | +| NcclReduce | NO | | +| Ndtri | NO | | +| Neg | YES | | +| NextAfter | NO | | +| NextIteration | YES | | +| NoOp | YES | | +| NonDeterministicInts | NO | | +| NonMaxSuppression | YES | | +| NonMaxSuppressionV2 | YES | | +| NonMaxSuppressionV3 | YES | | +| NonMaxSuppressionV4 | YES | | +| NonMaxSuppressionV5 | YES | | +| NonMaxSuppressionWithOverlaps | NO | | +| NonSerializableDataset | NO | | +| NotEqual | YES | | +| NthElement | NO | | +| OneHot | YES | | +| OneShotIterator | YES | | +| OnesLike | YES | | +| OptimizeDataset | NO | | +| OptimizeDatasetV2 | NO | | +| OptionalFromValue | NO | | +| OptionalGetValue | NO | | +| OptionalHasValue | NO | | +| OptionalNone | NO | | +| OptionsDataset | NO | | +| OrderedMapClear | NO | | +| OrderedMapIncompleteSize | NO | | +| OrderedMapPeek | NO | | +| OrderedMapSize | NO | 
| +| OrderedMapStage | NO | | +| OrderedMapUnstage | NO | | +| OrderedMapUnstageNoKey | NO | | +| OutfeedDequeue | NO | | +| OutfeedDequeueTuple | NO | | +| OutfeedDequeueTupleV2 | NO | | +| OutfeedDequeueV2 | NO | | +| OutfeedEnqueue | NO | | +| OutfeedEnqueueTuple | NO | | +| Pack | YES | | +| Pad | YES | | +| PadV2 | YES | | +| PaddedBatchDataset | NO | | +| PaddedBatchDatasetV2 | NO | | +| PaddingFIFOQueue | NO | | +| PaddingFIFOQueueV2 | NO | | +| ParallelBatchDataset | NO | | +| ParallelConcat | NO | | +| ParallelDynamicStitch | YES | | +| ParallelFilterDataset | NO | | +| ParallelInterleaveDataset | NO | | +| ParallelInterleaveDatasetV2 | NO | | +| ParallelInterleaveDatasetV3 | NO | | +| ParallelInterleaveDatasetV4 | NO | | +| ParallelMapDataset | NO | | +| ParallelMapDatasetV2 | NO | | +| ParameterizedTruncatedNormal | NO | | +| ParseExample | NO | | +| ParseExampleDataset | NO | | +| ParseExampleDatasetV2 | NO | | +| ParseExampleV2 | NO | | +| ParseSequenceExample | NO | | +| ParseSequenceExampleV2 | NO | | +| ParseSingleExample | NO | | +| ParseSingleSequenceExample | NO | | +| ParseTensor | NO | | +| PartitionedCall | YES | | +| Placeholder | YES | | +| PlaceholderV2 | NO | | +| PlaceholderWithDefault | YES | | +| Polygamma | NO | | +| PopulationCount | NO | | +| Pow | YES | | +| PrefetchDataset | NO | | +| Prelinearize | NO | | +| PrelinearizeTuple | NO | | +| PreventGradient | YES | | +| Print | NO | | +| PrintV2 | NO | | +| PriorityQueue | NO | | +| PriorityQueueV2 | NO | | +| PrivateThreadPoolDataset | NO | | +| Prod | YES | | +| PyFunc | NO | | +| PyFuncStateless | NO | | +| Qr | NO | | +| QuantizeAndDequantize | NO | | +| QuantizeAndDequantizeV2 | NO | | +| QuantizeAndDequantizeV3 | NO | | +| QuantizeAndDequantizeV4 | NO | | +| QuantizeAndDequantizeV4Grad | NO | | +| QuantizeDownAndShrinkRange | NO | | +| QuantizeV2 | NO | | +| QuantizedAdd | NO | | +| QuantizedAvgPool | NO | | +| QuantizedBatchNormWithGlobalNormalization | NO | | +| QuantizedBiasAdd | NO | | +| QuantizedConcat | NO | | +| QuantizedConv2D | NO | | +| QuantizedConv2DAndRelu | NO | | +| QuantizedConv2DAndReluAndRequantize | NO | | +| QuantizedConv2DAndRequantize | NO | | +| QuantizedConv2DPerChannel | NO | | +| QuantizedConv2DWithBias | NO | | +| QuantizedConv2DWithBiasAndRelu | NO | | +| QuantizedConv2DWithBiasAndReluAndRequantize | NO | | +| QuantizedConv2DWithBiasAndRequantize | NO | | +| QuantizedConv2DWithBiasSignedSumAndReluAndRequantize | NO | | +| QuantizedConv2DWithBiasSumAndRelu | NO | | +| QuantizedConv2DWithBiasSumAndReluAndRequantize | NO | | +| QuantizedDepthwiseConv2D | NO | | +| QuantizedDepthwiseConv2DWithBias | NO | | +| QuantizedDepthwiseConv2DWithBiasAndRelu | NO | | +| QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize | NO | | +| QuantizedInstanceNorm | NO | | +| QuantizedMatMul | NO | | +| QuantizedMatMulWithBias | NO | | +| QuantizedMatMulWithBiasAndDequantize | NO | | +| QuantizedMatMulWithBiasAndRelu | NO | | +| QuantizedMatMulWithBiasAndReluAndRequantize | NO | | +| QuantizedMatMulWithBiasAndRequantize | NO | | +| QuantizedMaxPool | NO | | +| QuantizedMul | NO | | +| QuantizedRelu | NO | | +| QuantizedRelu6 | NO | | +| QuantizedReluX | NO | | +| QuantizedReshape | NO | | +| QuantizedResizeBilinear | NO | | +| QueueClose | NO | | +| QueueCloseV2 | NO | | +| QueueDequeue | YES | | +| QueueDequeueMany | YES | | +| QueueDequeueManyV2 | NO | | +| QueueDequeueUpTo | YES | | +| QueueDequeueUpToV2 | YES | | +| QueueDequeueV2 | YES | | +| QueueEnqueue | NO | | +| QueueEnqueueMany | NO | | 
+| QueueEnqueueManyV2 | NO | | +| QueueEnqueueV2 | NO | | +| QueueIsClosed | NO | | +| QueueIsClosedV2 | NO | | +| QueueSize | NO | | +| QueueSizeV2 | NO | | +| RFFT | NO | | +| RFFT2D | NO | | +| RFFT3D | NO | | +| RGBToHSV | NO | | +| RaggedBincount | NO | | +| RaggedCountSparseOutput | NO | | +| RaggedCross | NO | | +| RaggedFillEmptyRows | NO | | +| RaggedFillEmptyRowsGrad | NO | | +| RaggedGather | NO | | +| RaggedRange | NO | | +| RaggedTensorFromVariant | NO | | +| RaggedTensorToSparse | NO | | +| RaggedTensorToTensor | NO | | +| RaggedTensorToVariant | NO | | +| RaggedTensorToVariantGradient | NO | | +| RandomCrop | NO | | +| RandomDataset | NO | | +| RandomDatasetV2 | NO | | +| RandomGamma | NO | | +| RandomGammaGrad | NO | | +| RandomIndexShuffle | NO | | +| RandomPoisson | NO | | +| RandomPoissonV2 | NO | | +| RandomShuffle | NO | | +| RandomShuffleQueue | NO | | +| RandomShuffleQueueV2 | NO | | +| RandomStandardNormal | NO | | +| RandomUniform | YES | | +| RandomUniformInt | YES | | +| Range | YES | | +| RangeDataset | NO | | +| Rank | YES | | +| ReadFile | NO | | +| ReadVariableOp | YES | | +| ReadVariableXlaSplitND | NO | | +| ReaderNumRecordsProduced | NO | | +| ReaderNumRecordsProducedV2 | NO | | +| ReaderNumWorkUnitsCompleted | NO | | +| ReaderNumWorkUnitsCompletedV2 | NO | | +| ReaderRead | NO | | +| ReaderReadUpTo | NO | | +| ReaderReadUpToV2 | NO | | +| ReaderReadV2 | NO | | +| ReaderReset | NO | | +| ReaderResetV2 | NO | | +| ReaderRestoreState | NO | | +| ReaderRestoreStateV2 | NO | | +| ReaderSerializeState | NO | | +| ReaderSerializeStateV2 | NO | | +| Real | NO | | +| RealDiv | YES | | +| RebatchDataset | NO | | +| RebatchDatasetV2 | NO | | +| Reciprocal | YES | | +| ReciprocalGrad | NO | | +| RecordInput | NO | | +| Recv | NO | | +| RecvTPUEmbeddingActivations | NO | | +| ReduceDataset | NO | | +| ReduceJoin | NO | | +| RefEnter | NO | | +| RefExit | NO | | +| RefIdentity | NO | | +| RefMerge | NO | | +| RefNextIteration | NO | | +| RefSelect | NO | | +| RefSwitch | NO | | +| RegexFullMatch | NO | | +| RegexReplace | NO | | +| RegisterDataset | NO | | +| RegisterDatasetV2 | NO | | +| Relu | YES | | +| Relu6 | YES | | +| Relu6Grad | NO | | +| ReluGrad | NO | | +| RemoteCall | NO | | +| RepeatDataset | NO | | +| RequantizationRange | NO | | +| RequantizationRangePerChannel | NO | | +| Requantize | NO | | +| RequantizePerChannel | NO | | +| Reshape | YES | | +| ResizeArea | NO | | +| ResizeBicubic | NO | | +| ResizeBicubicGrad | NO | | +| ResizeBilinear | YES | | +| ResizeBilinearGrad | NO | | +| ResizeNearestNeighbor | YES | | +| ResizeNearestNeighborGrad | NO | | +| ResourceAccumulatorApplyGradient | NO | | +| ResourceAccumulatorNumAccumulated | NO | | +| ResourceAccumulatorSetGlobalStep | NO | | +| ResourceAccumulatorTakeGradient | NO | | +| ResourceApplyAdaMax | NO | | +| ResourceApplyAdadelta | NO | | +| ResourceApplyAdagrad | NO | | +| ResourceApplyAdagradDA | NO | | +| ResourceApplyAdagradV2 | NO | | +| ResourceApplyAdam | NO | | +| ResourceApplyAdamWithAmsgrad | NO | | +| ResourceApplyAddSign | NO | | +| ResourceApplyCenteredRMSProp | NO | | +| ResourceApplyFtrl | NO | | +| ResourceApplyFtrlV2 | NO | | +| ResourceApplyGradientDescent | NO | | +| ResourceApplyKerasMomentum | NO | | +| ResourceApplyMomentum | NO | | +| ResourceApplyPowerSign | NO | | +| ResourceApplyProximalAdagrad | NO | | +| ResourceApplyProximalGradientDescent | NO | | +| ResourceApplyRMSProp | NO | | +| ResourceConditionalAccumulator | NO | | +| ResourceCountUpTo | NO | | +| ResourceGather 
| YES | | +| ResourceGatherNd | NO | | +| ResourceScatterAdd | NO | | +| ResourceScatterDiv | NO | | +| ResourceScatterMax | NO | | +| ResourceScatterMin | NO | | +| ResourceScatterMul | NO | | +| ResourceScatterNdAdd | NO | | +| ResourceScatterNdMax | NO | | +| ResourceScatterNdMin | NO | | +| ResourceScatterNdSub | NO | | +| ResourceScatterNdUpdate | NO | | +| ResourceScatterSub | NO | | +| ResourceScatterUpdate | NO | | +| ResourceSparseApplyAdadelta | NO | | +| ResourceSparseApplyAdagrad | NO | | +| ResourceSparseApplyAdagradDA | NO | | +| ResourceSparseApplyAdagradV2 | NO | | +| ResourceSparseApplyCenteredRMSProp | NO | | +| ResourceSparseApplyFtrl | NO | | +| ResourceSparseApplyFtrlV2 | NO | | +| ResourceSparseApplyKerasMomentum | NO | | +| ResourceSparseApplyMomentum | NO | | +| ResourceSparseApplyProximalAdagrad | NO | | +| ResourceSparseApplyProximalGradientDescent | NO | | +| ResourceSparseApplyRMSProp | NO | | +| ResourceStridedSliceAssign | NO | | +| Restore | NO | | +| RestoreSlice | NO | | +| RestoreV2 | YES | | +| RetrieveTPUEmbeddingADAMParameters | NO | | +| RetrieveTPUEmbeddingAdadeltaParameters | NO | | +| RetrieveTPUEmbeddingAdagradMomentumParameters | NO | | +| RetrieveTPUEmbeddingAdagradParameters | NO | | +| RetrieveTPUEmbeddingCenteredRMSPropParameters | NO | | +| RetrieveTPUEmbeddingFTRLParameters | NO | | +| RetrieveTPUEmbeddingFrequencyEstimatorParameters | NO | | +| RetrieveTPUEmbeddingMDLAdagradLightParameters | NO | | +| RetrieveTPUEmbeddingMomentumParameters | NO | | +| RetrieveTPUEmbeddingProximalAdagradParameters | NO | | +| RetrieveTPUEmbeddingProximalYogiParameters | NO | | +| RetrieveTPUEmbeddingRMSPropParameters | NO | | +| RetrieveTPUEmbeddingStochasticGradientDescentParameters | NO | | +| Reverse | YES | | +| ReverseSequence | YES | | +| ReverseV2 | YES | | +| RewriteDataset | NO | | +| RightShift | NO | | +| Rint | NO | | +| RngReadAndSkip | NO | | +| RngSkip | NO | | +| Roll | YES | | +| Round | YES | | +| Rsqrt | YES | | +| RsqrtGrad | NO | | +| SampleDistortedBoundingBox | NO | | +| SampleDistortedBoundingBoxV2 | NO | | +| SamplingDataset | NO | | +| Save | NO | | +| SaveDataset | NO | | +| SaveDatasetV2 | NO | | +| SaveSlices | NO | | +| SaveV2 | YES | | +| ScalarSummary | NO | | +| ScaleAndTranslate | NO | | +| ScaleAndTranslateGrad | NO | | +| ScanDataset | NO | | +| ScatterAdd | NO | | +| ScatterDiv | NO | | +| ScatterMax | NO | | +| ScatterMin | NO | | +| ScatterMul | NO | | +| ScatterNd | YES | | +| ScatterNdAdd | NO | | +| ScatterNdMax | NO | | +| ScatterNdMin | NO | | +| ScatterNdNonAliasingAdd | NO | | +| ScatterNdSub | NO | | +| ScatterNdUpdate | NO | | +| ScatterSub | NO | | +| ScatterUpdate | NO | | +| SdcaFprint | NO | | +| SdcaOptimizer | NO | | +| SdcaOptimizerV2 | NO | | +| SdcaShrinkL1 | NO | | +| SegmentMax | NO | | +| SegmentMaxV2 | NO | | +| SegmentMean | NO | | +| SegmentMin | NO | | +| SegmentMinV2 | NO | | +| SegmentProd | NO | | +| SegmentProdV2 | NO | | +| SegmentSum | YES | | +| SegmentSumV2 | NO | | +| Select | YES | | +| SelectV2 | YES | | +| SelfAdjointEig | NO | | +| SelfAdjointEigV2 | NO | | +| Selu | YES | | +| SeluGrad | NO | | +| Send | NO | | +| SendTPUEmbeddingGradients | NO | | +| SerializeIterator | NO | | +| SerializeManySparse | NO | | +| SerializeSparse | NO | | +| SerializeTensor | NO | | +| SetSize | NO | | +| SetStatsAggregatorDataset | NO | | +| Shape | YES | | +| ShapeN | YES | | +| ShardDataset | NO | | +| ShardedFilename | YES | | +| ShardedFilespec | NO | | +| ShuffleAndRepeatDataset | NO | | +| 
ShuffleAndRepeatDatasetV2 | NO | |
+| ShuffleDataset | NO | |
+| ShuffleDatasetV2 | NO | |
+| ShuffleDatasetV3 | NO | |
+| ShutdownDistributedTPU | NO | |
+| Sigmoid | YES | |
+| SigmoidGrad | NO | |
+| Sign | YES | |
+| Sin | YES | |
+| Sinh | YES | |
+| Size | YES | |
+| SkipDataset | NO | |
+| SleepDataset | NO | |
+| Slice | YES | |
+| SlidingWindowDataset | NO | |
+| Snapshot | YES | |
+| SnapshotChunkDataset | NO | |
+| SnapshotDataset | NO | |
+| SnapshotDatasetReader | NO | |
+| SnapshotDatasetV2 | NO | |
+| SnapshotNestedDatasetReader | NO | |
+| SobolSample | NO | |
+| Softmax | YES | |
+| SoftmaxCrossEntropyWithLogits | NO | |
+| Softplus | YES | |
+| SoftplusGrad | NO | |
+| Softsign | YES | |
+| SoftsignGrad | NO | |
+| SpaceToBatch | NO | |
+| SpaceToBatchND | YES | |
+| SpaceToDepth | YES | |
+| SparseAccumulatorApplyGradient | NO | |
+| SparseAccumulatorTakeGradient | NO | |
+| SparseAdd | NO | |
+| SparseAddGrad | NO | |
+| SparseApplyAdadelta | NO | |
+| SparseApplyAdagrad | NO | |
+| SparseApplyAdagradDA | NO | |
+| SparseApplyAdagradV2 | NO | |
+| SparseApplyCenteredRMSProp | NO | |
+| SparseApplyFtrl | NO | |
+| SparseApplyFtrlV2 | NO | |
+| SparseApplyMomentum | NO | |
+| SparseApplyProximalAdagrad | NO | |
+| SparseApplyProximalGradientDescent | NO | |
+| SparseApplyRMSProp | NO | |
+| SparseBincount | NO | |
+| SparseConcat | NO | |
+| SparseConditionalAccumulator | NO | |
+| SparseCountSparseOutput | NO | |
+| SparseCross | NO | |
+| SparseCrossHashed | NO | |
+| SparseCrossV2 | NO | |
+| SparseDenseCwiseAdd | NO | |
+| SparseDenseCwiseDiv | NO | |
+| SparseDenseCwiseMul | NO | |
+| SparseFillEmptyRows | YES | |
+| SparseFillEmptyRowsGrad | NO | |
+| SparseMatMul | NO | |
+| SparseMatrixAdd | NO | |
+| SparseMatrixMatMul | NO | |
+| SparseMatrixMul | NO | |
+| SparseMatrixNNZ | NO | |
+| SparseMatrixOrderingAMD | NO | |
+| SparseMatrixSoftmax | NO | |
+| SparseMatrixSoftmaxGrad | NO | |
+| SparseMatrixSparseCholesky | NO | |
+| SparseMatrixSparseMatMul | NO | |
+| SparseMatrixTranspose | NO | |
+| SparseMatrixZeros | NO | |
+| SparseReduceMax | NO | |
+| SparseReduceMaxSparse | NO | |
+| SparseReduceSum | NO | |
+| SparseReduceSumSparse | NO | |
+| SparseReorder | NO | |
+| SparseReshape | YES | |
+| SparseSegmentMean | NO | |
+| SparseSegmentMeanGrad | NO | |
+| SparseSegmentMeanGradV2 | NO | |
+| SparseSegmentMeanWithNumSegments | NO | |
+| SparseSegmentSqrtN | NO | |
+| SparseSegmentSqrtNGrad | NO | |
+| SparseSegmentSqrtNGradV2 | NO | |
+| SparseSegmentSqrtNWithNumSegments | NO | |
+| SparseSegmentSum | YES | |
+| SparseSegmentSumGrad | NO | |
+| SparseSegmentSumGradV2 | NO | |
+| SparseSegmentSumWithNumSegments | NO | |
+| SparseSlice | NO | |
+| SparseSliceGrad | NO | |
+| SparseSoftmax | NO | |
+| SparseSoftmaxCrossEntropyWithLogits | NO | |
+| SparseSparseMaximum | NO | |
+| SparseSparseMinimum | NO | |
+| SparseSplit | NO | |
+| SparseTensorDenseAdd | NO | |
+| SparseTensorDenseMatMul | NO | |
+| SparseTensorSliceDataset | NO | |
+| SparseTensorToCSRSparseMatrix | NO | |
+| SparseToDense | YES | |
+| SparseToSparseSetOperation | NO | |
+| Spence | NO | |
+| Split | YES | |
+| SplitV | YES | |
+| SqlDataset | NO | |
+| Sqrt | YES | |
+| SqrtGrad | NO | |
+| Square | YES | |
+| SquaredDifference | YES | |
+| Squeeze | YES | |
+| Stack | NO | |
+| StackClose | NO | |
+| StackCloseV2 | NO | |
+| StackPop | NO | |
+| StackPopV2 | NO | |
+| StackPush | NO | |
+| StackPushV2 | NO | |
+| StackV2 | NO | |
+| Stage | NO | |
+| StageClear | NO | |
+| StagePeek | NO | |
+| StageSize | NO | |
+| StatefulPartitionedCall | YES | |
+| StatefulRandomBinomial | NO | |
+| StatefulStandardNormal | NO | |
+| StatefulStandardNormalV2 | NO | |
+| StatefulTruncatedNormal | NO | |
+| StatefulUniform | NO | |
+| StatefulUniformFullInt | NO | |
+| StatefulUniformInt | NO | |
+| StatelessCase | NO | |
+| StatelessIf | YES | |
+| StatelessMultinomial | NO | |
+| StatelessParameterizedTruncatedNormal | NO | |
+| StatelessRandomBinomial | NO | |
+| StatelessRandomGammaV2 | NO | |
+| StatelessRandomGammaV3 | NO | |
+| StatelessRandomGetAlg | NO | |
+| StatelessRandomGetKeyCounter | NO | |
+| StatelessRandomGetKeyCounterAlg | NO | |
+| StatelessRandomNormal | NO | |
+| StatelessRandomNormalV2 | NO | |
+| StatelessRandomPoisson | NO | |
+| StatelessRandomUniform | NO | |
+| StatelessRandomUniformFullInt | NO | |
+| StatelessRandomUniformFullIntV2 | NO | |
+| StatelessRandomUniformInt | NO | |
+| StatelessRandomUniformIntV2 | NO | |
+| StatelessRandomUniformV2 | NO | |
+| StatelessSampleDistortedBoundingBox | NO | |
+| StatelessShuffle | NO | |
+| StatelessTruncatedNormal | NO | |
+| StatelessTruncatedNormalV2 | NO | |
+| StatelessWhile | YES | |
+| StaticRegexFullMatch | YES | |
+| StaticRegexReplace | NO | |
+| StatsAggregatorHandle | NO | |
+| StatsAggregatorHandleV2 | NO | |
+| StatsAggregatorSetSummaryWriter | NO | |
+| StatsAggregatorSummary | NO | |
+| StopGradient | YES | |
+| StridedSlice | YES | |
+| StridedSliceAssign | NO | |
+| StridedSliceGrad | NO | |
+| StringFormat | NO | |
+| StringJoin | YES | |
+| StringLength | NO | |
+| StringLower | NO | |
+| StringNGrams | NO | |
+| StringSplit | NO | |
+| StringSplitV2 | NO | |
+| StringStrip | NO | |
+| StringToHashBucket | NO | |
+| StringToHashBucketFast | NO | |
+| StringToHashBucketStrong | NO | |
+| StringToNumber | NO | |
+| StringUpper | NO | |
+| Sub | YES | |
+| Substr | NO | |
+| Sum | YES | |
+| SummaryWriter | NO | |
+| Svd | NO | |
+| Switch | YES | |
+| SymbolicGradient | NO | |
+| SyncDevice | NO | |
+| TFRecordDataset | NO | |
+| TFRecordDatasetV2 | NO | |
+| TFRecordReader | NO | |
+| TFRecordReaderV2 | NO | |
+| TPUCompilationResult | NO | |
+| TPUEmbeddingActivations | NO | |
+| TPUOrdinalSelector | NO | |
+| TPUPartitionedCall | NO | |
+| TPUPartitionedInput | NO | |
+| TPUPartitionedInputV2 | NO | |
+| TPUPartitionedOutput | NO | |
+| TPUPartitionedOutputV2 | NO | |
+| TPUReplicateMetadata | NO | |
+| TPUReplicatedInput | NO | |
+| TPUReplicatedOutput | NO | |
+| TakeDataset | NO | |
+| TakeManySparseFromTensorsMap | NO | |
+| TakeWhileDataset | NO | |
+| Tan | YES | |
+| Tanh | YES | |
+| TanhGrad | NO | |
+| TemporaryVariable | NO | |
+| TensorArray | NO | |
+| TensorArrayClose | NO | |
+| TensorArrayCloseV2 | NO | |
+| TensorArrayCloseV3 | YES | |
+| TensorArrayConcat | NO | |
+| TensorArrayConcatV2 | NO | |
+| TensorArrayConcatV3 | YES | |
+| TensorArrayGather | NO | |
+| TensorArrayGatherV2 | NO | |
+| TensorArrayGatherV3 | YES | |
+| TensorArrayGrad | NO | |
+| TensorArrayGradV2 | NO | |
+| TensorArrayGradV3 | NO | |
+| TensorArrayGradWithShape | NO | |
+| TensorArrayPack | NO | |
+| TensorArrayRead | NO | |
+| TensorArrayReadV2 | NO | |
+| TensorArrayReadV3 | YES | |
+| TensorArrayScatter | NO | |
+| TensorArrayScatterV2 | NO | |
+| TensorArrayScatterV3 | YES | |
+| TensorArraySize | NO | |
+| TensorArraySizeV2 | NO | |
+| TensorArraySizeV3 | YES | |
+| TensorArraySplit | NO | |
+| TensorArraySplitV2 | NO | |
+| TensorArraySplitV3 | NO | |
+| TensorArrayUnpack | NO | |
+| TensorArrayV2 | NO | |
+| TensorArrayV3 | YES | |
+| TensorArrayWrite | NO | |
+| TensorArrayWriteV2 | NO | |
+| TensorArrayWriteV3 | YES | |
+| TensorDataset | NO | |
+| TensorListConcat | NO | |
+| TensorListConcatLists | NO | |
+| TensorListConcatV2 | NO | |
+| TensorListElementShape | NO | |
+| TensorListFromTensor | YES | |
+| TensorListGather | NO | |
+| TensorListGetItem | YES | |
+| TensorListLength | YES | |
+| TensorListPopBack | NO | |
+| TensorListPushBack | YES | |
+| TensorListPushBackBatch | NO | |
+| TensorListReserve | YES | |
+| TensorListResize | YES | |
+| TensorListScatter | NO | |
+| TensorListScatterIntoExistingList | NO | |
+| TensorListScatterV2 | NO | |
+| TensorListSetItem | YES | |
+| TensorListSplit | NO | |
+| TensorListStack | YES | |
+| TensorScatterAdd | NO | |
+| TensorScatterMax | NO | |
+| TensorScatterMin | NO | |
+| TensorScatterSub | NO | |
+| TensorScatterUpdate | NO | |
+| TensorSliceDataset | NO | |
+| TensorStridedSliceUpdate | NO | |
+| TensorSummary | NO | |
+| TensorSummaryV2 | NO | |
+| TextLineDataset | NO | |
+| TextLineReader | NO | |
+| TextLineReaderV2 | NO | |
+| ThreadPoolDataset | NO | |
+| ThreadPoolHandle | NO | |
+| ThreadUnsafeUnigramCandidateSampler | NO | |
+| Tile | YES | |
+| TileGrad | NO | |
+| Timestamp | NO | |
+| ToBool | YES | |
+| TopK | YES | |
+| TopKV2 | YES | |
+| Transpose | YES | |
+| TridiagonalMatMul | NO | |
+| TridiagonalSolve | NO | |
+| TruncateDiv | YES | |
+| TruncateMod | YES | |
+| TruncatedNormal | NO | |
+| Unbatch | NO | |
+| UnbatchDataset | NO | |
+| UnbatchGrad | NO | |
+| UncompressElement | NO | |
+| UnicodeDecode | NO | |
+| UnicodeDecodeWithOffsets | NO | |
+| UnicodeEncode | NO | |
+| UnicodeScript | NO | |
+| UnicodeTranscode | NO | |
+| UniformCandidateSampler | NO | |
+| UniformDequantize | NO | |
+| UniformQuantize | NO | |
+| UniformQuantizedAdd | NO | |
+| UniformQuantizedClipByValue | NO | |
+| UniformQuantizedConvolution | NO | |
+| UniformQuantizedConvolutionHybrid | NO | |
+| UniformQuantizedDot | NO | |
+| UniformQuantizedDotHybrid | NO | |
+| UniformRequantize | NO | |
+| Unique | YES | |
+| UniqueDataset | NO | |
+| UniqueV2 | NO | |
+| UniqueWithCounts | NO | |
+| UniqueWithCountsV2 | NO | |
+| Unpack | YES | |
+| UnravelIndex | YES | |
+| UnsortedSegmentJoin | NO | |
+| UnsortedSegmentMax | NO | |
+| UnsortedSegmentMin | NO | |
+| UnsortedSegmentProd | NO | |
+| UnsortedSegmentSum | YES | |
+| Unstage | NO | |
+| UnwrapDatasetVariant | NO | |
+| UpperBound | NO | |
+| VarHandleOp | YES | |
+| VarIsInitializedOp | YES | |
+| Variable | YES | |
+| VariableShape | NO | |
+| VariableV2 | YES | |
+| Where | YES | |
+| While | YES | |
+| WholeFileReader | NO | |
+| WholeFileReaderV2 | NO | |
+| WindowDataset | NO | |
+| WindowOp | NO | |
+| WorkerHeartbeat | NO | |
+| WrapDatasetVariant | NO | |
+| WriteAudioSummary | NO | |
+| WriteFile | NO | |
+| WriteGraphSummary | NO | |
+| WriteHistogramSummary | NO | |
+| WriteImageSummary | NO | |
+| WriteRawProtoSummary | NO | |
+| WriteScalarSummary | NO | |
+| WriteSummary | NO | |
+| Xdivy | YES | |
+| XlaConcatND | NO | |
+| XlaSplitND | NO | |
+| Xlog1py | YES | |
+| Xlogy | YES | |
+| ZerosLike | YES | |
+| Zeta | NO | |
+| ZipDataset | NO | |
"checkpoint_utils.hpp" #include "openvino/frontend/exception.hpp" #include "openvino/util/file_util.hpp" -#include "saved_tensor_slice.pb.h" +#include "ov_tensorflow/saved_tensor_slice.pb.h" #include "tf_utils.hpp" #ifdef ENABLE_SNAPPY_COMPRESSION diff --git a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp index bfae3b139a2aae..1171fd6a682cb1 100644 --- a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp +++ b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp @@ -12,9 +12,9 @@ #include "checkpoint_utils.hpp" #include "openvino/core/any.hpp" #include "openvino/frontend/exception.hpp" -#include "saved_tensor_slice.pb.h" -#include "tensor_shape.pb.h" -#include "types.pb.h" +#include "ov_tensorflow/saved_tensor_slice.pb.h" +#include "ov_tensorflow/tensor_shape.pb.h" +#include "ov_tensorflow/types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/decoder_argdef.cpp b/src/frontends/tensorflow/src/decoder_argdef.cpp index 3430bcbe5e37aa..af7c1a1cfccd01 100644 --- a/src/frontends/tensorflow/src/decoder_argdef.cpp +++ b/src/frontends/tensorflow/src/decoder_argdef.cpp @@ -5,11 +5,11 @@ #include "decoder_argdef.hpp" #include "decoder_proto.hpp" -#include "op_def.pb.h" #include "openvino/frontend/tensorflow/node_context.hpp" #include "openvino/frontend/tensorflow/special_types.hpp" +#include "ov_tensorflow/op_def.pb.h" +#include "ov_tensorflow/types.pb.h" #include "tf_utils.hpp" -#include "types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/decoder_proto.cpp b/src/frontends/tensorflow/src/decoder_proto.cpp index 2488973c1029e1..9e0a53efb6d09f 100644 --- a/src/frontends/tensorflow/src/decoder_proto.cpp +++ b/src/frontends/tensorflow/src/decoder_proto.cpp @@ -4,12 +4,12 @@ #include "decoder_proto.hpp" -#include "attr_value.pb.h" -#include "node_def.pb.h" #include "openvino/frontend/tensorflow/node_context.hpp" #include "openvino/frontend/tensorflow/special_types.hpp" +#include "ov_tensorflow/attr_value.pb.h" +#include "ov_tensorflow/node_def.pb.h" +#include "ov_tensorflow/types.pb.h" #include "tf_utils.hpp" -#include "types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/decoder_proto.hpp b/src/frontends/tensorflow/src/decoder_proto.hpp index 338bfdeccea79d..9d22e273e1e146 100644 --- a/src/frontends/tensorflow/src/decoder_proto.hpp +++ b/src/frontends/tensorflow/src/decoder_proto.hpp @@ -9,7 +9,7 @@ #include "openvino/core/type/element_type.hpp" #include "openvino/frontend/tensorflow/decoder.hpp" -#include "types.pb.h" +#include "ov_tensorflow/types.pb.h" namespace tensorflow { class GraphDef; diff --git a/src/frontends/tensorflow/src/frontend.cpp b/src/frontends/tensorflow/src/frontend.cpp index 24b5824fe336d1..ad9b5b76bdfbda 100644 --- a/src/frontends/tensorflow/src/frontend.cpp +++ b/src/frontends/tensorflow/src/frontend.cpp @@ -14,6 +14,7 @@ #include "helper_transforms/embedding_segments_feature_fusing.hpp" #include "helper_transforms/gru_block_cell_replacer.hpp" #include "helper_transforms/saved_model_unused_remover.hpp" +#include "helper_transforms/tensor_array_v3_replacer.hpp" #include "input_model.hpp" #include "op_table.hpp" #include "openvino/core/so_extension.hpp" @@ -491,6 +492,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); 
diff --git a/src/frontends/tensorflow/src/graph_iterator_meta.cpp b/src/frontends/tensorflow/src/graph_iterator_meta.cpp
index 8bc41fbaefdd04..06f2d31f389a27 100644
--- a/src/frontends/tensorflow/src/graph_iterator_meta.cpp
+++ b/src/frontends/tensorflow/src/graph_iterator_meta.cpp
@@ -10,8 +10,8 @@
 #include
 
 #include "openvino/core/type/element_type.hpp"
-#include "tensor_bundle.pb.h"
-#include "trackable_object_graph.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
+#include "ov_tensorflow/trackable_object_graph.pb.h"
 
 namespace ov {
 namespace frontend {
diff --git a/src/frontends/tensorflow/src/graph_iterator_proto.hpp b/src/frontends/tensorflow/src/graph_iterator_proto.hpp
index 8b073b08373305..5ef6d0a5954b41 100644
--- a/src/frontends/tensorflow/src/graph_iterator_proto.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_proto.hpp
@@ -10,10 +10,10 @@
 #include "checkpoint_v1_reader.hpp"
 #include "decoder_argdef.hpp"
 #include "decoder_proto.hpp"
-#include "graph.pb.h"
 #include "openvino/frontend/exception.hpp"
 #include "openvino/frontend/graph_iterator.hpp"
 #include "openvino/frontend/tensorflow/decoder.hpp"
+#include "ov_tensorflow/graph.pb.h"
 
 namespace ov {
 namespace frontend {
diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
index ece0148d19bb20..803e7d694bc69a 100644
--- a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
+++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
@@ -10,8 +10,8 @@
 #include
 
 #include "openvino/core/type/element_type.hpp"
-#include "tensor_bundle.pb.h"
-#include "trackable_object_graph.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
+#include "ov_tensorflow/trackable_object_graph.pb.h"
 
 namespace ov {
 namespace frontend {
diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
index 511f2a0a5bc307..4cb385e66f744d 100644
--- a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
@@ -8,7 +8,7 @@
 #include "graph_iterator_proto.hpp"
 #include "openvino/util/file_util.hpp"
-#include "saved_model.pb.h"
+#include "ov_tensorflow/saved_model.pb.h"
 #include "variables_index.hpp"
 
 namespace ov {
diff --git a/src/frontends/tensorflow/src/op/merge.cpp b/src/frontends/tensorflow/src/op/merge.cpp
index 3594f93ed08278..708de72aa3434f 100644
--- a/src/frontends/tensorflow/src/op/merge.cpp
+++ b/src/frontends/tensorflow/src/op/merge.cpp
@@ -5,6 +5,8 @@
 #include "helper_ops/merge.hpp"
 
 #include "common_op_table.hpp"
+#include "helper_ops/enter.hpp"
+#include "helper_ops/next_iteration.hpp"
 #include "openvino/frontend/tensorflow/node_context.hpp"
 #include "openvino/op/constant.hpp"
 #include "utils.hpp"
@@ -24,20 +26,47 @@ OutputVector translate_merge_op(const NodeContext& node) {
     auto node_name = node.get_name();
     default_op_checks(node, 1, {"Merge"});
     int input_size = static_cast<int>(node.get_input_size());
-    OutputVector inputs;
+    OutputVector inputs(input_size);
     for (int input_ind = 0; input_ind < input_size; ++input_ind) {
-        inputs.push_back(node.get_input(input_ind));
+        inputs[input_ind] = node.get_input(input_ind);
     }
 
     // if Merge node has just one input, there is nothing to merge
     // return the same input and value_index equal to 0
-    if (inputs.size() == 1) {
+    if (input_size == 1) {
         auto value_index = make_shared<v0::Constant>(element::i32, Shape{}, 0);
         value_index->output(0).set_names({node_name + ":1"});
        inputs[0].add_names({node_name + ":0"});
         return OutputVector{inputs[0], value_index};
     }
 
+    // check if this is a case of TF1 While: Enter and NextIteration nodes feeding a Merge node
+    // in this case the output shape and type of NextIteration can be refined based on Enter
+    if (input_size == 2) {
+        auto enter = as_type_ptr<Enter>(inputs[0].get_node_shared_ptr());
+        if (!enter) {
+            enter = as_type_ptr<Enter>(inputs[1].get_node_shared_ptr());
+        }
+        auto next_iteration = as_type_ptr<NextIteration>(inputs[0].get_node_shared_ptr());
+        if (!next_iteration) {
+            next_iteration = as_type_ptr<NextIteration>(inputs[1].get_node_shared_ptr());
+        }
+
+        if (enter && next_iteration) {
+            // set output type and shape for NextIteration
+            // borrow them from the Enter output
+            auto enter_output_type = enter->output(0).get_element_type();
+            auto enter_output_shape = enter->output(0).get_partial_shape();
+            auto next_iteration_output_shape = PartialShape::dynamic(enter_output_shape.rank());
+            next_iteration->set_output_shape_and_type(next_iteration_output_shape, enter_output_type);
+
+            // reset inputs
+            // this refines input shapes and types for the Merge node
+            inputs[0] = enter->output(0);
+            inputs[1] = next_iteration->output(0);
+        }
+    }
+
     auto merge_node = make_shared<Merge>(inputs, node.get_decoder());
     set_node_name(node.get_name(), merge_node);
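The Merge change above borrows the element type from Enter but deliberately relaxes the shape to a rank-only PartialShape, since tensor dimensions may change between loop iterations. A self-contained illustration of that relaxation, using the standard ov::PartialShape API:

```cpp
#include <openvino/core/partial_shape.hpp>

#include <iostream>

int main() {
    // Shape observed on the Enter node: fully static.
    ov::PartialShape enter_shape{2, 3, 5};

    // NextIteration keeps only the rank; every dimension becomes dynamic,
    // because values flowing around the back edge may be resized.
    ov::PartialShape next_iteration_shape = ov::PartialShape::dynamic(enter_shape.rank());

    std::cout << enter_shape << " -> " << next_iteration_shape << std::endl;  // [2,3,5] -> [?,?,?]
    return 0;
}
```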
diff --git a/src/frontends/tensorflow/src/op/tensor_array_operations.cpp b/src/frontends/tensorflow/src/op/tensor_array_operations.cpp
new file mode 100644
index 00000000000000..c1b3d6ac205dc3
--- /dev/null
+++ b/src/frontends/tensorflow/src/op/tensor_array_operations.cpp
@@ -0,0 +1,332 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_op_table.hpp"
+#include "helper_ops/enter.hpp"
+#include "helper_ops/tensor_array.hpp"
+#include "openvino/frontend/tensorflow/node_context.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/broadcast.hpp"
+#include "openvino/op/concat.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/maximum.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/scatter_nd_update.hpp"
+#include "openvino/op/scatter_update.hpp"
+#include "openvino/op/shape_of.hpp"
+#include "openvino/op/slice.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/op/unsqueeze.hpp"
+#include "utils.hpp"
+
+using namespace std;
+using namespace ov;
+using namespace ov::op;
+using namespace ov::frontend::tensorflow;
+
+namespace ov {
+namespace frontend {
+namespace tensorflow {
+namespace op {
+
+namespace {
+// the function creates the constant imitating the initial tensor array container
+Output<Node> create_initial_tensor_array_constant(int64_t tensor_element_rank,
+                                                  const element::Type& element_type,
+                                                  Output<Node> size,
+                                                  const string& node_name) {
+    // adjust size to have it of shape [1] for further concatenation with the element shape
+    auto new_size_shape = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    size = make_shared<v1::Reshape>(size, new_size_shape, false);
+
+    // create a vector of size element_shape.rank() with ones
+    // and compute a shape of the initial tensor array [size, 1, ..., 1]
+    vector<int32_t> ones(tensor_element_rank, 1);
+    auto ones_const = make_shared<v0::Constant>(element::i32, Shape{ones.size()}, ones);
+    auto target_shape = make_shared<v0::Concat>(OutputVector{size, ones_const}, 0);
+
+    // create the initial tensor array
+    auto scalar_value = make_shared<v0::Constant>(element_type, Shape{}, vector<int32_t>{0});
+    auto initial_tensor_array = make_shared<v3::Broadcast>(scalar_value, target_shape);
+
+    return initial_tensor_array->output(0);
+}
+}  // namespace
+
+OutputVector translate_tensor_array_v3_op(const NodeContext& node) {
+    // TensorArrayV3 has just one input:
+    // 0) size to initialize a size of tensor array
+    default_op_checks(node, 1, {"TensorArrayV3"});
+    auto dtype = node.get_attribute<element::Type>("dtype");
+    auto size = node.get_input(0);
+    auto element_shape = node.get_attribute<PartialShape>("element_shape");
+
+    if (element_shape.rank().is_static()) {
+        auto node_name = node.get_name();
+        auto new_output1 =
+            create_initial_tensor_array_constant(element_shape.rank().get_length(), dtype, size, node.get_name());
+        new_output1.set_names({node_name + ":0"});
+        auto new_output2 =
+            create_initial_tensor_array_constant(element_shape.rank().get_length(), dtype, size, node.get_name());
+        new_output2.set_names({node_name + ":1"});
+        return OutputVector{new_output1, new_output2};
+    }
+
+    // dynamic case when it is unable to retrieve the element rank from the attribute
+    auto tensor_array_v3 = make_shared<TensorArrayV3>(size, dtype, node.get_decoder());
+    set_node_name(node.get_name(), tensor_array_v3);
+
+    return tensor_array_v3->outputs();
+}
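Since the template arguments were stripped from the listing above by extraction (they are restored from the included op headers; the v0/v1/v3 opset versions are my assumption), here is the same "initial tensor array" idea as a standalone sketch. The unit dimensions exist so that a later Broadcast can grow them to the real element shape once a write or scatter reveals it:

```cpp
#include <openvino/op/broadcast.hpp>
#include <openvino/op/concat.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/reshape.hpp>

using namespace ov;
using namespace ov::op;

// Build a [size, 1, ..., 1] zero tensor that stands in for an empty TensorArray.
Output<Node> initial_tensor_array(int64_t element_rank, const element::Type& et, Output<Node> size) {
    auto one = v0::Constant::create(element::i32, Shape{1}, {1});
    size = std::make_shared<v1::Reshape>(size, one, false);  // ensure size has shape [1]

    std::vector<int32_t> ones(element_rank, 1);
    auto unit_dims = v0::Constant::create(element::i32, Shape{ones.size()}, ones);
    auto target_shape = std::make_shared<v0::Concat>(OutputVector{size, unit_dims}, 0);

    auto zero = v0::Constant::create(et, Shape{}, {0});
    return std::make_shared<v3::Broadcast>(zero, target_shape);
}
```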
+OutputVector translate_tensor_array_scatter_v3_op(const NodeContext& node) {
+    // TensorArrayScatterV3 has four inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) indices, a Tensor of type int32. The locations at which to write the tensor elements.
+    // 2) value, a Tensor. The concatenated tensor to write to the TensorArray.
+    // 3) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) flow_out indicates that the operation is complete and the handle resource is updated
+    default_op_checks(node, 4, {"TensorArrayScatterV3"});
+    auto indices = node.get_input(1);
+    auto value = node.get_input(2);
+    // flow_in is used for transferring the input tensor array
+    auto tensor_array = node.get_input(3);
+
+    // check if the producer of tensor_array is still TensorArrayV3, an internal operation
+    // if yes, try to replace it with a constant container
+    if (as_type_ptr<TensorArrayV3>(tensor_array.get_node_shared_ptr()) &&
+        value.get_partial_shape().rank().is_static()) {
+        // set the tensor element rank that becomes known from the TensorArrayScatterV3 operation
+        auto tensor_array_v3 = as_type_ptr<TensorArrayV3>(tensor_array.get_node_shared_ptr());
+        TENSORFLOW_OP_VALIDATION(
+            node,
+            value.get_partial_shape().rank().get_length() > 0,
+            "[TensorFlow Frontend] internal error or inconsistent model: value to TensorArrayScatterV3 is a scalar");
+        int64_t tensor_element_rank = value.get_partial_shape().rank().get_length() - 1;
+        tensor_array_v3->set_element_rank(tensor_element_rank);
+    }
+
+    // compute the element shape (shape of a tensor in the tensor array) using value
+    auto element_shape = make_shared<v3::ShapeOf>(value, element::i32)->output(0);
+    auto one_const = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    auto max_const = make_shared<v0::Constant>(element::i32, Shape{1}, numeric_limits<int32_t>::max());
+    element_shape = make_shared<v8::Slice>(element_shape, one_const, max_const, one_const);
+
+    // compute the size of the tensor array
+    auto tensor_array_size = make_shared<v3::ShapeOf>(tensor_array, element::i32)->output(0);
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    tensor_array_size = make_shared<v8::Gather>(tensor_array_size, zero_const, zero_const);
+
+    // compute the new shape for the tensor array where new tensors will be inserted
+    auto new_shape = make_shared<v0::Concat>(OutputVector{tensor_array_size, element_shape}, 0);
+    tensor_array = make_shared<v3::Broadcast>(tensor_array, new_shape);
+
+    // adjust indices for ScatterNDUpdate to have a shape [N, 1] where N is a number of indices
+    indices = make_shared<v0::Unsqueeze>(indices, one_const);
+
+    // compute the updated tensor array using ScatterNDUpdate
+    // value should be of a shape [N, ]
+    auto updated_tensor_array = make_shared<v3::ScatterNDUpdate>(tensor_array, indices, value);
+    set_node_name(node.get_name(), updated_tensor_array);
+
+    // TensorArrayScatterV3 has just one output flow_out
+    // that is used for transferring the updated tensor array
+    return {updated_tensor_array};
+}
+
+OutputVector translate_tensor_array_read_v3_op(const NodeContext& node) {
+    // TensorArrayReadV3 reads an element from the TensorArray into the output
+    // and it has three inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) index, a Tensor of type int32. The location from which to read the value.
+    // 2) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) read value from the tensor array
+    default_op_checks(node, 3, {"TensorArrayReadV3"});
+    auto index = node.get_input(1);
+    // flow_in is used for transferring the input tensor array
+    auto tensor_array = node.get_input(2);
+    auto dtype = node.get_attribute<element::Type>("dtype");
+
+    // adjust the index to a scalar for using the Gather operation
+    auto new_shape = make_shared<v0::Constant>(element::i32, Shape{0}, vector<int32_t>{});
+    index = make_shared<v1::Reshape>(index, new_shape, false);
+
+    // gather the tensor element by the required position
+    auto gather_axis = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    Output<Node> tensor_element = make_shared<v8::Gather>(tensor_array, index, gather_axis);
+    tensor_element = make_shared<v0::Convert>(tensor_element, dtype);
+
+    set_node_name(node.get_name(), tensor_element.get_node_shared_ptr());
+    return {tensor_element};
+}
+
+OutputVector translate_tensor_array_close_v3_op(const NodeContext& node) {
+    // TensorArrayCloseV3 deletes the TensorArray from its resource container
+    // it outputs nothing
+    default_op_checks(node, 1, {"TensorArrayCloseV3"});
+    return {};
+}
+
+OutputVector translate_tensor_array_size_v3_op(const NodeContext& node) {
+    // TensorArraySizeV3 gets the current size of the TensorArray
+    // it outputs an int32 scalar equal to the size of the tensor array
+    default_op_checks(node, 2, {"TensorArraySizeV3"});
+    // skip the handle in the first input
+    auto tensor_array = node.get_input(1);
+
+    auto size = make_shared<v3::ShapeOf>(tensor_array, element::i32)->output(0);
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    size = make_shared<v8::Gather>(size, zero_const, zero_const);
+
+    // size must be a scalar
+    auto scalar_shape = make_shared<v0::Constant>(element::i32, Shape{0}, vector<int32_t>{});
+    size = make_shared<v1::Reshape>(size, scalar_shape, false);
+
+    set_node_name(node.get_name(), size.get_node_shared_ptr());
+    return {size};
+}
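The TensorArrayScatterV3 decomposition above writes whole tensor elements into rows of the [size, elem...] container: indices are unsqueezed to shape [N, 1] so that each index selects a full row. A plain-C++ toy with the same semantics, with no OpenVINO dependency and purely illustrative values:

```cpp
#include <iostream>
#include <vector>

int main() {
    // Container: 4 "tensor elements", each a row of 3 values (all zeros).
    std::vector<std::vector<float>> tensor_array(4, std::vector<float>(3, 0.f));

    // indices of shape [N, 1] and updates of shape [N, 3], as in the diff.
    std::vector<int> indices = {2, 0};                      // N = 2
    std::vector<std::vector<float>> updates = {{1, 2, 3},   // row for index 2
                                               {4, 5, 6}};  // row for index 0

    // ScatterNDUpdate with index depth 1 replaces whole rows.
    for (size_t n = 0; n < indices.size(); ++n)
        tensor_array[indices[n]] = updates[n];

    for (const auto& row : tensor_array) {
        for (float v : row) std::cout << v << ' ';
        std::cout << '\n';  // rows 0 and 2 now hold the updates
    }
    return 0;
}
```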
+OutputVector translate_tensor_array_gather_v3_op(const NodeContext& node) {
+    // TensorArrayGatherV3 gathers specific elements from the TensorArray into the output
+    // and it has three inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) indices, a Tensor of type int32. The locations from which to read the tensor elements.
+    // 2) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) value with the read tensor elements
+    default_op_checks(node, 3, {"TensorArrayGatherV3"});
+    // skip the handle in the first input
+    auto indices = node.get_input(1);
+    // flow_in serves for transferring the tensor array
+    // the handle input is ignored
+    auto tensor_array = node.get_input(2);
+    auto dtype = node.get_attribute<element::Type>("dtype");
+    auto element_shape = node.get_attribute<PartialShape>("element_shape", PartialShape::dynamic());
+
+    // gather the tensor elements by the required positions
+    auto gather_axis = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    Output<Node> tensor_element = make_shared<v8::Gather>(tensor_array, indices, gather_axis);
+    tensor_element = make_shared<v0::Convert>(tensor_element, dtype);
+
+    // concretize the tensor_element shape if it is specified
+    if (tensor_element.get_partial_shape().rank().is_dynamic() && element_shape.is_static()) {
+        auto element_shape_value = element_shape.get_shape();
+        auto element_shape_const =
+            make_shared<v0::Constant>(element::i32, Shape{element_shape_value.size()}, element_shape_value);
+        auto size = make_shared<v3::ShapeOf>(tensor_array, element::i32)->output(0);
+        auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+        size = make_shared<v8::Gather>(size, zero_const, zero_const);
+        auto new_shape = make_shared<v0::Concat>(OutputVector{size, element_shape_const}, 0);
+        tensor_element = make_shared<v1::Reshape>(tensor_element, new_shape, false);
+    }
+
+    set_node_name(node.get_name(), tensor_element.get_node_shared_ptr());
+    return {tensor_element};
+}
+
+OutputVector translate_tensor_array_concat_v3_op(const NodeContext& node) {
+    // TensorArrayConcatV3 concatenates the elements from the TensorArray into the value
+    // and it has two inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has two outputs:
+    // 0) value concatenated by the first dimension
+    // 1) lengths of each tensor element for the concatenation
+    default_op_checks(node, 2, {"TensorArrayConcatV3"});
+    // flow_in serves for transferring the tensor array
+    // the handle input is ignored
+    auto tensor_array = node.get_input(1);
+    auto dtype = node.get_attribute<element::Type>("dtype");
+
+    // since the tensor array stores tensor elements in a form concatenated by the first dimension,
+    // and for this operation they must be concatenated by the first dimension of the tensor element,
+    // it needs to combine the first two dimensions
+    // the tensor array is of shape [k, n0, n1, ..., nd]
+    // 1. compute the element shape excluding the first dimension
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    auto one_const = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    auto two_const = make_shared<v0::Constant>(element::i32, Shape{1}, 2);
+    auto max_const = make_shared<v0::Constant>(element::i32, Shape{1}, numeric_limits<int32_t>::max());
+    auto tensor_array_shape = make_shared<v3::ShapeOf>(tensor_array, element::i64);
+    auto element_shape_no_two_dims = make_shared<v8::Slice>(tensor_array_shape, two_const, max_const, one_const);
+    // 2. compute the first and second dimensions k and n0
+    auto k = make_shared<v8::Gather>(tensor_array_shape, zero_const, zero_const);
+    auto n0 = make_shared<v8::Gather>(tensor_array_shape, one_const, zero_const);
+    auto k_by_n0 = make_shared<v1::Multiply>(k, n0);
+    // 3. compute the first output containing the concatenated tensor elements
+    // it folds the first and second dimensions
+    auto new_shape = make_shared<v0::Concat>(OutputVector{k_by_n0, element_shape_no_two_dims}, 0);
+    auto concatenated_array = make_shared<v1::Reshape>(tensor_array, new_shape, false)->output(0);
+    concatenated_array = make_shared<v0::Convert>(concatenated_array, dtype);
+    concatenated_array.set_names({node.get_name() + ":0"});
+
+    // 4. compute the second output with the length of each tensor element for the concatenation
+    auto lengths = make_shared<v3::Broadcast>(n0, k)->output(0);
+    lengths.set_names({node.get_name() + ":1"});
+
+    return {concatenated_array, lengths};
+}
+
+OutputVector translate_tensor_array_write_v3_op(const NodeContext& node) {
+    // TensorArrayWriteV3 pushes an element onto the tensor_array
+    // and it has four inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) index, a Tensor of type int32. The location at which to write the tensor element.
+    // 2) value, a Tensor. The tensor to write at the specified location.
+    // 3) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) flow_out used for transferring the updated tensor array
+    default_op_checks(node, 4, {"TensorArrayWriteV3"});
+    auto handle = node.get_input(0);
+    auto index = node.get_input(1);
+    auto value = node.get_input(2);
+    // flow_in is used for transferring the input tensor array
+    // the tensor array has a rank equal to 1 + rank(element of tensor array)
+    // if it is just initialized, its shape is equal to [tensor_array_size, 1, ..., 1]
+    // otherwise, it is equal to [tensor_array_size, ]
+    auto tensor_array = node.get_input(3);
+
+    // reshape index to give it the shape [1]
+    auto new_index_shape = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    index = make_shared<v1::Reshape>(index, new_index_shape, false);
+
+    if (auto enter = as_type_ptr<Enter>(handle.get_node_shared_ptr())) {
+        if (as_type_ptr<TensorArrayV3>(enter->input_value(0).get_node_shared_ptr()) &&
+            value.get_partial_shape().rank().is_static()) {
+            // set the tensor element rank that becomes known from the TensorArrayWriteV3 operation
+            auto tensor_array_v3 = as_type_ptr<TensorArrayV3>(enter->input_value(0).get_node_shared_ptr());
+            int64_t tensor_element_rank = value.get_partial_shape().rank().get_length();
+            tensor_array_v3->set_element_rank(tensor_element_rank);
+        }
+    }
+
+    // compute the element shape in the input tensor array
+    auto tensor_array_shape = make_shared<v3::ShapeOf>(tensor_array, element::i32);
+
+    // compute the current size of the tensor array
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    auto tensor_array_size = make_shared<v8::Gather>(tensor_array_shape, zero_const, zero_const);
+
+    // adjust the tensor array to have the correct shape [size, ] before value insertion
+    auto element_shape = make_shared<v3::ShapeOf>(value, element::i32);
+    auto new_tensor_array_shape = make_shared<v0::Concat>(OutputVector{tensor_array_size, element_shape}, 0);
+    tensor_array = make_shared<v3::Broadcast>(tensor_array, new_tensor_array_shape);
+
+    // update the resulting tensor using ScatterUpdate
+    value = make_shared<v0::Unsqueeze>(value, zero_const);
+    auto scatter_update = make_shared<v3::ScatterUpdate>(tensor_array, index, value, zero_const);
+
+    set_node_name(node.get_name(), scatter_update);
+    // use flow_out for transferring the updated tensor array
+    return {scatter_update};
+}
+
+}  // namespace op
+}  // namespace tensorflow
+}  // namespace frontend
+}  // namespace ov
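TensorArrayConcatV3 above relies on the container layout [k, n0, ...]: concatenating k stored elements along their first axis is just a reshape that folds k and n0 together, and the second output is n0 repeated k times. A quick sanity check of that index arithmetic in plain C++ (toy sizes, illustrative only):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const int64_t k = 3, n0 = 2, rest = 2;  // container shape [3, 2, 2]

    // Fill the container in row-major order.
    std::vector<int64_t> buf(k * n0 * rest);
    for (int64_t i = 0; i < k * n0 * rest; ++i) buf[i] = i;

    // "Reshape" [k, n0, rest] -> [k * n0, rest] is a no-op on the flat buffer:
    // element i contributes rows i*n0 .. i*n0 + n0 - 1 of the folded view,
    // which is exactly concatenation along axis 0.
    for (int64_t r = 0; r < k * n0; ++r) {
        std::cout << "row " << r << ":";
        for (int64_t c = 0; c < rest; ++c) std::cout << ' ' << buf[r * rest + c];
        std::cout << '\n';
    }

    // Second output of TensorArrayConcatV3: every element contributed n0 rows.
    std::vector<int64_t> lengths(k, n0);
    std::cout << "lengths: " << lengths.size() << " entries of " << lengths[0] << '\n';
    return 0;
}
```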
diff --git a/src/frontends/tensorflow/src/op/var_handle.cpp b/src/frontends/tensorflow/src/op/var_handle.cpp
index 50a5b73c449f8f..501df1c504309b 100644
--- a/src/frontends/tensorflow/src/op/var_handle.cpp
+++ b/src/frontends/tensorflow/src/op/var_handle.cpp
@@ -9,8 +9,9 @@
 #include "input_model.hpp"
 #include "ngraph/runtime/shared_buffer.hpp"
 #include "openvino/opsets/opset8.hpp"
+#include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/util/mmap_object.hpp"
-#include "tensor_bundle.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
 
 using namespace std;
 using namespace ov::opset8;
@@ -44,15 +45,12 @@ static std::shared_ptr read_variable(std::shared_ptr v
             node,
             static_cast(mapped_memory->size()) >= entry.offset() + entry.size(),
             "[TensorFlow Frontend] Internal error: Variable entry size is out of bounds of mapped memory size.");
-        OPENVINO_SUPPRESS_DEPRECATED_START
         return std::make_shared<Constant>(
             ov_type,
             shape,
-            std::make_shared<ngraph::runtime::SharedBuffer<std::shared_ptr<MappedMemory>>>(
-                mapped_memory->data() + entry.offset(),
-                entry.size(),
-                mapped_memory));
-        OPENVINO_SUPPRESS_DEPRECATED_END
+            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mapped_memory->data() + entry.offset(),
+                                                                              entry.size(),
+                                                                              mapped_memory));
     } else {
         std::vector var_data;
         var_data.resize(size);
diff --git a/src/frontends/tensorflow/src/op/xla_conv_v2.cpp b/src/frontends/tensorflow/src/op/xla_conv_v2.cpp
index dc2e319c9a03b1..2d6ecdfa7bfb73 100644
--- a/src/frontends/tensorflow/src/op/xla_conv_v2.cpp
+++ b/src/frontends/tensorflow/src/op/xla_conv_v2.cpp
@@ -14,8 +14,8 @@
 #include "openvino/op/shape_of.hpp"
 #include "openvino/op/slice.hpp"
 #include "openvino/op/transpose.hpp"
+#include "ov_tensorflow/xla_data.pb.h"
 #include "utils.hpp"
-#include "xla_data.pb.h"
 
 using namespace std;
 using namespace ov;
diff --git a/src/frontends/tensorflow/src/op/xla_dot.cpp b/src/frontends/tensorflow/src/op/xla_dot.cpp
index e463494511f076..b4c38519ce210c 100644
--- a/src/frontends/tensorflow/src/op/xla_dot.cpp
+++ b/src/frontends/tensorflow/src/op/xla_dot.cpp
@@ -13,8 +13,8 @@
 #include "openvino/op/shape_of.hpp"
 #include "openvino/op/transpose.hpp"
 #include "openvino/op/unsqueeze.hpp"
+#include "ov_tensorflow/xla_data.pb.h"
 #include "utils.hpp"
-#include "xla_data.pb.h"
 
 using namespace std;
 using namespace ov;
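The var_handle.cpp hunk above swaps the deprecated ngraph::runtime::SharedBuffer for ov::SharedBuffer; both exist to keep a memory-mapped checkpoint alive for as long as a Constant points into it. The lifetime-tying idea in portable C++, using a shared_ptr aliasing constructor instead of SharedBuffer (POSIX mmap; illustrative only):

```cpp
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cstddef>
#include <memory>

// Owner of the whole mapping; unmapped when the last view goes away.
struct Mapping {
    void* data = MAP_FAILED;
    size_t size = 0;
    ~Mapping() {
        if (data != MAP_FAILED) munmap(data, size);
    }
};

// Return a pointer into the file at `offset` that keeps the mapping alive,
// the same job ov::SharedBuffer does for Constants backed by a checkpoint.
std::shared_ptr<const char> map_region(const char* path, size_t offset, size_t length) {
    int fd = open(path, O_RDONLY);
    if (fd < 0) return nullptr;
    struct stat st {};
    fstat(fd, &st);
    auto owner = std::make_shared<Mapping>();
    owner->size = static_cast<size_t>(st.st_size);
    owner->data = mmap(nullptr, owner->size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);  // the mapping stays valid after close
    if (owner->data == MAP_FAILED || offset + length > owner->size) return nullptr;
    // Aliasing constructor: points at the offset, but owns the whole Mapping.
    return std::shared_ptr<const char>(owner, static_cast<const char*>(owner->data) + offset);
}
```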
diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp
index bc1a657faf54fb..3a4c570c6576fb 100644
--- a/src/frontends/tensorflow/src/op_table.cpp
+++ b/src/frontends/tensorflow/src/op_table.cpp
@@ -46,6 +46,14 @@ TF_OP_CONVERTER(translate_sparse_segment_sum_op);
 TF_OP_CONVERTER(translate_staticregexfullmatch_op);
 TF_OP_CONVERTER(translate_stringjoin_op);
 TF_OP_CONVERTER(translate_switch_op);
+TF_OP_CONVERTER(translate_tensor_array_close_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_concat_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_gather_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_read_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_scatter_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_size_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_write_v3_op);
 TF_OP_CONVERTER(translate_varhandle_op);
 TF_OP_CONVERTER(translate_variable_op);
 TF_OP_CONVERTER(translate_varisinitialized_op);
@@ -174,6 +182,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
     {"Gather", CreatorFunction(translate_gather_op)},
     {"GatherV2", CreatorFunction(translate_gather_v2_op)},
     {"GatherNd", CreatorFunction(translate_gather_nd_op)},
+    {"GatherTree", CreatorFunction(translate_gather_tree_op)},
+    {"Addons>GatherTree", CreatorFunction(translate_gather_tree_op)},
     {"HashTable", CreatorFunction(translate_hash_table_op)},
     {"HashTableV2", CreatorFunction(translate_hash_table_op)},
     {"Identity", CreatorFunction(translate_identity_op)},
@@ -269,6 +279,14 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
     {"StatelessWhile", CreatorFunction(translate_while_op)},
     {"StridedSlice", CreatorFunction(translate_strided_slice_op)},
     {"Switch", CreatorFunction(translate_switch_op)},
+    {"TensorArrayCloseV3", CreatorFunction(translate_tensor_array_close_v3_op)},
+    {"TensorArrayConcatV3", CreatorFunction(translate_tensor_array_concat_v3_op)},
+    {"TensorArrayGatherV3", CreatorFunction(translate_tensor_array_gather_v3_op)},
+    {"TensorArrayReadV3", CreatorFunction(translate_tensor_array_read_v3_op)},
+    {"TensorArrayScatterV3", CreatorFunction(translate_tensor_array_scatter_v3_op)},
+    {"TensorArraySizeV3", CreatorFunction(translate_tensor_array_size_v3_op)},
+    {"TensorArrayV3", CreatorFunction(translate_tensor_array_v3_op)},
+    {"TensorArrayWriteV3", CreatorFunction(translate_tensor_array_write_v3_op)},
     {"TensorListFromTensor", CreatorFunction(translate_tensor_list_from_tensor_op)},
     {"TensorListGetItem", CreatorFunction(translate_tensor_list_get_item_op)},
     {"TensorListLength", CreatorFunction(translate_tensor_list_length_op)},
@@ -278,9 +296,12 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
     {"TensorListReserve", CreatorFunction(translate_tensor_list_reserve_op)},
     {"TensorListResize", CreatorFunction(translate_tensor_list_resize_op)},
     {"Tile", CreatorFunction(translate_tile_op)},
+    {"ToBool", CreatorFunction(translate_tobool_op)},
     {"TopK", CreatorFunction(translate_top_k_op)},
     {"TopKV2", CreatorFunction(translate_top_k_v2_op)},
     {"Transpose", CreatorFunction(translate_transpose_op)},
+    {"TruncateDiv", CreatorFunction(translate_truncate_div_op)},
+    {"TruncateMod", CreatorFunction(translate_truncate_mod_op)},
     {"Unpack", CreatorFunction(translate_unpack_op)},
     {"UnravelIndex", CreatorFunction(translate_unravel_index_op)},
     {"UnsortedSegmentSum", CreatorFunction(translate_unsorted_segment_sum_op)},
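The op_table.cpp hunks register each translator in a name-to-function map (the map's template arguments, std::string and CreatorFunction, were stripped above and are restored from the entry syntax). The registration pattern in miniature, with stand-in types and hypothetical names:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Miniature version of the op table: TF op name -> translator callback.
struct NodeContext {};                                  // stand-in
using OutputVector = std::vector<int>;                  // stand-in
using CreatorFunction = std::function<OutputVector(const NodeContext&)>;

OutputVector translate_tensor_array_v3_op(const NodeContext&) { return {1, 2}; }

int main() {
    std::map<std::string, CreatorFunction> supported_ops = {
        // Several TF op names may share one translator, as with
        // "GatherTree" and "Addons>GatherTree" in the diff above.
        {"TensorArrayV3", CreatorFunction(translate_tensor_array_v3_op)},
    };
    auto it = supported_ops.find("TensorArrayV3");
    if (it != supported_ops.end())
        std::cout << "translated outputs: " << it->second(NodeContext{}).size() << '\n';
    return 0;
}
```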
diff --git a/src/frontends/tensorflow/src/proto/any.proto b/src/frontends/tensorflow/src/proto/google/protobuf/any.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/any.proto
rename to src/frontends/tensorflow/src/proto/google/protobuf/any.proto
diff --git a/src/frontends/tensorflow/src/proto/wrappers.proto b/src/frontends/tensorflow/src/proto/google/protobuf/wrappers.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/wrappers.proto
rename to src/frontends/tensorflow/src/proto/google/protobuf/wrappers.proto
diff --git a/src/frontends/tensorflow/src/proto/allocation_description.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/allocation_description.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/allocation_description.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/allocation_description.proto
diff --git a/src/frontends/tensorflow/src/proto/api_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/api_def.proto
similarity index 99%
rename from src/frontends/tensorflow/src/proto/api_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/api_def.proto
index 810aabc5a2c2c3..cbb581973d32bb 100644
--- a/src/frontends/tensorflow/src/proto/api_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/api_def.proto
@@ -21,7 +21,7 @@ option java_outer_classname = "ApiDefProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
 option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/api_def_go_proto";
-import "attr_value.proto";
+import "ov_tensorflow/attr_value.proto";
 
 // Used to specify and override the default API & behavior in the
 // generated code for client languages, from what you would get from
diff --git a/src/frontends/tensorflow/src/proto/attr_value.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto
similarity index 96%
rename from src/frontends/tensorflow/src/proto/attr_value.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto
index 3028176c02bcd7..b903c30cf99276 100644
--- a/src/frontends/tensorflow/src/proto/attr_value.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/tensor.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "AttrValueProtos";
diff --git a/src/frontends/tensorflow/src/proto/cost_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/cost_graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto
index dad93a029babae..8e4d9788f49595 100644
--- a/src/frontends/tensorflow/src/proto/cost_graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto
@@ -14,8 +14,8 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "CostGraphProtos";
diff --git a/src/frontends/tensorflow/src/proto/dataset_options.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/dataset_options.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/dataset_options.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/dataset_options.proto
diff --git a/src/frontends/tensorflow/src/proto/device_attributes.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/device_attributes.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/device_attributes.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/device_attributes.proto
diff --git a/src/frontends/tensorflow/src/proto/function.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/function.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto
index 65a2acb3b91979..9e84731c983bb1 100644
--- a/src/frontends/tensorflow/src/proto/function.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "attr_value.proto";
-import "node_def.proto";
-import "op_def.proto";
+import "ov_tensorflow/attr_value.proto";
+import "ov_tensorflow/node_def.proto";
+import "ov_tensorflow/op_def.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "FunctionProtos";
diff --git a/src/frontends/tensorflow/src/proto/graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto
similarity index 95%
rename from src/frontends/tensorflow/src/proto/graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto
index c52e84022f9fcd..e047abeafe18b1 100644
--- a/src/frontends/tensorflow/src/proto/graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 package tensorflow;
 
-import "function.proto";
-import "node_def.proto";
-import "versions.proto";
+import "ov_tensorflow/function.proto";
+import "ov_tensorflow/node_def.proto";
+import "ov_tensorflow/versions.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "GraphProtos";
diff --git a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/graph_transfer_info.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto
index e42c1353695313..9e7d598e34a5c1 100644
--- a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "types.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "GraphTransferInfoProto";
diff --git a/src/frontends/tensorflow/src/proto/kernel_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/kernel_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto
index 5e6b839d31582e..88142d3de9584d 100644
--- a/src/frontends/tensorflow/src/proto/kernel_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "attr_value.proto";
+import "ov_tensorflow/attr_value.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "KernelDefProtos";
diff --git a/src/frontends/tensorflow/src/proto/log_memory.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/log_memory.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto
index 96dac4c9ca370d..62489f0e0b8df4 100644
--- a/src/frontends/tensorflow/src/proto/log_memory.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_description.proto";
+import "ov_tensorflow/tensor_description.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "LogMemoryProtos";
diff --git a/src/frontends/tensorflow/src/proto/meta_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/meta_graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto
index b6918fa853bf8c..255fb6efeb2f9e 100644
--- a/src/frontends/tensorflow/src/proto/meta_graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto
@@ -14,14 +14,14 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "any.proto";
-import "graph.proto";
-import "op_def.proto";
-import "tensor_shape.proto";
-import "types.proto";
-import "saved_object_graph.proto";
-import "saver.proto";
-import "struct.proto";
+import "google/protobuf/any.proto";
+import "ov_tensorflow/graph.proto";
+import "ov_tensorflow/op_def.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
+import "ov_tensorflow/saved_object_graph.proto";
+import "ov_tensorflow/saver.proto";
+import "ov_tensorflow/struct.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "MetaGraphProtos";
diff --git a/src/frontends/tensorflow/src/proto/model.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/model.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/model.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/model.proto
diff --git a/src/frontends/tensorflow/src/proto/node_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto
similarity index 99%
rename from src/frontends/tensorflow/src/proto/node_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto
index 573d0f901dd732..b8f3a017a30fc5 100644
--- a/src/frontends/tensorflow/src/proto/node_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "attr_value.proto";
+import "ov_tensorflow/attr_value.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "NodeProto";
diff --git a/src/frontends/tensorflow/src/proto/op_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/op_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto
index 4d5c66c39e16d7..31493fed26ce55 100644
--- a/src/frontends/tensorflow/src/proto/op_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto
@@ -18,9 +18,10 @@ option java_outer_classname = "OpDefProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
 option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/op_def_go_proto";
-import "attr_value.proto";
-import "types.proto";
-import "resource_handle.proto";
+
+import "ov_tensorflow/attr_value.proto";
+import "ov_tensorflow/types.proto";
+import "ov_tensorflow/resource_handle.proto";
 
 // Defines an operation. A NodeDef in a GraphDef specifies an Op by
 // using the "op" field which should match the name of a OpDef.
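The rename hunks relocate every bundled .proto under the ov_tensorflow/ prefix (and the stock Google ones under google/protobuf/), so the generated headers and import paths no longer collide with an application's own TensorFlow protos. Consuming the relocated generated code might look like the sketch below; GraphDef and ParseFromIstream are standard protobuf API, while the file handling is illustrative:

```cpp
#include <fstream>
#include <iostream>

// Generated headers now live under the ov_tensorflow/ prefix:
#include "ov_tensorflow/graph.pb.h"  // was: "graph.pb.h"

bool load_graph_def(const char* path, tensorflow::GraphDef* graph_def) {
    std::ifstream stream(path, std::ios::binary);
    if (!stream) return false;
    // ParseFromIstream is the stock protobuf Message API.
    return graph_def->ParseFromIstream(&stream);
}

int main(int argc, char** argv) {
    tensorflow::GraphDef graph_def;
    if (argc > 1 && load_graph_def(argv[1], &graph_def))
        std::cout << "nodes: " << graph_def.node_size() << '\n';
    return 0;
}
```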
diff --git a/src/frontends/tensorflow/src/proto/reader_base.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/reader_base.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/reader_base.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/reader_base.proto
diff --git a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto
similarity index 94%
rename from src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto
index abfcfdbec08007..3b17878e127cf9 100644
--- a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "graph.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/graph.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "RemoteFusedGraphExecuteInfoProto";
diff --git a/src/frontends/tensorflow/src/proto/resource_handle.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto
similarity index 96%
rename from src/frontends/tensorflow/src/proto/resource_handle.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto
index 4d872b6d9d8074..19b4dcc3b84ded 100644
--- a/src/frontends/tensorflow/src/proto/resource_handle.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto
@@ -14,8 +14,8 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "ResourceHandle";
diff --git a/src/frontends/tensorflow/src/proto/saved_model.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/saved_model.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto
index 0034fdfd46dcf8..f8660655229245 100644
--- a/src/frontends/tensorflow/src/proto/saved_model.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "meta_graph.proto";
+import "ov_tensorflow/meta_graph.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "SavedModelProtos";
diff --git a/src/frontends/tensorflow/src/proto/saved_object_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/saved_object_graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto
index 671441075c3628..d0b2170044966c 100644
--- a/src/frontends/tensorflow/src/proto/saved_object_graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto
@@ -14,13 +14,13 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "any.proto";
-import "tensor_shape.proto";
-import "types.proto";
-import "variable.proto";
-import "versions.proto";
-import "struct.proto";
-import "trackable_object_graph.proto";
+import "google/protobuf/any.proto";
+import "ov_tensorflow/tensor_shape.proto";
"ov_tensorflow/types.proto"; +import "ov_tensorflow/variable.proto"; +import "ov_tensorflow/versions.proto"; +import "ov_tensorflow/struct.proto"; +import "ov_tensorflow/trackable_object_graph.proto"; option cc_enable_arenas = true; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto similarity index 94% rename from src/frontends/tensorflow/src/proto/saved_tensor_slice.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto index 4645b2bdca9b89..9e628752bb1f5c 100644 --- a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto @@ -35,11 +35,11 @@ option java_outer_classname = "SavedTensorSliceProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.util"; -import "tensor_shape.proto"; -import "tensor_slice.proto"; -import "tensor.proto"; -import "types.proto"; -import "versions.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/tensor_slice.proto"; +import "ov_tensorflow/tensor.proto"; +import "ov_tensorflow/types.proto"; +import "ov_tensorflow/versions.proto"; // Metadata describing the set of slices of the same tensor saved in a // checkpoint file. diff --git a/src/frontends/tensorflow/src/proto/saver.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saver.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/saver.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saver.proto diff --git a/src/frontends/tensorflow/src/proto/step_stats.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/step_stats.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto index 04e0864a5aec49..027a1d79ee22e4 100644 --- a/src/frontends/tensorflow/src/proto/step_stats.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto @@ -14,8 +14,8 @@ syntax = "proto3"; package tensorflow; -import "allocation_description.proto"; -import "tensor_description.proto"; +import "ov_tensorflow/allocation_description.proto"; +import "ov_tensorflow/tensor_description.proto"; option cc_enable_arenas = true; option java_outer_classname = "StepStatsProtos"; diff --git a/src/frontends/tensorflow/src/proto/struct.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/struct.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto index d03201b685ac79..4126bd98c4a3d3 100644 --- a/src/frontends/tensorflow/src/proto/struct.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto @@ -14,9 +14,9 @@ syntax = "proto3"; package tensorflow; -import "tensor.proto"; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/tensor.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/summary.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/summary.proto rename to 
src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto
index 9e4b95f4bc3348..ce326176947dd4 100644
--- a/src/frontends/tensorflow/src/proto/summary.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor.proto";
+import "ov_tensorflow/tensor.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "SummaryProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto
similarity index 96%
rename from src/frontends/tensorflow/src/proto/tensor.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto
index c2e1fd7eb6b627..42f063536e09e0 100644
--- a/src/frontends/tensorflow/src/proto/tensor.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "resource_handle.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/resource_handle.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "TensorProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor_bundle.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto
similarity index 94%
rename from src/frontends/tensorflow/src/proto/tensor_bundle.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto
index 43fea749b42172..21af38195c4e11 100644
--- a/src/frontends/tensorflow/src/proto/tensor_bundle.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto
@@ -14,10 +14,10 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_shape.proto";
-import "tensor_slice.proto";
-import "types.proto";
-import "versions.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/tensor_slice.proto";
+import "ov_tensorflow/types.proto";
+import "ov_tensorflow/versions.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "TensorBundleProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor_description.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto
similarity index 90%
rename from src/frontends/tensorflow/src/proto/tensor_description.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto
index 3ab9c310a6f127..c03e1311c1f386 100644
--- a/src/frontends/tensorflow/src/proto/tensor_description.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "allocation_description.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/allocation_description.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "TensorDescriptionProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor_shape.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_shape.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/tensor_shape.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_shape.proto
diff --git a/src/frontends/tensorflow/src/proto/tensor_slice.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_slice.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/tensor_slice.proto
rename to
src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_slice.proto diff --git a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/trackable_object_graph.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto index f4a8e4da34f129..f0a9617432f617 100644 --- a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto @@ -14,7 +14,7 @@ syntax = "proto3"; package tensorflow; -import "wrappers.proto"; +import "google/protobuf/wrappers.proto"; option cc_enable_arenas = true; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/types.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/types.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/types.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/types.proto diff --git a/src/frontends/tensorflow/src/proto/variable.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/variable.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/variable.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/variable.proto diff --git a/src/frontends/tensorflow/src/proto/versions.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/versions.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/versions.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/versions.proto diff --git a/src/frontends/tensorflow/src/proto/xla_data.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/xla_data.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/xla_data.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/xla_data.proto diff --git a/src/frontends/tensorflow/src/tf_utils.cpp b/src/frontends/tensorflow/src/tf_utils.cpp index c72e8e7bb9080a..e298f49f92889f 100644 --- a/src/frontends/tensorflow/src/tf_utils.cpp +++ b/src/frontends/tensorflow/src/tf_utils.cpp @@ -423,7 +423,7 @@ shared_ptr create_loop_for_tf_while(const std::string& while_node_name FRONT_END_GENERAL_CHECK( cond_results.size() == 1 && cond_results[0], "[TensorFlow Frontend] Internal error or inconsistent model: condition body must contain one Result node."); - auto body_condition_output_idx = static_cast(body_results.size()); + auto body_condition_output_idx = body_results.size(); body_model->add_results(cond_results); // type setting for body graph parameters is needed for TensorList support since DT_VARIANT type is present @@ -435,14 +435,18 @@ shared_ptr create_loop_for_tf_while(const std::string& while_node_name loop->set_function(body_model); // body_results may contain less nodes than body_params that means back edge exists not for all body_params - for (size_t input_ind = 0; input_ind < static_cast(body_condition_output_idx); ++input_ind) { + for (size_t input_ind = 0; input_ind < body_condition_output_idx; ++input_ind) { loop->set_merged_input(body_params[input_ind], ov_inputs[input_ind], body_results[input_ind]->input_value(0)); } - loop->set_special_body_ports({-1, body_condition_output_idx}); + loop->set_special_body_ports({-1, static_cast(body_condition_output_idx)}); + // set invariant inputs for the loop + for (size_t input_ind = body_condition_output_idx; 
input_ind < input_size; ++input_ind) { + loop->set_invariant_input(body_params[input_ind], ov_inputs[input_ind]); + } // set external outputs for Loop node // do not get execution condition outside of the Loop node - for (size_t output_ind = 0; output_ind < static_cast(body_condition_output_idx); ++output_ind) { + for (size_t output_ind = 0; output_ind < body_condition_output_idx; ++output_ind) { loop->get_iter_value(body_results[output_ind]); } loop->validate_and_infer_types(); diff --git a/src/frontends/tensorflow/src/tf_utils.hpp b/src/frontends/tensorflow/src/tf_utils.hpp index 5de9029a816e6c..861fb56f552685 100644 --- a/src/frontends/tensorflow/src/tf_utils.hpp +++ b/src/frontends/tensorflow/src/tf_utils.hpp @@ -4,8 +4,6 @@ #pragma once -#include "attr_value.pb.h" -#include "node_def.pb.h" #include "openvino/core/node.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/runtime_attribute.hpp" @@ -14,9 +12,11 @@ #include "openvino/frontend/node_context.hpp" #include "openvino/op/loop.hpp" #include "openvino/runtime/tensor.hpp" -#include "tensor.pb.h" -#include "tensor_shape.pb.h" -#include "types.pb.h" +#include "ov_tensorflow/attr_value.pb.h" +#include "ov_tensorflow/node_def.pb.h" +#include "ov_tensorflow/tensor.pb.h" +#include "ov_tensorflow/tensor_shape.pb.h" +#include "ov_tensorflow/types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/variables_index.cpp b/src/frontends/tensorflow/src/variables_index.cpp index c24ffd8112bd09..2dcf3faf9e0b0c 100644 --- a/src/frontends/tensorflow/src/variables_index.cpp +++ b/src/frontends/tensorflow/src/variables_index.cpp @@ -11,8 +11,8 @@ #include "graph_iterator_saved_model.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/util/mmap_object.hpp" -#include "tensor_bundle.pb.h" -#include "trackable_object_graph.pb.h" +#include "ov_tensorflow/tensor_bundle.pb.h" +#include "ov_tensorflow/trackable_object_graph.pb.h" #ifdef ENABLE_SNAPPY_COMPRESSION # include "snappy.h" diff --git a/src/frontends/tensorflow/src/variables_index.hpp b/src/frontends/tensorflow/src/variables_index.hpp index df852a627994e7..aa805b264bc3d1 100644 --- a/src/frontends/tensorflow/src/variables_index.hpp +++ b/src/frontends/tensorflow/src/variables_index.hpp @@ -9,7 +9,7 @@ #include "graph_iterator_proto.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/mmap_object.hpp" -#include "saved_model.pb.h" +#include "ov_tensorflow/saved_model.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/tests/convert_model.cpp b/src/frontends/tensorflow/tests/convert_model.cpp index fc00a6784963e3..f6ec18cf9cc12c 100644 --- a/src/frontends/tensorflow/tests/convert_model.cpp +++ b/src/frontends/tensorflow/tests/convert_model.cpp @@ -15,7 +15,8 @@ static const std::vector models{ std::string("2in_2out/2in_2out.pb"), std::string("forward_edge_model/forward_edge_model.pbtxt"), std::string("forward_edge_model2/forward_edge_model2.pbtxt"), - std::string("concat_with_non_constant_axis/concat_with_non_constant_axis.pbtxt")}; + std::string("concat_with_non_constant_axis/concat_with_non_constant_axis.pbtxt"), + std::string("gather_tree_model/gather_tree_model.pbtxt")}; INSTANTIATE_TEST_SUITE_P(TFConvertModelTest, FrontEndConvertModelTest, diff --git a/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt b/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt new file mode 100644 index 00000000000000..54351036dd72a2 --- /dev/null +++ 
b/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt @@ -0,0 +1,103 @@ +node { + name: "step_ids" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 20 + } + dim { + size: 2 + } + dim { + size: 30 + } + } + } + } +} +node { + name: "parent_ids" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 20 + } + dim { + size: 2 + } + dim { + size: 30 + } + } + } + } +} +node { + name: "max_seq_len" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } +} +node { + name: "end_token" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "Addons>GatherTree" + op: "Addons>GatherTree" + input: "step_ids" + input: "parent_ids" + input: "max_seq_len" + input: "end_token" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} diff --git a/src/frontends/tensorflow/tests/tf_utils.cpp b/src/frontends/tensorflow/tests/tf_utils.cpp index 120b8ffab8659d..d742b53dcf8704 100644 --- a/src/frontends/tensorflow/tests/tf_utils.cpp +++ b/src/frontends/tensorflow/tests/tf_utils.cpp @@ -16,6 +16,8 @@ namespace frontend { namespace tensorflow { namespace tests { +const std::string TF_FE = "tf"; + shared_ptr convert_model(const string& model_path, const ConversionExtension::Ptr& conv_ext, const vector& input_names, diff --git a/src/frontends/tensorflow/tests/tf_utils.hpp b/src/frontends/tensorflow/tests/tf_utils.hpp index 1c48a95c85fee8..80addd43d61e9e 100644 --- a/src/frontends/tensorflow/tests/tf_utils.hpp +++ b/src/frontends/tensorflow/tests/tf_utils.hpp @@ -14,7 +14,7 @@ namespace ov { namespace frontend { namespace tensorflow { namespace tests { -static const std::string TF_FE = "tf"; +extern const std::string TF_FE; // a wrapper to create TensorFlow Frontend and configure the conversion pipeline // by registering new translator via extension, specifying (new) inputs, their shapes and types diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 54f1dff243efd1..29efb83547d263 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -72,6 +72,7 @@ OP_CONVERTER_NAMED(translate_fused_batch_norm_op); OP_CONVERTER(translate_gather_op); OP_CONVERTER(translate_gather_v2_op); OP_CONVERTER(translate_gather_nd_op); +OP_CONVERTER(translate_gather_tree_op); OP_CONVERTER(translate_identity_op); OP_CONVERTER(translate_identity_n_op); OP_CONVERTER(translate_input_arg_op); @@ -142,9 +143,12 @@ OP_CONVERTER(translate_tensor_list_set_item_op); OP_CONVERTER(translate_tensor_list_stack_op); OP_CONVERTER(translate_tensor_list_resize_op); OP_CONVERTER(translate_tile_op); +OP_CONVERTER(translate_tobool_op); OP_CONVERTER_NAMED(translate_top_k_op); OP_CONVERTER_NAMED(translate_top_k_v2_op); OP_CONVERTER(translate_transpose_op); +OP_CONVERTER(translate_truncate_div_op); +OP_CONVERTER(translate_truncate_mod_op); OP_CONVERTER(translate_unpack_op); OP_CONVERTER(translate_unravel_index_op); OP_CONVERTER(translate_unsorted_segment_sum_op); diff --git a/src/frontends/tensorflow_common/include/helper_ops/merge.hpp b/src/frontends/tensorflow_common/include/helper_ops/merge.hpp 
index eb7e611f3e21f0..6261dd0e67c229 100644 --- a/src/frontends/tensorflow_common/include/helper_ops/merge.hpp +++ b/src/frontends/tensorflow_common/include/helper_ops/merge.hpp @@ -33,20 +33,34 @@ class Merge : public InternalOperation { ov::PartialShape output_data_shape = ov::PartialShape::dynamic(); auto input_size = get_input_size(); - bool merge_output_shape = true; for (size_t input_ind = 0; input_ind < input_size; ++input_ind) { auto input_type = get_input_element_type(input_ind); if (input_type.is_static()) { output_data_type = input_type; } - // check if it still needs to merge input shapes - // if yes, it tries to merge them - if (merge_output_shape && - !PartialShape::merge_into(output_data_shape, get_input_partial_shape(input_ind))) { - merge_output_shape = false; - // reset output shape to dynamic rank + auto input_shape = get_input_partial_shape(input_ind); + if (input_shape.rank().is_dynamic()) { + continue; + } + + if (output_data_shape.rank().is_dynamic()) { + // this is the first shape of static rank that has been met + // use it immediately as the initial output shape + output_data_shape = input_shape; + } else if (output_data_shape.rank().is_static() && + output_data_shape.rank().get_length() != input_shape.rank().get_length()) { + // inputs with different ranks mean the output must be of a dynamic rank output_data_shape = ov::PartialShape::dynamic(); + break; + } else { + auto output_rank = output_data_shape.rank().get_length(); + for (int64_t dim_ind = 0; dim_ind < output_rank; ++dim_ind) { + if (input_shape[dim_ind] != output_data_shape[dim_ind]) { + // different inputs can have different dimensions, so combine them into a dynamic one + output_data_shape[dim_ind] = ov::Dimension::dynamic(); + } + } } } diff --git a/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp b/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp index eb262b4307af7f..e556c9ad4478da 100644 --- a/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp +++ b/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp @@ -43,6 +43,10 @@ class NextIteration : public InternalOperation { producer_output_port_idx = m_producer_output_port_idx; } + void set_output_shape_and_type(const ov::PartialShape& output_shape, const ov::element::Type& output_type) { + set_output_type(0, output_type, output_shape); + } + private: bool m_back_edge_set; std::string m_producer_name;
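For reference, a standalone sketch of the dimension-merging rule the new `Merge::validate_and_infer_types` code above implements (only `ov::PartialShape` from the public core API is assumed; the program is illustrative, not part of the diff): shapes of the same static rank are merged per dimension with conflicting dimensions turning dynamic, while conflicting static ranks collapse the whole output to a dynamic rank.

```cpp
// Merging {2,3} with {2,5}: same rank, second dimension conflicts -> [2,?]
#include <iostream>
#include "openvino/core/partial_shape.hpp"

int main() {
    using ov::Dimension;
    using ov::PartialShape;
    PartialShape out = PartialShape::dynamic();
    for (const PartialShape& in : {PartialShape{2, 3}, PartialShape{2, 5}}) {
        if (in.rank().is_dynamic())
            continue;  // shapes of dynamic rank do not constrain the output
        if (out.rank().is_dynamic()) {
            out = in;  // the first static-rank shape initializes the output
        } else if (out.rank().get_length() != in.rank().get_length()) {
            out = PartialShape::dynamic();  // rank conflict -> fully dynamic rank
            break;
        } else {
            for (int64_t d = 0; d < out.rank().get_length(); ++d)
                if (in[d] != out[d])
                    out[d] = Dimension::dynamic();  // dimension conflict -> dynamic dim
        }
    }
    std::cout << out << std::endl;  // merged shape: first dim 2, second dynamic
    return 0;
}
```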
diff --git a/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp b/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp new file mode 100644 index 00000000000000..030ff12d5b68c5 --- /dev/null +++ b/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "internal_operation.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { + +// Internal operation for TensorArrayV3 +// An array of Tensors of given size +// It has two outputs: +// 1. handle - a resource (reference) for the tensor array +// 2. flow_out - a float value used for chaining operations on the tensor array +class TensorArrayV3 : public InternalOperation { +public: + OPENVINO_OP("TensorArrayV3", "ov::frontend::tensorflow", InternalOperation); + + TensorArrayV3(const Output<Node>& size, + const ov::element::Type element_type, + const std::shared_ptr<DecoderBase>& decoder = std::make_shared<DecoderFake>()) + : InternalOperation(decoder, OutputVector{size}, 2, "TensorArrayV3"), + m_element_type(element_type), + m_element_rank(-1) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + set_output_type(0, m_element_type, ov::PartialShape::dynamic()); + set_output_type(1, m_element_type, ov::PartialShape::dynamic()); + } + + ov::element::Type get_element_type() const { + return m_element_type; + } + + int64_t get_element_rank() const { + return m_element_rank; + } + + void set_element_rank(int64_t element_rank) { + FRONT_END_GENERAL_CHECK( + element_rank >= 0, + "[TensorFlow Frontend] internal error: a negative element rank is set for TensorArrayV3"); + m_element_rank = element_rank; + } + +private: + ov::element::Type m_element_type; + int64_t m_element_rank; +}; + +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp b/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp new file mode 100644 index 00000000000000..42e5a0ad754ea7 --- /dev/null +++ b/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace pass { + +// This transformation replaces the internal operation TensorArrayV3 with a Constant +// that simulates the initial state of the tensor array container +class TensorArrayV3Replacer : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::tensorflow::pass::TensorArrayV3Replacer"); + TensorArrayV3Replacer(); +}; + +} // namespace pass +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp new file mode 100644 index 00000000000000..72ed922511cd98 --- /dev/null +++ b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "helper_transforms/tensor_array_v3_replacer.hpp" + +#include "helper_ops/tensor_array.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; +using namespace ov::pass; + +ov::frontend::tensorflow::pass::TensorArrayV3Replacer::TensorArrayV3Replacer() { + auto tensor_array_v3 = pattern::wrap_type<TensorArrayV3>(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + NodeRegistry rg; + + auto tensor_array_v3 = dynamic_pointer_cast<TensorArrayV3>(m.get_match_root()); + if (!tensor_array_v3) { + return false; + } + + int32_t tensor_element_rank = static_cast<int32_t>(tensor_array_v3->get_element_rank()); + if (tensor_element_rank < 0) { + return false; + } + + // retrieve all TensorArrayV3 inputs + auto size = tensor_array_v3->input_value(0); + auto element_type = tensor_array_v3->get_element_type(); + + // adjust size to have it of shape [1] for further concatenation with element shape + auto new_size_shape = rg.make<v0::Constant>(element::i32, Shape{1}, 1); + auto new_size = rg.make<v1::Reshape>(size, new_size_shape, false); + + // create a vector of size element_shape.rank() filled with ones + // and compute a shape of the initial tensor array [size, 1, ..., 1] + Output<Node> target_shape; + if (tensor_element_rank == 0) { + target_shape = new_size->output(0); + } else { + vector<int32_t> ones(tensor_element_rank, 1); + auto ones_const = rg.make<v0::Constant>(element::i32, Shape{ones.size()}, ones); + target_shape = rg.make<v0::Concat>(OutputVector{new_size, ones_const}, 0)->output(0); + } + + // create the initial tensor array + auto scalar_value = make_shared<v0::Constant>(element_type, Shape{}, vector<int32_t>{0}); + auto initial_tensor_array = make_shared<v3::Broadcast>(scalar_value, target_shape); + + // preserve names of the node and the output tensor + initial_tensor_array->set_friendly_name(tensor_array_v3->get_friendly_name()); + copy_runtime_info(tensor_array_v3, rg.get()); + + ov::replace_node(tensor_array_v3, + ov::OutputVector{initial_tensor_array->output(0), initial_tensor_array->output(0)}); + return true; + }; + + auto m = + std::make_shared<pattern::Matcher>(tensor_array_v3, "ov::frontend::tensorflow::pass::TensorArrayV3Replacer"); + register_matcher(m, callback); +} diff --git a/src/frontends/tensorflow_common/src/op/gather_tree.cpp b/src/frontends/tensorflow_common/src/op/gather_tree.cpp new file mode 100644 index 00000000000000..e349efe6784e64 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/gather_tree.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/gather_tree.hpp" + +#include "common_op_table.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_gather_tree_op(const NodeContext& node) { + default_op_checks(node, 4, {"GatherTree", "Addons>GatherTree"}); + auto step_ids = node.get_input(0); + auto parent_ids = node.get_input(1); + auto max_sequence_lengths = node.get_input(2); + auto end_token = node.get_input(3); + + // adjust end_token that must be a scalar + auto new_shape_end_token = make_shared<v0::Constant>(element::i32, Shape{0}, vector<int32_t>{}); + end_token = make_shared<v1::Reshape>(end_token, new_shape_end_token, false); + + auto gather_tree = make_shared<v1::GatherTree>(step_ids, parent_ids, max_sequence_lengths, end_token); + set_node_name(node.get_name(), gather_tree); + + return {gather_tree}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov
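For context, a minimal sketch (not part of the diff) of the subgraph this translator emits, built directly with the public core API. The shapes follow the `gather_tree_model.pbtxt` test model added earlier in this diff; the standalone program itself is illustrative only.

```cpp
// Build a tiny ov::Model around ov::op::v1::GatherTree with the test shapes:
// step_ids/parent_ids [20,2,30], max_seq_len [2], end_token scalar, all i32.
#include <memory>
#include "openvino/core/model.hpp"
#include "openvino/op/gather_tree.hpp"
#include "openvino/op/parameter.hpp"

int main() {
    using namespace ov;
    auto step_ids = std::make_shared<op::v0::Parameter>(element::i32, Shape{20, 2, 30});
    auto parent_ids = std::make_shared<op::v0::Parameter>(element::i32, Shape{20, 2, 30});
    auto max_seq_len = std::make_shared<op::v0::Parameter>(element::i32, Shape{2});
    auto end_token = std::make_shared<op::v0::Parameter>(element::i32, Shape{});  // already a scalar,
    // so the Reshape the translator inserts for a non-scalar end_token is not needed here
    auto gather_tree = std::make_shared<op::v1::GatherTree>(step_ids, parent_ids, max_seq_len, end_token);
    auto model = std::make_shared<Model>(gather_tree->outputs(),
                                         ParameterVector{step_ids, parent_ids, max_seq_len, end_token});
    return model ? 0 : 1;
}
```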
"openvino/op/reduce_prod.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/shape_of.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_tobool_op(const NodeContext& node) { + // (rank(x) == 0 && x != 0) || (rank > 0 && ReduceProd(ShapeOf(x))) > 0 + + default_op_checks(node, 1, {"ToBool"}); + auto x = node.get_input(0); + + // prepare auxiliary zero and zero constants of the same type as the inputs + auto zero = create_same_type_const_scalar(x, 0); + auto zero_2 = make_shared(element::i32, Shape{}, 0); + auto true_const = make_shared(element::boolean, Shape{}, true); + auto false_const = make_shared(element::boolean, Shape{}, false); + // compute a mask to get rank(x) == 0 + auto x_rank = compute_subgraph_scalar_rank(x, element::i32); + + // compute rank(x) == 0 + auto is_zero = make_shared(x_rank, zero_2); + + // compute mask to get x != 0 + auto is_not_zero = make_shared(x, zero); + + // compute (rank(x) == 0 && x != 0) + auto logical_and = make_shared(is_zero, is_not_zero); + // compute rank(x) > 0 + auto greater_than_zero = make_shared(x_rank, zero_2); + + // compute ShapeOf(x) + auto cond_shape = make_shared(x, element::i32); + // compute ReduceProd(ShapeOf(x))) and axis + auto axis = make_shared(element::i32, Shape{}, 0); + auto reduce_prod = make_shared(cond_shape, axis); + + // compute ReduceProd(ShapeOf(x))) > 0 + auto greater_than__zero_2 = make_shared(reduce_prod, zero_2); + // compute (rank > 0 && ReduceProd(ShapeOf(x))) > 0 + auto logical_and_2 = make_shared(greater_than_zero, greater_than__zero_2); + + auto logical_or = make_shared(logical_and, logical_and_2); + + auto tobool = make_shared(logical_or, true_const, false_const); + set_node_name(node.get_name(), tobool); + return tobool->outputs(); +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/tensorflow_common/src/op/truncate_div.cpp b/src/frontends/tensorflow_common/src/op/truncate_div.cpp new file mode 100644 index 00000000000000..b725bbd76b44a3 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/truncate_div.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/ceiling.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/floor.hpp" +#include "openvino/op/less.hpp" +#include "openvino/op/mod.hpp" +#include "openvino/op/select.hpp" + +using namespace std; +using namespace ov::opset10; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_truncate_div_op(const NodeContext& node) { + default_op_checks(node, 2, {"TruncateDiv"}); + auto x = node.get_input(0); + auto y = node.get_input(1); + + auto res = make_shared(x, y); + auto is_res_negative = make_shared(res, create_same_type_const_scalar(x, 0)); + auto final_res = make_shared(is_y_negative, negative_y, y); + + // check if floor_mod == zero + auto floor_mod = make_shared(x, y); + auto is_zero = make_shared(floor_mod, create_same_type_const_scalar(floor_mod, 0)); + + // floor_mod - y + auto other_res = make_shared(floor_mod, y); + + // select operation to handle the sign + auto result = make_shared(is_x_negative, other_res, floor_mod)); + + set_node_name(node.get_name(), result); + return result->outputs(); +} +} // namespace op +} // namespace 
diff --git a/src/frontends/tests/frontend/shared/src/library_extension.cpp b/src/frontends/tests/frontend/shared/src/library_extension.cpp index a2257f8fca116b..8a6bb23d82f0ef 100644 --- a/src/frontends/tests/frontend/shared/src/library_extension.cpp +++ b/src/frontends/tests/frontend/shared/src/library_extension.cpp @@ -9,6 +9,7 @@ #include "common_test_utils/file_utils.hpp" #include "openvino/op/relu.hpp" #include "openvino/op/swish.hpp" +#include "openvino/runtime/core.hpp" #include "utils.hpp" using namespace ov::frontend; @@ -88,3 +89,30 @@ TEST_P(FrontendLibraryExtensionTest, verifyFunctions) { nodes.end()); } } + +TEST_P(FrontendLibraryExtensionTest, loadExtensionBeforeFrontend) { + // release all frontends internally + ov::shutdown(); + + const auto& lib_path = get_lib_path("test_builtin_extensions"); + + ov::Core core; + core.add_extension(lib_path); + + auto model = core.read_model(m_param.m_modelName); + ASSERT_NE(nullptr, model); + + const auto nodes = model->get_ops(); + ASSERT_EQ(std::find_if(nodes.begin(), + nodes.end(), + [](const std::shared_ptr<ov::Node>& n) { + return ov::is_type<ov::op::v0::Relu>(n); + }), + nodes.end()); + ASSERT_NE(std::find_if(nodes.begin(), + nodes.end(), + [](const std::shared_ptr<ov::Node>& n) { + return ov::is_type<ov::op::v4::Swish>(n); + }), + nodes.end()); +} diff --git a/src/inference/dev_api/ie_icore.hpp b/src/inference/dev_api/ie_icore.hpp index 8852c1f4ecd8c9..2210f26bbfc6ef 100644 --- a/src/inference/dev_api/ie_icore.hpp +++ b/src/inference/dev_api/ie_icore.hpp @@ -191,7 +191,7 @@ class ICore : public ov::ICore { virtual InferenceEngine::RemoteContext::Ptr CreateContext(const std::string& deviceName, const ov::AnyMap&) = 0; /** - * @brief Get only configs that are suppored by device + * @brief Get only configs that are supported by device * @param deviceName Name of a device * @param config Map of configs that can contain configs that are not supported by device * @return map of configs that are supported by device diff --git a/src/inference/dev_api/openvino/runtime/icore.hpp b/src/inference/dev_api/openvino/runtime/icore.hpp index e4d0a98f5be968..de2ca2ebf07c57 100644 --- a/src/inference/dev_api/openvino/runtime/icore.hpp +++ b/src/inference/dev_api/openvino/runtime/icore.hpp @@ -222,7 +222,7 @@ class OPENVINO_RUNTIME_API ICore { } /** - * @brief Get only properties that are suppored by specified device + * @brief Get only properties that are supported by specified device * @param full_device_name Name of a device (can be either virtual or hardware) * @param properties Properties that can contain configs that are not supported by device * @return map of properties that are supported by device
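A minimal usage sketch of the scenario the new `loadExtensionBeforeFrontend` test covers: the extension library is registered with `ov::Core` before any frontend object exists, and `read_model()` must still pick it up. The library and model paths below are placeholders, not files from this repository.

```cpp
// Register a frontend extension library with ov::Core, then read a model
// whose conversion depends on that extension.
#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    core.add_extension("/path/to/libtest_builtin_extensions.so");     // placeholder library path
    auto model = core.read_model("/path/to/model_with_custom_op.xml");  // placeholder model path
    return model ? 0 : 1;
}
```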
diff --git a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp index ed15438de2eb83..938fa8924fbb05 100644 --- a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp +++ b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp @@ -124,6 +124,12 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { } }; + /** + * @brief Finds input or output port + * @return structure which contains index of Input/Output or reports that the port wasn't found + */ + FoundPort find_port(const ov::Output<const ov::Node>& port) const; + /** * @brief Converts batched tensors to tensor */ @@ -157,12 +163,9 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { std::shared_ptr<const ov::ICompiledModel> m_compiled_model; // Mutable to return reference to ov::Tensor mutable std::unordered_map<std::shared_ptr<ov::descriptor::Tensor>, ov::SoPtr<ov::ITensor>> m_tensors; - - /** - * @brief Finds input or output port - * @return structure which contains index of Input/Output or reports that the port wasn't found - */ - FoundPort find_port(const ov::Output<const ov::Node>& port) const; + // Cache of ports + mutable std::unordered_map<size_t, FoundPort> m_cached_ports; + mutable std::mutex m_cache_mutex; }; }; // namespace ov diff --git a/src/inference/src/dev/isync_infer_request.cpp b/src/inference/src/dev/isync_infer_request.cpp index 8e0f554fedd900..94d714d9f134a5 100644 --- a/src/inference/src/dev/isync_infer_request.cpp +++ b/src/inference/src/dev/isync_infer_request.cpp @@ -4,6 +4,7 @@ #include "openvino/runtime/isync_infer_request.hpp" +#include #include #include @@ -17,6 +18,7 @@ #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/plugin_itt.hpp" #include "openvino/runtime/tensor.hpp" +#include "openvino/util/common_util.hpp" namespace { void check_batched_tensors(const ov::Output<const ov::Node>& input, @@ -93,14 +95,18 @@ ov::IInferRequest::~IInferRequest() = default; ov::ISyncInferRequest::ISyncInferRequest(const std::shared_ptr<const ov::ICompiledModel>& compiled_model) : m_compiled_model(compiled_model) { OPENVINO_ASSERT(m_compiled_model); - // Create map of empty tensors - for (const auto& input : get_inputs()) { - if (m_tensors.find(input.get_tensor_ptr()) == m_tensors.end()) - m_tensors[input.get_tensor_ptr()] = ov::SoPtr<ov::ITensor>(); - } - for (const auto& output : get_outputs()) { - if (m_tensors.find(output.get_tensor_ptr()) == m_tensors.end()) - m_tensors[output.get_tensor_ptr()] = ov::SoPtr<ov::ITensor>(); + // Create map of empty tensors and cache ports from the compiled model + auto port_type = ov::ISyncInferRequest::FoundPort::Type::INPUT; + for (const auto& ports : {get_inputs(), get_outputs()}) { + for (size_t i = 0; i < ports.size(); i++) { + const auto& port = ports[i]; + if (m_tensors.find(port.get_tensor_ptr()) == m_tensors.end()) + m_tensors[port.get_tensor_ptr()] = ov::SoPtr<ov::ITensor>(); + size_t port_hash = ov::util::hash_combine(std::vector<size_t>{std::hash<const ov::Node*>()(port.get_node()), + std::hash<size_t>()(port.get_index())}); + m_cached_ports[port_hash] = {i, port_type}; + } + port_type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT; + } } @@ -118,18 +124,30 @@ ov::ISyncInferRequest::FoundPort ov::ISyncInferRequest::find_port(const ov::Outp // This function is a hotspot and needs optimization. auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { return node1 == node2 || - (node1->get_friendly_name() == node2->get_friendly_name() && - node1->get_type_info() == node2->get_type_info() && - node1->outputs().size() == node2->outputs().size() && node1->inputs().size() == node2->inputs().size()); + (node1->outputs().size() == node2->outputs().size() && + node1->inputs().size() == node2->inputs().size() && node1->get_type_info() == node2->get_type_info() && + node1->get_friendly_name() == node2->get_friendly_name()); }; + // Finding a port without caching is slow because each call has to iterate over all ports and compare strings. + // So use a caching workaround to make repeated calls for the same port faster. + // Calculate hash for the port + size_t port_hash = ov::util::hash_combine( + std::vector<size_t>{std::hash<const ov::Node*>()(port.get_node()), std::hash<size_t>()(port.get_index())}); + { + std::lock_guard<std::mutex> lock(m_cache_mutex); + if (m_cached_ports.find(port_hash) != m_cached_ports.end()) { + // Cached port for the hash was found + return m_cached_ports[port_hash]; + } + } ov::ISyncInferRequest::FoundPort::Type type = ov::ISyncInferRequest::FoundPort::Type::INPUT; for (const auto& ports : {get_inputs(), get_outputs()}) { for (size_t i = 0; i < ports.size(); i++) { - // TODO: Fix port comparison - // if (ports[i] == port) { if (ports[i].get_index() == port.get_index() && ports[i].get_names() == port.get_names() && check_nodes(ports[i].get_node(), port.get_node())) { - return {i, type}; + std::lock_guard<std::mutex> lock(m_cache_mutex); + m_cached_ports[port_hash] = {i, type}; + return m_cached_ports[port_hash]; } } type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT; @@ -275,10 +293,10 @@ void ov::ISyncInferRequest::allocate_tensor( void ov::ISyncInferRequest::check_tensors() const { const auto& inputs = m_compiled_model->inputs(); for (size_t i = 0; i < inputs.size(); i++) { - check_tensor(inputs[i], get_tensor_ptr(inputs[i])); + check_tensor(inputs[i], m_tensors.at(inputs[i].get_tensor_ptr())); } const auto& outputs = m_compiled_model->outputs(); for (size_t i = 0; i < outputs.size(); i++) { - check_tensor(outputs[i], get_tensor_ptr(outputs[i])); + check_tensor(outputs[i], m_tensors.at(outputs[i].get_tensor_ptr())); } }
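A simplified standalone sketch of the caching idea introduced in `find_port` above: a port is keyed by hashing its node pointer together with its output index, so repeated lookups skip the string comparisons entirely. `FoundPort` and `hash_combine` here are reduced stand-ins for the OpenVINO counterparts, not the real types.

```cpp
// Key ports by (node pointer, output index); cache the lookup result.
#include <cstddef>
#include <functional>
#include <unordered_map>

struct FoundPort {
    size_t idx = 0;
    enum class Type { NOT_FOUND, INPUT, OUTPUT } type = Type::NOT_FOUND;
};

// boost-style hash mixing, similar in spirit to ov::util::hash_combine
static size_t hash_combine(size_t seed, size_t value) {
    return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

struct PortCache {
    std::unordered_map<size_t, FoundPort> cached;

    static size_t key(const void* node, size_t output_index) {
        return hash_combine(std::hash<const void*>()(node), std::hash<size_t>()(output_index));
    }

    const FoundPort* lookup(const void* node, size_t output_index) const {
        auto it = cached.find(key(node, output_index));
        return it == cached.end() ? nullptr : &it->second;  // hit -> no port iteration needed
    }

    void store(const void* node, size_t output_index, FoundPort port) {
        cached[key(node, output_index)] = port;
    }
};
```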
diff --git a/src/inference/src/dev/threading/cpu_streams_executor.cpp b/src/inference/src/dev/threading/cpu_streams_executor.cpp index dba0082d647080..691a3951615460 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor.cpp @@ -4,6 +4,7 @@ #include "openvino/runtime/threading/cpu_streams_executor.hpp" +#include #include #include #include @@ -22,8 +23,6 @@ namespace ov { namespace threading { -// maybe there are two CPUStreamsExecutors in the same thread. -thread_local std::map<void*, std::shared_ptr<std::thread::id>> t_stream_count_map; struct CPUStreamsExecutor::Impl { struct Stream { #if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO @@ -335,12 +334,58 @@ struct CPUStreamsExecutor::Impl { // will be counted by thread_local t_stream_count_map. // when the customer's thread is destroyed, the stream's count becomes 1, // Call local() will reuse one of them, and release others. + // it's only a workaround for ticket CVS-111490, please be careful when you need to modify + // CustomThreadLocal::local(), especially operations that will affect the count of + // CustomThreadLocal::ThreadId class CustomThreadLocal : public ThreadLocal<std::shared_ptr<Stream>> { + class ThreadTracker { + public: + explicit ThreadTracker(const std::thread::id& id) + : _id(id), + _count_ptr(std::make_shared<std::atomic_int>(1)) {} + ~ThreadTracker() { + _count_ptr->fetch_sub(1); + } + std::shared_ptr<ThreadTracker> fetch() { + auto new_ptr = std::shared_ptr<ThreadTracker>(new ThreadTracker(*this)); + auto pre_value = new_ptr.get()->_count_ptr->fetch_add(1); + OPENVINO_ASSERT(pre_value == 1, "this value must be 1, please check the code of CustomThreadLocal::local()"); + return new_ptr; + } + const std::thread::id& get_id() const { + return _id; + } + int count() const { + return *(_count_ptr.get()); + } + + private: + // disable copy and move semantics so that users can only use fetch() + // to create a new instance with a shared counter; + ThreadTracker(ThreadTracker const&) = default; + ThreadTracker(ThreadTracker&&) = delete; + ThreadTracker& operator=(ThreadTracker const&) = delete; + ThreadTracker& operator=(ThreadTracker&&) = delete; + std::thread::id _id; + std::shared_ptr<std::atomic_int> _count_ptr; + }; + public: CustomThreadLocal(std::function<std::shared_ptr<Stream>()> callback_construct, Impl* impl) : ThreadLocal<std::shared_ptr<Stream>>(callback_construct), _impl(impl) {} std::shared_ptr<Stream> local() { + // maybe there are two CPUStreamsExecutors in the same thread. + static thread_local std::map<void*, std::shared_ptr<ThreadTracker>> t_stream_count_map; + // fix the memory leak where a CPUStreamsExecutor has already been released + // but its CustomThreadLocal::ThreadTracker still exists in t_stream_count_map + for (auto it = t_stream_count_map.begin(); it != t_stream_count_map.end();) { + if (this != it->first && it->second->count() == 1) { + t_stream_count_map.erase(it++); + } else { + it++; + } + } auto id = std::this_thread::get_id(); auto search = _thread_ids.find(id); if (search != _thread_ids.end()) { @@ -348,14 +393,13 @@ struct CPUStreamsExecutor::Impl { } std::lock_guard<std::mutex> guard(_stream_map_mutex); for (auto& item : _stream_map) { - if (*(item.first.get()) == id) { - t_stream_count_map[(void*)this] = item.first; + if (item.first->get_id() == id) { return item.second; } } std::shared_ptr<Stream> stream = nullptr; for (auto it = _stream_map.begin(); it != _stream_map.end();) { - if (it->first.use_count() == 1) { + if (it->first->count() == 1) { if (stream == nullptr) { stream = it->second; } @@ -367,9 +411,10 @@ struct CPUStreamsExecutor::Impl { if (stream == nullptr) { stream = std::make_shared<Stream>(_impl); } - auto id_ptr = std::make_shared<std::thread::id>(id); - t_stream_count_map[(void*)this] = id_ptr; - _stream_map[id_ptr] = stream; + auto tracker_ptr = std::make_shared<ThreadTracker>(id); + t_stream_count_map[(void*)this] = tracker_ptr; + auto new_tracker_ptr = tracker_ptr->fetch(); + _stream_map[new_tracker_ptr] = stream; return stream; } @@ -382,7 +427,7 @@ struct CPUStreamsExecutor::Impl { private: std::set<std::thread::id> _thread_ids; Impl* _impl; - std::map<std::shared_ptr<std::thread::id>, std::shared_ptr<Stream>> _stream_map; + std::map<std::shared_ptr<ThreadTracker>, std::shared_ptr<Stream>> _stream_map; std::mutex _stream_map_mutex; }; @@ -397,7 +442,7 @@ struct CPUStreamsExecutor::Impl { auto numaNodes = get_available_numa_nodes(); if (_config._streams != 0) { std::copy_n(std::begin(numaNodes), - std::min(static_cast<std::size_t>(_config._streams), numaNodes.size()), + std::min(_config._streams, numaNodes.size()), std::back_inserter(_usedNumaNodes)); } else { _usedNumaNodes = numaNodes; diff --git a/src/inference/src/ie_network_reader.cpp b/src/inference/src/ie_network_reader.cpp index
7fe34b42ed7948..f5aca3586e8339 100644 --- a/src/inference/src/ie_network_reader.cpp +++ b/src/inference/src/ie_network_reader.cpp @@ -20,6 +20,7 @@ #include "ie_icnn_network.hpp" #include "ie_input_info.hpp" #include "openvino/frontend/manager.hpp" +#include "openvino/runtime/shared_buffer.hpp" #ifdef ENABLE_IR_V7_READER # include "legacy/ie_ir_version.hpp" #endif @@ -388,8 +389,8 @@ CNNNetwork details::ReadNetwork(const std::string& model, ov::AnyVector params{&modelStream}; if (weights) { char* data = weights->cbuffer().as(); - std::shared_ptr weights_buffer = - std::make_shared>(data, weights->byteSize(), weights); + std::shared_ptr weights_buffer = + std::make_shared>(data, weights->byteSize(), weights); params.emplace_back(weights_buffer); } diff --git a/src/inference/src/model_reader.cpp b/src/inference/src/model_reader.cpp index 1837d75a2d44aa..bc67f6d21b225a 100644 --- a/src/inference/src/model_reader.cpp +++ b/src/inference/src/model_reader.cpp @@ -9,6 +9,8 @@ #include "openvino/core/model.hpp" #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/frontend/manager.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/file_util.hpp" #include "transformations/utils/utils.hpp" @@ -155,10 +157,10 @@ std::shared_ptr read_model(const std::string& model, ov::AnyVector params{&modelStream}; if (weights) { - std::shared_ptr weights_buffer = - std::make_shared>(reinterpret_cast(weights.data()), - weights.get_byte_size(), - weights); + std::shared_ptr weights_buffer = + std::make_shared>(reinterpret_cast(weights.data()), + weights.get_byte_size(), + weights); params.emplace_back(weights_buffer); } diff --git a/src/plugins/hetero/src/async_infer_request.cpp b/src/plugins/hetero/src/async_infer_request.cpp index a4f5f36e15f0f5..e9d3643b5baa43 100644 --- a/src/plugins/hetero/src/async_infer_request.cpp +++ b/src/plugins/hetero/src/async_infer_request.cpp @@ -7,7 +7,7 @@ struct RequestExecutor : ov::threading::ITaskExecutor { explicit RequestExecutor(ov::SoPtr& request) : m_request(request) { m_request->set_callback([this](std::exception_ptr exception_ptr) mutable { - m_exception_ptr = exception_ptr; + m_exception_ptr = std::move(exception_ptr); auto task = std::move(m_task); task(); }); diff --git a/src/plugins/hetero/src/sync_infer_request.cpp b/src/plugins/hetero/src/sync_infer_request.cpp index 21fbd0b4f2e2a3..0bb4bc4b7e9a4f 100644 --- a/src/plugins/hetero/src/sync_infer_request.cpp +++ b/src/plugins/hetero/src/sync_infer_request.cpp @@ -49,20 +49,15 @@ ov::hetero::InferRequest::InferRequest(const std::shared_ptr ov::hetero::InferRequest::get_request(const ov::Output& port) const { - auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { - return node1 == node2 || - (node1->get_friendly_name() == node2->get_friendly_name() && - node1->get_type_info() == node2->get_type_info() && - node1->outputs().size() == node2->outputs().size() && node1->inputs().size() == node2->inputs().size()); - }; - - for (const auto& kvp : m_port_to_subrequest_idx) { - if (kvp.first.get_index() == port.get_index() && kvp.first.get_names() == port.get_names() && - check_nodes(kvp.first.get_node(), port.get_node())) { - return m_subrequests[kvp.second]; - } + auto found_port = find_port(port); + ov::Output internal_port; + OPENVINO_ASSERT(found_port.found(), "Cannot find infer request for port ", port); + if (found_port.is_input()) { + internal_port = get_inputs().at(found_port.idx); + } else { + internal_port = 
get_outputs().at(found_port.idx); } - OPENVINO_THROW("Cannot find infer request for port ", port); + return m_subrequests[m_port_to_subrequest_idx.at(internal_port)]; } ov::SoPtr ov::hetero::InferRequest::get_tensor(const ov::Output& port) const { diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 734fd462acbebb..ba3474fddf2cc6 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -450,7 +450,9 @@ void Edge::init() { DEBUG_LOG(*this, " getBaseEdge() return itself"); changeStatus(Status::NeedAllocation); } else { - if (edgePtr->getParent()->isConstant() && !edgePtr->getChild()->isConstant()) { + if (Type::Input == edgePtr->getParent()->getType() && + edgePtr->getParent()->isConstant() && + !edgePtr->getChild()->isConstant()) { changeStatus(Status::NeedAllocation); DEBUG_LOG(*this, " edge inplace from ", *edgePtr, " is broken!"); return; diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp index b23e068244512a..da6e2e39442777 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp @@ -25,25 +25,25 @@ #include -using namespace std; +namespace ov { #define CREATE_SNIPPETS_EMITTER(e_type) { \ - [this](const ov::snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ + [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ return std::make_shared(h.get(), isa, expr); \ }, \ [](const std::shared_ptr& n) -> std::set> { \ return e_type::get_supported_precisions(n); \ } \ -}; +} #define CREATE_CPU_EMITTER(e_type) { \ - [this](const ov::snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ + [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ return std::make_shared(h.get(), isa, expr->get_node()); \ }, \ - [](const std::shared_ptr& n) -> std::set> { \ + [](const std::shared_ptr& n) -> std::set> { \ return e_type::get_supported_precisions(n); \ } \ -}; +} class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { public: @@ -58,94 +58,95 @@ class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { } }; -ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa) +intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa) : TargetMachine(), h(new jit_snippet()), isa(host_isa) { // data movement - jitters[ov::op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); - jitters[ov::op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + jitters[op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + jitters[op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); jitters[snippets::op::Buffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); jitters[snippets::op::VectorBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); - // jitters[ov::op::v1::Constant::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported + jitters[snippets::op::RankNormalization::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + // jitters[op::v1::Constant::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported jitters[snippets::op::Load::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadEmitter); jitters[snippets::op::LoadReshape::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadEmitter); 
jitters[snippets::op::BroadcastLoad::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BroadcastLoadEmitter); - jitters[ov::intel_cpu::LoadConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); - jitters[ov::intel_cpu::LoadConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); + jitters[intel_cpu::LoadConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); + jitters[intel_cpu::LoadConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); jitters[snippets::op::Store::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreEmitter); - jitters[ov::intel_cpu::StoreConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); - jitters[ov::intel_cpu::StoreConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); + jitters[intel_cpu::StoreConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); + jitters[intel_cpu::StoreConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); jitters[snippets::op::Scalar::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(ScalarEmitter); jitters[snippets::op::BroadcastMove::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BroadcastMoveEmitter); // jitters[snippets::op::Nop::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); // Not supported - // jitters[ov::op::v1::Broadcast::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported + // jitters[op::v1::Broadcast::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported - jitters[snippets::op::ConvertTruncation::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_convert_truncation_emitter); - jitters[snippets::op::ConvertSaturation::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_convert_saturation_emitter); - // jitters[ov::op::v1::FakeQuantize::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[snippets::op::ConvertTruncation::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_convert_truncation_emitter); + jitters[snippets::op::ConvertSaturation::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_convert_saturation_emitter); + // jitters[op::v1::FakeQuantize::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported // ternary - jitters[ov::op::v1::Select::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_select_emitter); - jitters[ov::intel_cpu::FusedMulAdd::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_mul_add_emitter); + jitters[op::v1::Select::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_select_emitter); + jitters[intel_cpu::FusedMulAdd::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_mul_add_emitter); // binary - jitters[ov::op::v1::Add::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_add_emitter); - jitters[ov::op::v1::Divide::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_divide_emitter); - jitters[ov::op::v1::Equal::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_equal_emitter); - jitters[ov::op::v1::FloorMod::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_floor_mod_emitter); - jitters[ov::op::v1::Greater::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_greater_emitter); - jitters[ov::op::v1::GreaterEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_greater_equal_emitter); - 
jitters[ov::op::v1::Less::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_less_emitter); - jitters[ov::op::v1::LessEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_less_equal_emitter); - jitters[ov::op::v1::LogicalAnd::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_and_emitter); - jitters[ov::op::v1::LogicalOr::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_or_emitter); - jitters[ov::op::v1::LogicalXor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_xor_emitter); - jitters[ov::op::v1::Maximum::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_maximum_emitter); - jitters[ov::op::v1::Minimum::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_minimum_emitter); - jitters[ov::op::v1::Mod::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_mod_emitter); - jitters[ov::op::v1::Multiply::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_multiply_emitter); - jitters[ov::op::v1::NotEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_not_equal_emitter); - jitters[snippets::op::PowerStatic::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_power_static_emitter); - jitters[ov::op::v1::Power::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_power_dynamic_emitter); - jitters[ov::op::v0::PRelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_prelu_emitter); - jitters[ov::op::v0::SquaredDifference::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_squared_difference_emitter); - jitters[ov::op::v1::Subtract::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_subtract_emitter); - jitters[ov::op::v0::Xor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_xor_emitter); + jitters[op::v1::Add::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_add_emitter); + jitters[op::v1::Divide::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_divide_emitter); + jitters[op::v1::Equal::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_equal_emitter); + jitters[op::v1::FloorMod::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_floor_mod_emitter); + jitters[op::v1::Greater::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_greater_emitter); + jitters[op::v1::GreaterEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_greater_equal_emitter); + jitters[op::v1::Less::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_less_emitter); + jitters[op::v1::LessEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_less_equal_emitter); + jitters[op::v1::LogicalAnd::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_and_emitter); + jitters[op::v1::LogicalOr::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_or_emitter); + jitters[op::v1::LogicalXor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_xor_emitter); + jitters[op::v1::Maximum::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_maximum_emitter); + jitters[op::v1::Minimum::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_minimum_emitter); + jitters[op::v1::Mod::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_mod_emitter); + jitters[op::v1::Multiply::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_multiply_emitter); + jitters[op::v1::NotEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_not_equal_emitter); + 
jitters[snippets::op::PowerStatic::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_power_static_emitter); + jitters[op::v1::Power::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_power_dynamic_emitter); + jitters[op::v0::PRelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_prelu_emitter); + jitters[op::v0::SquaredDifference::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_squared_difference_emitter); + jitters[op::v1::Subtract::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_subtract_emitter); + jitters[op::v0::Xor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_xor_emitter); // unary - jitters[ov::op::v0::Abs::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_abs_emitter); - // jitters[ov::op::v1::Acos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Asin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Atan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Ceiling::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_ceiling_emitter); - jitters[ov::op::v0::Clamp::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_clamp_emitter); - // jitters[ov::op::v1::Cos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Cosh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Elu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_elu_emitter); - jitters[ov::op::v0::Erf::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_erf_emitter); - jitters[ov::op::v0::Exp::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_exp_emitter); - jitters[ov::op::v0::Floor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_floor_emitter); - jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_round_emitter); - // jitters[ov::op::v1::Log::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v1::LogicalNot::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_not_emitter); - jitters[ov::op::v0::Negative::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_negative_emitter); - jitters[ov::op::v0::Relu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_relu_emitter); - // jitters[ov::op::v1::Sign::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Sigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_sigmoid_emitter); - // jitters[ov::op::v1::Sin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Sinh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Sqrt::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_sqrt_emitter); - // jitters[ov::op::v1::Tan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_tanh_emitter); - - jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_swish_emitter); - jitters[ngraph::op::v4::HSwish::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_hswish_emitter); - // jitters[ov::op::v1::HardSigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Selu::get_type_info_static()] 
= CREATE_CPU_EMITTER(); // not supported - jitters[ngraph::op::v0::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_gelu_v0_emitter); - jitters[ngraph::op::v7::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_gelu_v7_emitter); + jitters[op::v0::Abs::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_abs_emitter); + // jitters[op::v1::Acos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Asin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Atan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Ceiling::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_ceiling_emitter); + jitters[op::v0::Clamp::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_clamp_emitter); + // jitters[op::v1::Cos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Cosh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Elu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_elu_emitter); + jitters[op::v0::Erf::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_erf_emitter); + jitters[op::v0::Exp::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_exp_emitter); + jitters[op::v0::Floor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_floor_emitter); + jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_round_emitter); + // jitters[op::v1::Log::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v1::LogicalNot::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_not_emitter); + jitters[op::v0::Negative::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_negative_emitter); + jitters[op::v0::Relu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_relu_emitter); + // jitters[op::v1::Sign::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Sigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_sigmoid_emitter); + // jitters[op::v1::Sin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Sinh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Sqrt::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_sqrt_emitter); + // jitters[op::v1::Tan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_tanh_emitter); + + jitters[intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_swish_emitter); + jitters[ngraph::op::v4::HSwish::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_hswish_emitter); + // jitters[op::v1::HardSigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Selu::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[ngraph::op::v0::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_gelu_v0_emitter); + jitters[ngraph::op::v7::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_gelu_v7_emitter); jitters[snippets::op::Fill::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(FillEmitter); jitters[snippets::op::HorizonMax::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(HorizonEmitter); @@ -154,11 +155,11 @@ ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_ 
    jitters[snippets::op::Kernel::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(KernelEmitter);
     jitters[snippets::op::LoopBegin::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoopBeginEmitter);
     jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoopEndEmitter);
-    jitters[ov::intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmEmitter);
-    jitters[ov::intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmCopyBEmitter);
+    jitters[intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmEmitter);
+    jitters[intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmCopyBEmitter);
 }
 
-size_t ov::intel_cpu::CPUTargetMachine::get_lanes() const {
+size_t intel_cpu::CPUTargetMachine::get_lanes() const {
     switch (isa) {
         case dnnl::impl::cpu::x64::avx2 : return dnnl::impl::cpu::x64::cpu_isa_traits<dnnl::impl::cpu::x64::avx2>::vlen / sizeof(float);
         case dnnl::impl::cpu::x64::sse41 : return dnnl::impl::cpu::x64::cpu_isa_traits<dnnl::impl::cpu::x64::sse41>::vlen / sizeof(float);
@@ -167,28 +168,62 @@ size_t ov::intel_cpu::CPUTargetMachine::get_lanes() const {
     }
 }
 
-bool ov::intel_cpu::CPUTargetMachine::is_supported() const {
+dnnl::impl::cpu::x64::cpu_isa_t intel_cpu::CPUTargetMachine::get_isa() const {
+    return isa;
+}
+
+bool intel_cpu::CPUTargetMachine::is_supported() const {
     return dnnl::impl::cpu::x64::mayiuse(isa);
 }
 
-ov::snippets::code ov::intel_cpu::CPUTargetMachine::get_snippet() const {
+snippets::CompiledSnippetPtr intel_cpu::CPUTargetMachine::get_snippet() {
     if (h->create_kernel() != dnnl::impl::status::success) {
         IE_THROW() << "Failed to create jit_kernel in get_snippet()";
     }
-    return h->jit_ker();
+    const auto& result = std::make_shared<CompiledSnippetCPU>(std::unique_ptr<dnnl::impl::cpu::x64::jit_generator>(h.release()));
+    // Note that we reset all the generated code, since it was copied into CompiledSnippetCPU
+    h.reset(new jit_snippet());
+    return result;
+}
+
+intel_cpu::CompiledSnippetCPU::CompiledSnippetCPU(std::unique_ptr<dnnl::impl::cpu::x64::jit_generator> h) : h_compiled(std::move(h)) {
+    OPENVINO_ASSERT(h_compiled && h_compiled->jit_ker(), "Got invalid jit generator or kernel was not compiled");
 }
 
-ov::intel_cpu::CPUGenerator::CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa_) : Generator(std::make_shared<CPUTargetMachine>(isa_)) {
+const uint8_t* intel_cpu::CompiledSnippetCPU::get_code() const {
+    return h_compiled->jit_ker();
 }
 
-ov::snippets::Generator::opRegType ov::intel_cpu::CPUGenerator::get_specific_op_reg_type(const std::shared_ptr<ov::Node>& op) const {
-    if (std::dynamic_pointer_cast<ov::intel_cpu::BrgemmCPU>(op) ||
-        std::dynamic_pointer_cast<ov::intel_cpu::BrgemmCopyB>(op))
+size_t intel_cpu::CompiledSnippetCPU::get_code_size() const {
+    return h_compiled->getSize();
+}
+
+bool intel_cpu::CompiledSnippetCPU::empty() const {
+    return get_code_size() == 0;
+}
+
+intel_cpu::CPUGenerator::CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa_) : Generator(std::make_shared<CPUTargetMachine>(isa_)) {
+}
+
+std::shared_ptr<snippets::Generator> intel_cpu::CPUGenerator::clone() const {
+    const auto& cpu_target_machine = std::dynamic_pointer_cast<CPUTargetMachine>(target);
+    OPENVINO_ASSERT(cpu_target_machine, "Failed to clone CPUGenerator: the instance contains incompatible TargetMachine type");
+    return std::make_shared<CPUGenerator>(cpu_target_machine->get_isa());
+}
+
+snippets::Generator::opRegType intel_cpu::CPUGenerator::get_specific_op_reg_type(const std::shared_ptr<ov::Node>& op) const {
+    if (std::dynamic_pointer_cast<intel_cpu::BrgemmCPU>(op) ||
+        std::dynamic_pointer_cast<intel_cpu::BrgemmCopyB>(op))
         return gpr2gpr;
     else if (
-        std::dynamic_pointer_cast<ov::intel_cpu::FusedMulAdd>(op) ||
-        std::dynamic_pointer_cast<ov::intel_cpu::SwishNode>(op))
+        std::dynamic_pointer_cast<intel_cpu::FusedMulAdd>(op) ||
+        std::dynamic_pointer_cast<intel_cpu::SwishNode>(op))
         return vec2vec;
     else
OPENVINO_THROW("Register type of the operation " + std::string(op->get_type_name()) + " isn't determined!"); } +bool intel_cpu::CPUGenerator::uses_precompiled_kernel(const std::shared_ptr& e) const { + return std::dynamic_pointer_cast(e) || + std::dynamic_pointer_cast(e); +} +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp index 96ccbb4b0db97f..fa3528df6c9e6d 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp @@ -13,13 +13,23 @@ namespace ov { namespace intel_cpu { +class CompiledSnippetCPU : public snippets::CompiledSnippet { + const std::unique_ptr h_compiled; +public: + const uint8_t* get_code() const override; + size_t get_code_size() const override; + bool empty() const override; + explicit CompiledSnippetCPU(std::unique_ptr h); +}; + class CPUTargetMachine : public snippets::TargetMachine { public: - CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa); + explicit CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa); bool is_supported() const override; - snippets::code get_snippet() const override; + snippets::CompiledSnippetPtr get_snippet() override; size_t get_lanes() const override; + dnnl::impl::cpu::x64::cpu_isa_t get_isa() const; private: std::unique_ptr h; @@ -29,8 +39,10 @@ class CPUTargetMachine : public snippets::TargetMachine { class CPUGenerator : public snippets::Generator { public: CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa); + std::shared_ptr clone() const override; protected: + bool uses_precompiled_kernel(const std::shared_ptr& emitter) const override; opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override; }; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 24493334f1d675..072c3f7edcf60b 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -11,6 +11,7 @@ #include "snippets/lowered/port_connector.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op//brgemm_cpu.hpp" +#include "snippets/op/rank_normalization.hpp" using namespace InferenceEngine; using namespace Xbyak; @@ -121,7 +122,12 @@ KernelEmitter::KernelEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt element::Type etype; switch (expr->get_type()) { case snippets::lowered::IOExpression::io_type::INPUT: { - desc = expr->get_output_port_descriptor(0); + const auto first_consumer = expr->get_output_port_connector(0)->get_consumers().begin()->get_expr(); + if (ov::is_type(first_consumer->get_node())) { + desc = first_consumer->get_output_port_descriptor(0); + } else { + desc = expr->get_output_port_descriptor(0); + } etype = expr->get_node()->get_output_element_type(0); num_inputs++; break; diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index 71c84045e12848..1da8e866f7c4a1 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -157,6 +157,7 @@ std::map Extension::getOpSets() { NGRAPH_OP(Store, ov::snippets::op) NGRAPH_OP(Subgraph, ov::snippets::op) NGRAPH_OP(VectorBuffer, ov::snippets::op) + NGRAPH_OP(RankNormalization, ov::snippets::op) NGRAPH_OP_X64(LoadConvertSaturation, ov::intel_cpu) 
NGRAPH_OP_X64(LoadConvertTruncation, ov::intel_cpu) NGRAPH_OP_X64(StoreConvertSaturation, ov::intel_cpu) diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index c20ecbea76cdca..58c6c4f595a95f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -13,7 +13,6 @@ #include #include -#include #include #include @@ -119,67 +118,36 @@ bool SnippetKey::operator==(const SnippetKey& rhs) const { return true; } -snippets::op::Subgraph::BlockedShapeVector getBlockedShapes(const std::vector>& memBlockedDims, - const std::vector>& memOrders, const std::vector& memPrecs) { - size_t numShapes = memBlockedDims.size(); - if (memOrders.size() != numShapes || memPrecs.size() != numShapes) - IE_THROW(Unexpected) << "Number of shapes is mismacthed for dimensions, orders and precisions"; - snippets::op::Subgraph::BlockedShapeVector blockedShapes(numShapes); - for (size_t i = 0; i < numShapes; i++) { - size_t dimSize = memBlockedDims[i].size(); - std::vector dims(dimSize); - for (size_t j = 0; j < dimSize; j++) { - dims[j] = memBlockedDims[i][j]; - } - ov::PartialShape shape(dims); - ov::AxisVector order(memOrders[i]); - ov::element::Type precision = InferenceEngine::details::convertPrecision(memPrecs[i]); - - blockedShapes[i] = snippets::op::Subgraph::BlockedShape{shape, order, precision}; - } - - return blockedShapes; -} } // namespace Snippet::Snippet(const std::shared_ptr& op, const GraphContext::CPtr& context) : Node(op, context, SnippetShapeInferFactory(op)) { host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) ? dnnl::impl::cpu::x64::avx512_core : dnnl::impl::cpu::x64::avx2; - original_snippet = ov::as_type_ptr(op); - if (!original_snippet) { - IE_THROW(NotImplemented) << "Node is not an instance of snippets::op::Subgraph"; - } - init_body_hash(); - is_dynamic = isDynamicNgraphNode(op); -} + const auto& tmp_snippet = ov::as_type_ptr(op); + OPENVINO_ASSERT(tmp_snippet, "Attempt to create Snippet node from an invalid op type"); + snippetAttrs.snippet = tmp_snippet->clone(); + snippetAttrs.bodyHash = get_body_hash(tmp_snippet); -void Snippet::copy_snippet() const { - ov::OutputVector subgraph_node_inputs; - for (const auto &input : original_snippet->input_values()) { - auto new_input = std::make_shared(input.get_element_type(), input.get_partial_shape()); - subgraph_node_inputs.push_back(new_input); - } - std::shared_ptr new_body = original_snippet->body_ptr()->clone(); - snippetAttrs.snippet = std::make_shared(subgraph_node_inputs, new_body); - ov::copy_runtime_info(original_snippet, snippetAttrs.snippet); - snippetAttrs.snippet->set_friendly_name(original_snippet->get_friendly_name()); #if defined(OPENVINO_ARCH_X86_64) snippetAttrs.snippet->set_generator(std::make_shared(host_isa)); #else - IE_THROW(NotImplemented) << "CPU plugin: code-generation is not supported on non-x64 platforms"; + OPENVINO_THROW("CPU plugin: Snippets code-generator is not supported on non-x64 platforms"); #endif // OPENVINO_ARCH_X86_64 + + // Note: we have to update shapeInfer, so it uses the per-thread op::Subgraph copy + shapeInference = SnippetShapeInferFactory(snippetAttrs.snippet).makeShapeInfer(); + is_dynamic = isDynamicNgraphNode(op); } -void Snippet::init_body_hash() { +uint64_t Snippet::get_body_hash(const std::shared_ptr& snippet) { uint64_t seed = 0; ov::snippets::pass::Hash hash_function(seed); - hash_function.run_on_model(original_snippet->body_ptr()); - snippetAttrs.bodyHash = seed; + 
hash_function.run_on_model(snippet->body_ptr()); + return seed; } void Snippet::initSupportedPrimitiveDescriptors() { - copy_snippet(); if (!supportedPrimitiveDescriptors.empty()) return; @@ -315,16 +283,29 @@ void Snippet::selectOptimalPrimitiveDescriptor() { } void Snippet::initOptimalPrimitiveDescriptor() { + const auto isPlanar = [](const VectorDims& order ) { + for (size_t i = 0; i < order.size(); ++i) + if (order[i] != i) + return false; + return true; + }; Node::initOptimalPrimitiveDescriptor(); // memory order and precision is determined now, there is no need to prepare for each dynamic shapes. const auto config = getSelectedPrimitiveDescriptor()->getConfig(); inputNum = config.inConfs.size(); + snippets::op::Subgraph::BlockedShapeVector in_blocked_shapes; snippetAttrs.inMemPrecs.resize(inputNum); snippetAttrs.inMemOrders.resize(inputNum); + in_blocked_shapes.reserve(inputNum); + snippetAttrs.has_non_planar_inputs = false; for (size_t i = 0; i < inputNum; i++) { const auto& memDesc = config.inConfs[i].getMemDesc(); snippetAttrs.inMemPrecs[i] = memDesc->getPrecision(); - snippetAttrs.inMemOrders[i] = memDesc->as()->getOrder(); + const auto& blockedDesc = memDesc->as(); + const auto& order = blockedDesc->getOrder(); + snippetAttrs.inMemOrders[i] = order; + snippetAttrs.has_non_planar_inputs |= !isPlanar(order); + in_blocked_shapes.emplace_back(blockedDesc->getBlockDims(), order); } outputNum = config.outConfs.size(); snippetAttrs.outMemPrecs.resize(outputNum); @@ -338,6 +319,52 @@ void Snippet::initOptimalPrimitiveDescriptor() { snippetAttrs.outMemBlockedDims.resize(outputNum); srcMemPtrs.resize(inputNum); dstMemPtrs.resize(outputNum); + + // here we should perform all shape-agnostic snippets passes + // * canonicalization (RankNormalization insert) + // * precision propagation & align element types + // * data flow optimizations + // The result of these transformations will be reused by all shapes + using Manager = snippets::pass::Manager; + std::vector backend_passes; +#if defined(OPENVINO_ARCH_X86_64) + using PassPosition = snippets::pass::Manager::PassPosition; + using Place = snippets::pass::Manager::PassPosition::Place; +# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) \ + backend_passes.emplace_back(PASS_POS, std::make_shared(__VA_ARGS__)) +#else +# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) 
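The SNIPPETS_REGISTER_PASS block above anchors backend passes at named positions in the common snippets pipeline: PipelineStart/PipelineEnd, or Before/After a named pass such as "MatMulToBrgemm". PassPosition and Place are the real snippets::pass::Manager types; the following stand-alone sketch only illustrates the insertion mechanics with stand-in types.

    #include <algorithm>
    #include <iterator>
    #include <string>
    #include <utility>
    #include <vector>

    struct Pass { std::string name; };
    enum class Place { PipelineStart, PipelineEnd, Before, After };

    // Insert `pass` relative to an anchor pass name, mirroring the intent of
    // PassPosition(Place::..., "anchor") without the real machinery.
    void register_pass(std::vector<Pass>& pipeline, Place place, Pass pass,
                       const std::string& anchor = "") {
        auto it = std::find_if(pipeline.begin(), pipeline.end(),
                               [&](const Pass& p) { return p.name == anchor; });
        switch (place) {
        case Place::PipelineStart: pipeline.insert(pipeline.begin(), std::move(pass)); break;
        case Place::PipelineEnd:   pipeline.push_back(std::move(pass));                break;
        case Place::Before:        pipeline.insert(it, std::move(pass));               break;
        case Place::After:
            pipeline.insert(it == pipeline.end() ? it : std::next(it), std::move(pass));
            break;
        }
    }

    int main() {
        std::vector<Pass> pipeline = {{"MatMulToBrgemm"}, {"PropagatePrecision"}};
        register_pass(pipeline, Place::After, {"EnforcePrecision"}, "MatMulToBrgemm");
        register_pass(pipeline, Place::Before, {"BrgemmToBrgemmCPU"}, "PropagatePrecision");
    }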
+#endif // OPENVINO_ARCH_X86_64 + + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ConvertToSwishCPU); + if (context->getConfig().inferencePrecision == ov::element::bf16 && snippetAttrs.snippet->has_domain_sensitive_ops()) { + // enforce BF16 precisions to supported operations + // MatMul has to be decomposed to Brgemm operations before enforcement + // Note, MatMul decomposition will be run later again for case if BF16 enforcement is not happened + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ov::snippets::pass::MatMulToBrgemm); + SNIPPETS_REGISTER_PASS(PassPosition(Place::After, "MatMulToBrgemm"), pass::EnforcePrecision, element::f32, element::bf16); + } + + SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::BrgemmToBrgemmCPU); + SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::SetBrgemmCPUBlockingParams); + + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::RemoveConverts); + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::MulAddToFMA); + +#undef SNIPPETS_REGISTER_PASS + + std::vector input_precisions; + std::vector output_precisions; + input_precisions.reserve(inputNum); + for (const auto& p : snippetAttrs.inMemPrecs) { + input_precisions.push_back(InferenceEngine::details::convertPrecision(p)); + } + output_precisions.reserve(outputNum); + for (const auto& p : snippetAttrs.outMemPrecs) + output_precisions.push_back(InferenceEngine::details::convertPrecision(p)); + + snippetAttrs.snippet->data_flow_transformations(in_blocked_shapes, input_precisions, output_precisions, backend_passes); + snippetAttrs.snippet->convert_body_to_linear_ir(std::make_shared()); } InferenceEngine::Precision Snippet::getRuntimePrecision() const { @@ -361,9 +388,8 @@ void Snippet::prepareParams() { SnippetKey key = {snippetAttrs}; auto builder = [this](const SnippetKey& key) -> std::shared_ptr { - std::shared_ptr executor = std::make_shared(key.attrs, is_canonicalized, - is_dynamic, context->getConfig().inferencePrecision == ov::element::bf16); - is_canonicalized = true; + std::shared_ptr executor = + std::make_shared(key.attrs, is_dynamic, context->getConfig().inferencePrecision == ov::element::bf16); return executor; }; @@ -426,15 +452,17 @@ void Snippet::executeDynamicImpl(dnnl::stream strm) { } void Snippet::SnippetJitExecutor::exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) { - if (schedule.ptr == nullptr) { + if (schedule.lowering_result.compiled_snippet->empty()) { IE_THROW() << "Snippet can't use Optimized implementation and can't fallback to reference"; } auto initStartMemoryOffsets = [this, &inMemPtrs, &outMemPtrs]() { for (size_t i = 0; i < numInput; i++) { - start_offset_in[i] = inMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i]; + start_offset_in[i] = + static_cast(inMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i]); } for (size_t i = 0; i < numOutput; i++) { - start_offset_out[i] = outMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i + numInput]; + start_offset_out[i] = + static_cast(outMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i + numInput]); } }; // initialize start offsets to src and dst memory @@ -465,13 +493,13 @@ void Snippet::SnippetJitExecutor::update_ptrs(jit_snippets_call_args& call_args, void Snippet::SnippetJitExecutor::schedule_6d(const std::vector& inMemPtrs, const std::vector& outMemPtrs) { const auto& dom = parallel_exec_domain; // 
< N, C, H, W > < 1, 1, N, C*H*W> + const auto& callable = schedule.get_callable(); parallel_for5d(dom[0], dom[1], dom[2], dom[3], dom[4], [&](int64_t d0, int64_t d1, int64_t d2, int64_t d3, int64_t d4) { int64_t indexes[] = {d0, d1, d2, d3, d4}; jit_snippets_call_args call_args; update_ptrs(call_args, inMemPtrs, outMemPtrs); - - schedule.get_callable()(indexes, &call_args); + callable(indexes, &call_args); }); } @@ -487,8 +515,8 @@ void Snippet::SnippetJitExecutor::schedule_nt(const std::vector& inMe std::vector indexes(work_size.size() - 1, 0); for (size_t iwork = start; iwork < end; ++iwork) { size_t tmp = iwork; - for (ptrdiff_t j = work_size.size() - 2; j >= 0; j--) { - indexes[j] = tmp % work_size[j]; + for (ptrdiff_t j = static_cast(work_size.size()) - 2; j >= 0; j--) { + indexes[j] = static_cast(tmp % work_size[j]); tmp /= work_size[j]; } @@ -497,49 +525,25 @@ void Snippet::SnippetJitExecutor::schedule_nt(const std::vector& inMe }); } -Snippet::SnippetExecutor::SnippetExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16) - : snippetAttrs(attrs), is_canonicalized(is_canonicalized), is_dynamic(is_dynamic), enforceBF16(enforceBF16) {} +Snippet::SnippetExecutor::SnippetExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16) + : snippetAttrs(std::move(attrs)), is_dynamic(is_dynamic), enforceBF16(enforceBF16) {} -Snippet::SnippetJitExecutor::SnippetJitExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16) : - SnippetExecutor(attrs, is_canonicalized, is_dynamic, enforceBF16) { +Snippet::SnippetJitExecutor::SnippetJitExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16) : + SnippetExecutor(std::move(attrs), is_dynamic, enforceBF16) { numInput = snippetAttrs.inMemBlockedDims.size(); numOutput = snippetAttrs.outMemBlockedDims.size(); start_offset_in.resize(numInput); start_offset_out.resize(numOutput); - auto local_copy = [this]() { - ov::OutputVector subgraph_node_inputs; - for (size_t i = 0; i < numInput; i++) { - const auto paramShape = snippetAttrs.snippet->body_ptr()->get_parameters()[i]->get_shape(); - const auto paramType = snippetAttrs.snippet->body_ptr()->get_parameters()[i]->get_element_type(); - auto new_input = std::make_shared(paramType, paramShape); - subgraph_node_inputs.push_back(new_input); - } - std::shared_ptr new_body = snippetAttrs.snippet->body_ptr()->clone(); - - snippet_for_generation = std::make_shared(subgraph_node_inputs, new_body); - ov::copy_runtime_info(snippetAttrs.snippet, snippet_for_generation); - snippet_for_generation->set_friendly_name(snippetAttrs.snippet->get_friendly_name()); -#if defined(OPENVINO_ARCH_X86_64) - auto host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) - ? dnnl::impl::cpu::x64::avx512_core - : dnnl::impl::cpu::x64::avx2; - snippet_for_generation->set_generator(std::make_shared(host_isa)); -#else - IE_THROW(NotImplemented) << "CPU plugin: code-generation is not supported on non-x64 platforms"; -#endif // OPENVINO_ARCH_X86_64 - }; - // is_canonicalized is ture means just reshape canonicalized graph with new input shapes, and get updated master shape, - // false means canonicalization, determine master_shape on snippetAttrs.snippet. - ov::PartialShape canonicalShape = canonicalizeBody(is_canonicalized); - - if (is_dynamic) { - // we need a local snippets for generation, which will be adjusted based on input shapes possibily. 
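The index loop in schedule_nt above peels a flat parallel work-item id into per-dimension indexes, leaving the innermost dimension to the JIT kernel itself. A worked stand-alone example of the same arithmetic:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        // Outer dims {2, 3, 4} are parallelized; the last dim stays inside the kernel.
        std::vector<size_t> work_size = {2, 3, 4, 16};
        size_t iwork = 9;  // flat id in [0, 2*3*4)
        std::vector<size_t> indexes(work_size.size() - 1, 0);
        size_t tmp = iwork;
        for (ptrdiff_t j = static_cast<ptrdiff_t>(work_size.size()) - 2; j >= 0; j--) {
            indexes[j] = tmp % work_size[j];  // innermost-varying-fastest decomposition
            tmp /= work_size[j];
        }
        std::printf("%zu %zu %zu\n", indexes[0], indexes[1], indexes[2]);  // 0 2 1
    }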
- // The adjustment may be not compatible with new input shape in dynamic node, such as broadcastMove inserted. - local_copy(); - } else { - snippet_for_generation = snippetAttrs.snippet; + // todo: snippets don't support backend-provided blocking, so we need to reshape body + // using blocked shapes first. This can be removed after [121670] + if (snippetAttrs.has_non_planar_inputs) { + std::vector in_shapes; + for (const auto& s : snippetAttrs.inMemBlockedDims) + in_shapes.emplace_back(s); + snippetAttrs.snippet->shape_infer(in_shapes); } + const VectorDims& canonicalShape = snippetAttrs.snippet->infer_master_shape(); // initialize by maximum output dimension. Dimensions of outputs should be broadcastable tensorRank = std::max(static_cast(rank6D), canonicalShape.size()); @@ -552,85 +556,39 @@ Snippet::SnippetJitExecutor::SnippetJitExecutor(const SnippetAttrs& attrs, bool }; initDataSizes(); - if (canonicalShape.is_dynamic()) + if (std::any_of(canonicalShape.begin(), canonicalShape.end(), + [](size_t x){return x == snippets::IShapeInferSnippets::DYNAMIC_DIMENSION;})) IE_THROW() << "Snippets: Canonicalization returned dynamic shape in static pipeline"; - snippet_for_generation->set_min_parallel_work_amount(static_cast(parallel_get_max_threads())); + snippetAttrs.snippet->set_min_parallel_work_amount(static_cast(parallel_get_max_threads())); // Note: minimal JIT work amount is a predefined value that describes the number of kernel iterations (work amount) // needed to cover kernel call overhead. It is used for balancing between parallel and JIT work amounts in domain optimization. - snippet_for_generation->set_min_jit_work_amount(256); + snippetAttrs.snippet->set_min_jit_work_amount(256); // generate jit_snippets_compile_args jcp; jcp.parallel_executor_ndims = tensorRank; generate(&jcp); - buffer_scratchpad_size = snippet_for_generation->get_buffer_scratchpad_size(); + buffer_scratchpad_size = schedule.lowering_result.buffer_scratchpad_size; buffer_scratchpad.resize(buffer_scratchpad_size * parallel_get_max_threads(), 0); parallel_exec_domain = schedule.parallel_exec_domain; harnessWorkAmount = std::accumulate(parallel_exec_domain.begin(), parallel_exec_domain.end(), 1, std::multiplies()); parallel_exec_domain = getNormalizedDimsBySize(parallel_exec_domain, tensorRank); } -ov::PartialShape Snippet::SnippetJitExecutor::canonicalizeBody(bool reshape) { - ov::snippets::op::Subgraph::BlockedShapeVector input_blocked_shapes = getBlockedShapes( - snippetAttrs.inMemBlockedDims, snippetAttrs.inMemOrders, snippetAttrs.inMemPrecs); - if (reshape) { - const auto& canonicalShape = snippetAttrs.snippet->canonicalized_body_shape_infer(input_blocked_shapes); - return canonicalShape; - } else { - ov::snippets::op::Subgraph::BlockedShapeVector output_blocked_shapes = getBlockedShapes( - snippetAttrs.outMemBlockedDims, snippetAttrs.outMemOrders, snippetAttrs.outMemPrecs); - - const auto& canonicalShape = snippetAttrs.snippet->canonicalize(output_blocked_shapes, input_blocked_shapes); - return canonicalShape; - } -} - void Snippet::SnippetJitExecutor::generate(const jit_snippets_compile_args* jcp) { - using Manager = snippets::pass::Manager; - std::vector backend_passes; -#if defined(OPENVINO_ARCH_X86_64) - using PassPosition = snippets::pass::Manager::PassPosition; - using Place = snippets::pass::Manager::PassPosition::Place; -# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) 
\ - backend_passes.emplace_back(PASS_POS, std::make_shared(__VA_ARGS__)) -#else -# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) -#endif // OPENVINO_ARCH_X86_64 - - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ConvertToSwishCPU); - if (enforceBF16 && snippet_for_generation->has_domain_sensitive_ops()) { - // enforce BF16 precisions to supported operations - // MatMul has to be decomposed to Brgemm operations before enforcement - // Note, MatMul decomposition will be run later again for case if BF16 enforcement is not happened - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ov::snippets::pass::MatMulToBrgemm); - SNIPPETS_REGISTER_PASS(PassPosition(Place::After, "MatMulToBrgemm"), pass::EnforcePrecision, element::f32, element::bf16); - } - - SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::BrgemmToBrgemmCPU); - SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::SetBrgemmCPUBlockingParams); - - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::RemoveConverts); - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::MulAddToFMA); - -#undef SNIPPETS_REGISTER_PASS - ov::snippets::lowered::pass::PassPipeline control_flow_markup_pipeline; CPU_REGISTER_PASS_X64(control_flow_markup_pipeline, ov::intel_cpu::pass::BrgemmBlocking) ov::snippets::lowered::pass::PassPipeline control_flow_pipeline; CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::FuseLoadStoreConvert) CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::SetBrgemmCopyBBuffersShape); - // Note: we need to pass valid shapeInfer factory to generate, so it can be used in OptimizeDomain pass - // in all other cases nGraph shape inference will be used until ticket # 113209 (PR 18563) is merged - schedule = snippet_for_generation->generate(backend_passes, - control_flow_markup_pipeline, - control_flow_pipeline, - std::make_shared(), - reinterpret_cast(jcp)); + schedule = snippetAttrs.snippet->generate_from_linear_ir(control_flow_markup_pipeline, + control_flow_pipeline, + reinterpret_cast(jcp)); } bool Snippet::SnippetJitExecutor::schedule_created() { - return schedule.ptr != nullptr; + return !schedule.lowering_result.compiled_snippet->empty(); } } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.h b/src/plugins/intel_cpu/src/nodes/subgraph.h index 086e84e15ba631..0979aeee807d8f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.h +++ b/src/plugins/intel_cpu/src/nodes/subgraph.h @@ -48,31 +48,24 @@ class Snippet : public Node { // Local copy of subgraph node for canonization & code generation std::shared_ptr snippet; uint64_t bodyHash; - std::vector> inMemBlockedDims; - std::vector> inMemOrders; + std::vector inMemBlockedDims; + std::vector inMemOrders; std::vector inMemPrecs; - std::vector> outMemBlockedDims; - std::vector> outMemOrders; + std::vector outMemBlockedDims; + std::vector outMemOrders; std::vector outMemPrecs; + // todo: used flag if we need extra shape infer, can be removed after [121670] + bool has_non_planar_inputs; }; private: - static const size_t rank6D {6}; - typedef void (*kernel)(const void *, const void *); - // Create a deep local copy of the input snippet to perform canonicalization & code generation - // TODO: Probably better to implement a proper copy constructor - void copy_snippet() const; - void init_body_hash(); + static uint64_t get_body_hash(const std::shared_ptr& snippet); size_t 
inputNum = 0; size_t outputNum = 0; - // Original subgraph node - std::shared_ptr original_snippet; - mutable std::shared_ptr local_snippet; - // Holds ISA version used is codeGeneration target dnnl::impl::cpu::x64::cpu_isa_t host_isa; @@ -80,18 +73,17 @@ class Snippet : public Node { std::vector dstMemPtrs = {}; mutable SnippetAttrs snippetAttrs; - mutable bool is_canonicalized = false; bool is_dynamic = false; class SnippetExecutor { public: - SnippetExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16); + SnippetExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16); virtual void exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) = 0; virtual ~SnippetExecutor() = default; + std::shared_ptr shapeInference = nullptr; protected: SnippetAttrs snippetAttrs; - bool is_canonicalized = false; bool is_dynamic = false; bool enforceBF16 = false; }; @@ -100,7 +92,7 @@ class Snippet : public Node { class SnippetJitExecutor : public SnippetExecutor { public: - SnippetJitExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16); + SnippetJitExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16); void exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) override; bool schedule_created(); @@ -113,16 +105,12 @@ class Snippet : public Node { size_t numInput = 0; size_t numOutput = 0; - ov::PartialShape canonicalizeBody(bool reshape); - void generate(const jit_snippets_compile_args*); inline void update_ptrs(jit_snippets_call_args&, const std::vector& inMemPtrs, const std::vector& outMemPtrs); // Evaluates generated snippet using parallel backend void schedule_6d(const std::vector& inMemPtrs, const std::vector& outMemPtrs); void schedule_nt(const std::vector& inMemPtrs, const std::vector& outMemPtrs); - std::shared_ptr snippet_for_generation; - // Holds generated snippet with information about how to schedule it snippets::Schedule schedule; diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index b38ae2fde7e7db..b9b7345b37f493 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -513,7 +513,7 @@ void TensorIterator::createPrimitive() { lastUsedCond = initial_cond_check->getStatus(); } - if (isDynamicNode()) + if (runAsDynamic()) prepareDynamicBuffers(); Node::createPrimitive(); @@ -556,7 +556,7 @@ void TensorIterator::prepareParams() { prepareContinueCond(); prepareLoopBodyCurrentIteration(); - if (!isDynamicNode()) { + if (!runAsDynamic()) { prepareOutputPorts(); prepareBackEdges(); } @@ -568,6 +568,12 @@ void TensorIterator::prepareParams() { } void TensorIterator::execute(dnnl::stream strm) { + //Special case, the subgraph is dynamic while the node has all static shapes + if (runAsDynamic()) { + executeDynamicImpl(strm); + return; + } + sub_graph.ResetInferCount(); bool continue_cond = initial_cond_check->getStatus(); @@ -872,6 +878,10 @@ int TensorIterator::getNumIteration(const std::vector& inputPortMap, co return numIterations; } +bool TensorIterator::runAsDynamic() const { + return isDynamicNode() || Graph::Status::ReadyDynamic == sub_graph.getStatus(); +} + bool TensorIterator::created() const { return getType() == Type::TensorIterator; } diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.h b/src/plugins/intel_cpu/src/nodes/tensoriterator.h index 8633be5c28df61..104ee077f9a163 100644 --- 
a/src/plugins/intel_cpu/src/nodes/tensoriterator.h +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.h @@ -138,6 +138,7 @@ class TensorIterator : public Node { void reshapeAndFillOutput(dnnl::stream strm); bool checkForInputAndBodyShapesInequality() const; int getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const; + bool runAsDynamic() const; ExtensionManager::Ptr ext_mng; Graph sub_graph; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp index 64db193d7773f1..bae9b2b1dd43cc 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp @@ -43,3 +43,8 @@ void FusedMulAdd::validate_and_infer_types() { } set_output_type(0, element_type, pshape); } + +const ov::op::AutoBroadcastSpec& FusedMulAdd::get_autob() const { + static ov::op::AutoBroadcastSpec autob_spec(ov::op::AutoBroadcastType::NUMPY); + return autob_spec; +} diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp index e55741986a2473..68a730c95aabd2 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp @@ -24,6 +24,7 @@ class FusedMulAdd : public ngraph::op::Op { bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; void validate_and_infer_types() override; + const ov::op::AutoBroadcastSpec& get_autob() const override; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp index dfae3a030abdeb..fee91b5c5a38d3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp @@ -34,6 +34,8 @@ void core_configuration(ov::test::SubgraphBaseTest* test) { test->configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()}); } #endif + // todo: issue: 123320 + test->convert_precisions = {{ ov::element::bf16, ov::element::f32 }, { ov::element::f16, ov::element::f32 }}; } } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 4eb40365fa95d7..e942043dd3fbf2 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -230,6 +230,8 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); + // Issue: 123064 + retVector.emplace_back(R"(smoke_TestsROIPooling_.*/ROIPoolingLayerTest.*modelType=f16.*)"); #endif #endif diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp 
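FusedMulAdd::get_autob() above pins the op to NUMPY auto-broadcasting: shapes are aligned at their trailing axes and size-1 axes stretch. That is also what the new canonicalization test shapes below (e.g. {2, 17, 3, 4} against {1, 4}) exercise. A stand-alone recomputation of the rule, skipping validation of incompatible dimensions:

    #include <cstdio>
    #include <vector>

    std::vector<size_t> numpy_broadcast(std::vector<size_t> a, std::vector<size_t> b) {
        if (a.size() < b.size())
            a.swap(b);                                // make `a` the longer shape
        b.insert(b.begin(), a.size() - b.size(), 1);  // left-pad the shorter with 1s
        std::vector<size_t> out(a.size());
        for (size_t i = 0; i < a.size(); ++i)
            out[i] = (a[i] == 1) ? b[i] : a[i];       // a size-1 axis stretches
        return out;
    }

    int main() {
        const auto shape = numpy_broadcast({2, 17, 3, 4}, {1, 4});
        for (const auto d : shape)
            std::printf("%zu ", d);                   // prints: 2 17 3 4
    }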
index 45aaf9f4eeafdf..bd2f1aad832550 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp @@ -68,6 +68,9 @@ std::vector> inShapesAddPair { {{{}, {{1, 128, 9, 30}}}, {{}, {{1, 128, 1, 30}}}}, {{{}, {{1, 128, 9, 1}}}, {{}, {{1, 128, 1, 30}}}}, {{{}, {{1, 128, 9, 16}}}, {{}, {{1, 128, 9, 1}}}}, + // Test Canonicalization and Dimension collapsing + {{{}, {{2, 17, 3, 4}}}, {{}, {{1, 3, 4}}}}, + {{{}, {{2, 17, 3, 4}}}, {{}, {{1, 4}}}}, // DS {{{1, -1, {1, 10}, {1, 33}}, {{1, 128, 1, 1}, {1, 128, 1, 9}, {1, 128, 1, 17}, {1, 128, 1, 29}, {1, 128, 9, 1}, {1, 128, 1, 1}}}, {{{1, 1}, {128, 128}, {1, 10}, {1, 33}}, {{1, 128, 1, 1}, {1, 128, 1, 9}, {1, 128, 1, 17}, {1, 128, 1, 29}, {1, 128, 1, 30}, {1, 128, 1, 1}}}}, diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..f84c9844db6c10 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/transpose_matmul_fusion.hpp" + +using namespace ov::test; + +namespace { +INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion, + ::testing::Values(ov::test::utils::DEVICE_CPU), + TransposeMatMulFusion::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp index 1efb20ffc1d8d2..8cab3926b72a5f 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp @@ -91,6 +91,10 @@ class AdaPoolLayerCPUTest : public testing::WithParamInterfaceget_parameters().size() == 2) { + generatePooledVector(); + functionRefs = createFunction(true); + } } void generatePooledVector() { @@ -124,14 +128,6 @@ class AdaPoolLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 2) { - generatePooledVector(); - funcRef = createFunction(true); - } - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp index 836931ec465669..99367ef14e8ba9 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp @@ -9,14 +9,12 @@ using namespace InferenceEngine; using namespace CPUTestUtils; -using namespace ngraph::opset3; using namespace ov::test; namespace CPULayerTestsDefinitions { namespace { std::vector blockShape, cropsBegin, cropsEnd; - ngraph::Shape paramShape; } // namespace using BatchToSpaceLayerTestCPUParams = std::tuple< @@ -24,7 +22,7 @@ using BatchToSpaceLayerTestCPUParams = std::tuple< std::vector, // block shape std::vector, // crops begin std::vector, // crops end - 
Precision , // Network precision + ov::element::Type, // Network precision CPUSpecificParams>; class BatchToSpaceCPULayerTest : public testing::WithParamInterface, @@ -32,9 +30,9 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface &obj) { std::vector inputShapes; - Precision netPrecision; + ov::element::Type model_type; CPUSpecificParams cpuParams; - std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = obj.param; + std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, model_type, cpuParams) = obj.param; std::ostringstream result; if (inputShapes.front().first.size() != 0) { result << "IS=("; @@ -53,39 +51,40 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override { inputs.clear(); - const auto& funcInputs = function->inputs(); - for (size_t i = 0; i < funcInputs.size(); i++) { - const auto& funcInput = funcInputs[i]; + const auto& parameters = function->get_parameters(); + for (size_t i = 0; i < parameters.size(); i++) { + const auto& parameter = parameters[i]; ov::Tensor tensor; - if (i == 0U) { - tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); - } else if (i == 1U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < blockShape.size(); j++) { - dataPtr[j] = blockShape[j]; + const auto& param_type = parameter->get_output_element_type(0); + const auto& static_shape = targetInputStaticShapes[i]; + switch (i) { + case 0: { + tensor = ov::test::utils::create_and_fill_tensor(param_type, static_shape, 2560, 0, 256); + break; } - } else if (i == 2U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < cropsBegin.size(); j++) { - dataPtr[j] = cropsBegin[j]; + case 1: { + ASSERT_EQ(ov::shape_size(static_shape), blockShape.size()); + tensor = ov::Tensor(param_type, static_shape, blockShape.data()); + break; } - } else if (i == 3U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < cropsEnd.size(); j++) { - dataPtr[j] = cropsEnd[j]; + case 2: + case 3: { + ASSERT_EQ(ov::shape_size(static_shape), cropsEnd.size()); + tensor = ov::Tensor(param_type, static_shape, cropsEnd.data()); + break; + } + default: { + throw std::runtime_error("Incorrect parameter number!"); } } - inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + inputs.insert({parameter, tensor}); } } @@ -94,55 +93,51 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface inputShapes; - Precision netPrecision; + ov::element::Type model_type; CPUSpecificParams cpuParams; - std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = this->GetParam(); + std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, model_type, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + init_input_shapes(inputShapes); - auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - - const std::vector inputShapesVec{inputShapes}; - init_input_shapes(inputShapesVec); - - if (strcmp(netPrecision.name(), "U8") == 0) + if (model_type == ov::element::Type_t::u8) { selectedType = std::string("ref_any_") + "I8"; - else - selectedType = std::string("ref_any_") + netPrecision.name(); - - ov::ParameterVector params{std::make_shared(ngPrec, inputDynamicShapes.front())}; - auto 
paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - paramShape = {paramOuts[0].get_partial_shape().size()}; - - std::shared_ptr in2, in3, in4; - auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); - in2 = blockShapeParam; - params.push_back(blockShapeParam); - auto cropsBeginParam = std::make_shared(ngraph::element::i64, paramShape); - params.push_back(cropsBeginParam); - in3 = cropsBeginParam; - auto cropsEndParam = std::make_shared(ngraph::element::i64, paramShape); - params.push_back(cropsEndParam); - in4 = cropsEndParam; - auto btsNode = std::make_shared(paramOuts[0], in2, in3, in4); + } else { + std::string type_name = model_type.get_type_name(); + if (type_name == "f16") + type_name = "fp16"; + if (type_name == "f32") + type_name = "fp32"; + if (type_name == "f64") + type_name = "fp64"; + std::transform(type_name.begin(), type_name.end(), type_name.begin(), ::toupper); + selectedType = std::string("ref_any_") + type_name; + } + + std::shared_ptr in0, in1, in2, in3; + in0 = std::make_shared(model_type, inputDynamicShapes.front()); + in1 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[1]); + in2 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[2]); + in3 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[3]); + auto btsNode = std::make_shared(in0, in1, in2, in3); btsNode->get_rt_info() = getCPUInfo(); - ngraph::ResultVector results{std::make_shared(btsNode)}; - function = std::make_shared(results, params, "BatchToSpace"); + ngraph::ResultVector results{std::make_shared(btsNode)}; + function = std::make_shared(results, ov::ParameterVector{in0, in1, in2, in3}, "BatchToSpace"); } }; TEST_P(BatchToSpaceCPULayerTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(compiledModel, "BatchToSpace"); + // CheckPluginRelatedResults(compiledModel, "BatchToSpace"); }; namespace { -const std::vector netPrecision = { - Precision::U8, - Precision::I8, - Precision::I32, - Precision::FP32, - Precision::BF16 +const std::vector model_types = { + ov::element::Type_t::u8, + ov::element::Type_t::i8, + ov::element::Type_t::i32, + ov::element::Type_t::f32, + ov::element::Type_t::bf16 }; const std::vector> blockShape4D1 = {{1, 1, 1, 2}, {1, 2, 2, 1}}; @@ -226,7 +221,7 @@ const auto staticBatchToSpaceParamsSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto dynamicBatchToSpaceParamsSet4D1 = ::testing::Combine( @@ -234,7 +229,7 @@ const auto dynamicBatchToSpaceParamsSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_4D)); const auto dynamicBatchToSpaceParamsWithBlockedSet4D1 = ::testing::Combine( @@ -242,7 +237,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto staticBatchToSpaceParamsSet4D2 = ::testing::Combine( @@ -250,7 +245,7 @@ const auto staticBatchToSpaceParamsSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), 
::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto dynamicBatchToSpaceParamsSet4D2 = ::testing::Combine( @@ -258,7 +253,7 @@ const auto dynamicBatchToSpaceParamsSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), ::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_4D)); const auto dynamicBatchToSpaceParamsWithBlockedSet4D2 = ::testing::Combine( @@ -266,7 +261,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), ::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); INSTANTIATE_TEST_SUITE_P(smoke_StaticBatchToSpaceCPULayerTestCase1_4D, BatchToSpaceCPULayerTest, @@ -381,7 +376,7 @@ const auto staticBatchToSpaceParamsSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto dynamicBatchToSpaceParamsSet5D1 = ::testing::Combine( @@ -389,7 +384,7 @@ const auto dynamicBatchToSpaceParamsSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_5D)); const auto dynamicBatchToSpaceParamsWithBlockedSet5D1 = ::testing::Combine( @@ -397,7 +392,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto staticBatchToSpaceParamsSet5D2 = ::testing::Combine( @@ -405,7 +400,7 @@ const auto staticBatchToSpaceParamsSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto dynamicBatchToSpaceParamsSet5D2 = ::testing::Combine( @@ -413,7 +408,7 @@ const auto dynamicBatchToSpaceParamsSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_5D)); const auto dynamicBatchToSpaceParamsWithBlockedSet5D2 = ::testing::Combine( @@ -421,7 +416,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); INSTANTIATE_TEST_SUITE_P(smoke_StaticBatchToSpaceCPULayerTestCase1_5D, BatchToSpaceCPULayerTest, diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp index 
2f9706e7d2562e..282ebef47ba9bb 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp @@ -122,6 +122,14 @@ void RandomUniformLayerTestCPU::SetUp() { const ov::ResultVector results{std::make_shared(rnd_op)}; function = std::make_shared(results, in_params, "RandomUniformLayerTestCPU"); + + // todo: issue: 123320 + if (!InferenceEngine::with_cpu_x86_avx512_core()) { + convert_precisions.insert({ ov::element::bf16, ov::element::f32 }); + } + if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { + convert_precisions.insert({ ov::element::f16, ov::element::f32 }); + } } template @@ -206,19 +214,6 @@ void RandomUniformLayerTestCPU::compare(const std::vector& expected, #undef CASE } -precisions_map RandomUniformLayerTestCPU::get_ref_precisions_convert_map() { - precisions_map precisions; - - if (!InferenceEngine::with_cpu_x86_avx512_core()) { - precisions.insert({ ov::element::bf16, ov::element::f32 }); - } - if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { - precisions.insert({ ov::element::f16, ov::element::f32 }); - } - - return precisions; -} - inline double less_or_equal(double a, double b) { return (b - a) >= (std::fmax(std::fabs(a), std::fabs(b)) * std::numeric_limits::epsilon()); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp index 1cb9f5fccc451a..8e071439bc8577 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp @@ -35,8 +35,6 @@ class RandomUniformLayerTestCPU : public testing::WithParamInterface& expected, const std::vector& actual) override; - precisions_map get_ref_precisions_convert_map() override; - template void rndUCompare(const ov::Tensor& expected, const ov::Tensor& actual); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp index 455c78a8c09ed0..a602d3cbac45a8 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp @@ -94,6 +94,13 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -111,38 +118,6 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to 
build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - - void validate() override { - auto actualOutputs = get_plugin_outputs(); - if (function->get_parameters().size() == 2) { - auto pos = std::find_if(inputs.begin(), inputs.end(), - [](const std::pair, ov::Tensor> ¶ms) { - return params.first->get_friendly_name() == "param_1"; - }); - IE_ASSERT(pos != inputs.end()); - inputs.erase(pos); - } - auto expectedOutputs = calculate_refs(); - if (expectedOutputs.empty()) { - return; - } - ASSERT_EQ(actualOutputs.size(), expectedOutputs.size()) - << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size(); - - compare(expectedOutputs, actualOutputs); - } - void configure_model() override { ov::preprocess::PrePostProcessor p(function); { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp index 03f1f707254bc5..96a295830079ed 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp @@ -93,6 +93,13 @@ class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -110,18 +117,6 @@ class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp index b92646e458130e..cda499b042fb4e 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp @@ -371,6 +371,65 @@ class LoopForConcatLayerCPUTest : public LoopLayerCPUTest { } }; +class StaticLoopDynamicSubgraphCPUTest : public SubgraphBaseTest { + void SetUp() override { + InputShape input_shape = {{25, 1, 1}, {{25, 1, 1}}}; + InputShape input_exec_flag_shape = {{1}, {{1}}}; + targetDevice = ov::test::utils::DEVICE_CPU; + ElementType netType = ov::element::f32; + 
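The deconvolution workaround described in the comments above (replace the 3rd input parameter with a constant so the reference graph gets static output shapes) boils down to a parameter-to-constant substitution on an ov::Model. Below is a hedged sketch using the core OpenVINO graph API, under the assumption that the shape input is the model's last parameter; the tests' actual createGraph helper achieves the equivalent with its own plumbing, and freeze_shape_input is a hypothetical name.

    #include "openvino/core/graph_util.hpp"  // ov::replace_node
    #include "openvino/core/model.hpp"
    #include "openvino/op/constant.hpp"

    // Bake a runtime-provided output_shape into the graph: swap the trailing
    // Parameter for a Constant and drop it from the model signature, so static
    // shape inference can run end to end.
    void freeze_shape_input(const std::shared_ptr<ov::Model>& model,
                            const std::vector<int64_t>& output_shape) {
        auto param = model->get_parameters().back();  // assumed: the shape input
        auto constant = ov::op::v0::Constant::create(
            ov::element::i64, ov::Shape{output_shape.size()}, output_shape);
        ov::replace_node(param, constant);
        model->remove_parameter(param);
        model->validate_nodes_and_infer_types();
    }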
init_input_shapes({input_shape, input_exec_flag_shape}); + + ov::ParameterVector params; + params.push_back(std::make_shared(netType, inputDynamicShapes[0])); + + // exec_condition + params.push_back(std::make_shared(ov::element::boolean, inputDynamicShapes[1])); + + auto trip_count_input = std::make_shared(ov::element::i64, ov::Shape{1}, 2); + auto body_condition_const = std::make_shared(ov::element::boolean, ov::Shape{1}, true); + + // Body parameters + ov::ParameterVector body_params = {std::make_shared(netType, ov::PartialShape{25, 1, -1})}; + + // Body + auto broadcast_target_shape = std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{25, 1, 256}); + auto broadcast_axis_mapping = std::make_shared(ov::element::i64, ov::Shape{1}, 0); + auto broadcast = std::make_shared(body_params[0], broadcast_target_shape); + auto body = std::make_shared(ov::OutputVector{body_condition_const, broadcast}, body_params); + + auto loop = std::make_shared(trip_count_input, params[1]); + loop->set_function(body); + loop->set_special_body_ports(ov::op::v5::Loop::SpecialBodyPorts{-1, 0}); + + loop->set_merged_input(body_params.front(), params.front(), broadcast); + + auto out0 = loop->get_iter_value(body_condition_const, -1); + auto out1 = loop->get_iter_value(broadcast, -1); + + auto result0 = std::make_shared(out0); + auto result1 = std::make_shared(out1); + function = std::make_shared(ov::ResultVector{result0, result1}, params, "loop"); + } + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + + if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto* dataPtr = tensor.data(); + *dataPtr = true; + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } +}; + + TEST_P(LoopLayerCPUTest, CompareWithRefs) { run(); } @@ -387,6 +446,10 @@ TEST_P(LoopForConcatLayerCPUTest, CompareWithRefs) { run(); } +TEST_F(StaticLoopDynamicSubgraphCPUTest, smoke_StaticLoopWithDynSubgraph) { + run(); +} + namespace { const std::vector inputPrecisions = { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp index 8eda5f4221e77e..84f8c4b4740b22 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp @@ -101,13 +101,10 @@ class OneHotLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { if (function->get_parameters().size() == 2) { generateDepth(); - funcRef = createFunction(true); + functionRefs = createFunction(true); } - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); } void validate() override { auto actualOutputs = get_plugin_outputs(); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp new file mode 100644 index 00000000000000..1385313ce88d41 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 
+// + +#include +#include "test_utils/cpu_test_utils.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ov_models/utils/ov_helpers.hpp" +#include "ov_models/builders.hpp" + +using namespace CPUTestUtils; +using namespace InferenceEngine; + +namespace SubgraphTestsDefinitions { +// If a node (CumSum) with constant parents has several non-constant consumer nodes (Eltwises), then the edge is broken. +// The fix is to check the node type - it should be Input. +// Subgraph: +/* + * Constant Constant + * \ / + * \ / + * CumSum + * Parameter / \ Parameter + * \ / \ / + * \ / \ / + * Eltwise Eltwise + * \ / + * Eltwise + * | + * Result + */ + +using namespace ov::test; + +class NonInputInPlaceTest : public testing::WithParamInterface<ElementType>, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo<ElementType> obj) { + std::ostringstream result; + result << "NonInputInPlaceTest_inPrc=outPrc=" << obj.param; + return result.str(); + } + + void SetUp() override { + targetDevice = utils::DEVICE_CPU; + configuration.insert({ov::hint::inference_precision.name(), ov::element::f16.to_string()}); + const std::vector<size_t> inputShape = {1, 11, 3, 3}; + targetStaticShapes = {{inputShape, inputShape}}; + ElementType prc = this->GetParam(); + + ov::ParameterVector inputParams {std::make_shared<ov::op::v0::Parameter>(prc, ov::Shape(inputShape)), + std::make_shared<ov::op::v0::Parameter>(prc, ov::Shape(inputShape))}; + + auto cumsum_tensor = ngraph::opset8::Constant::create(prc, inputShape, {10.0f}); + auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i32, {}, {0}); + const auto cumsum = std::make_shared<ngraph::opset8::CumSum>(cumsum_tensor, axis_node); + + auto eltwiseMul = ngraph::builder::makeEltwise(inputParams[0], cumsum, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto eltwiseAdd1 = ngraph::builder::makeEltwise(inputParams[1], cumsum, ngraph::helpers::EltwiseTypes::ADD); + auto eltwiseAdd2 = ngraph::builder::makeEltwise(eltwiseAdd1, eltwiseMul, ngraph::helpers::EltwiseTypes::ADD); + + ngraph::ResultVector results{std::make_shared<ngraph::opset8::Result>(eltwiseAdd2)}; + function = std::make_shared<ngraph::Function>(results, inputParams, "NonInputInPlaceT"); + } +}; + +namespace { + TEST_P(NonInputInPlaceTest, CompareWithRefs) { + run(); + } + +INSTANTIATE_TEST_SUITE_P(smoke_NonInputInPlaceTest_CPU, NonInputInPlaceTest, + testing::Values(ngraph::element::f32, ngraph::element::f16), + NonInputInPlaceTest::getTestCaseName); + +} // namespace +} // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp index 612006be75dc2b..1a55b7204045d6 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp @@ -61,7 +61,6 @@ class InputOutputTensorReuse : public SubgraphBaseTest { TEST_F(InputOutputTensorReuse, smoke_Input_Output_Binding) { compile_model(); std::vector<ov::Shape> inputShapes = {{1, 32, 5, 16}, {1, 32, 1, 16}}; - init_ref_function(functionRefs, inputShapes); generate_inputs(inputShapes); validate(); @@ -69,7 +68,6 @@ TEST_F(InputOutputTensorReuse, smoke_Input_Output_Binding) { for (size_t i = 0; i < num_iter; i++) { auto outputTensor = inferRequest.get_output_tensor(0); inputShapes.back() = outputTensor.get_shape(); - init_ref_function(functionRefs, inputShapes); auto itr = std::find_if(inputs.begin(), inputs.end(), [](const std::pair<std::shared_ptr<ov::Node>, ov::Tensor>& item) { return 
item.first->get_friendly_name() == "Param_1"; }); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp index 7a6ef6e67c1d21..3a760050d0159a 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp @@ -6,8 +6,10 @@ #include #include #include +#include #include "snippets/op/scalar.hpp" #include "lowering_utils.hpp" +#include "common_test_utils/common_utils.hpp" #include "snippets/pass_manager.hpp" namespace ov { @@ -61,7 +63,7 @@ class EltwiseWithMulAddFunction : public SnippetsFunctionBase { ParameterVector parameters{data0, data1}; std::shared_ptr data2; if (scalar_input) { - data2 = std::make_shared(precision, Shape{}, 2.f); + data2 = std::make_shared(precision, Shape{1}, 2.f); } else { auto parameter = std::make_shared(precision, input_shapes[2]); parameters.push_back(parameter); @@ -110,8 +112,8 @@ class MulAddToFMATests : public LoweringTests, public testing::WithParamInterfac std::ostringstream result; for (size_t i = 0; i < inputShapes.size(); i++) - result << "IS[" << i << "]=" << inputShapes[i] << "_"; - result << "MS=" << master_shape << "_"; + result << "IS[" << i << "]=" << ov::test::utils::partialShape2str({inputShapes[i]}) << "_"; + result << "MS=" << ov::test::utils::partialShape2str({master_shape}) << "_"; result << "add_input_idx=" << add_input_idx; return result.str(); } @@ -146,7 +148,8 @@ TEST_P(MulAddToFMATests, MulAddToFMATests) { backend_passes, {}, {}, - generator); + generator, + std::make_shared()); model = subgraph->body_ptr(); model_ref = snippets_model->getLowered(); } diff --git a/src/plugins/intel_cpu/thirdparty/mlas b/src/plugins/intel_cpu/thirdparty/mlas index c7c8a631315000..f6425b13943348 160000 --- a/src/plugins/intel_cpu/thirdparty/mlas +++ b/src/plugins/intel_cpu/thirdparty/mlas @@ -1 +1 @@ -Subproject commit c7c8a631315000f17c650af34431009d2f22129c +Subproject commit f6425b1394334822390fcd9da12788c9cd0d11da diff --git a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp index 32903887e81181..b1eb1e0539e9a0 100644 --- a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp +++ b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp @@ -51,6 +51,7 @@ #include "legacy/ngraph_ops/selu_ie.hpp" #include "legacy/ngraph_ops/tile_ie.hpp" #include "legacy/ngraph_ops/topk_ie.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" #include "transformations/utils/utils.hpp" @@ -475,6 +476,11 @@ void CNNLayerCreator::on_adapter(const std::string& name, ::ngraph::ValueAccesso const auto data_beg = static_cast(a->get()->get_ptr()); params[name] = std::string(data_beg, a->get()->size()); } + } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter>>(&adapter)) { + if (std::string(node->get_type_name()) != "Constant") { + const auto data_beg = static_cast(a->get()->get_ptr()); + params[name] = std::string(data_beg, a->get()->size()); + } } else if (const auto& a = ngraph::as_type>(&adapter)) { const auto& attrs = a->get(); params[name] = details::joinVec(attrs); diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp 
b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp index 106b7696ccec40..05fac378c01874 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -102,5 +102,7 @@ std::vector<std::string> disabledTestPatterns() { R"(.*SplitConvTest.CompareWithRefImpl.*IS=\(1.(128|256)\).*IC=4.*OC=4.*configItem=GNA_DEVICE_MODE_GNA_SW_FP32)", // TODO: Issue: 114149 R"(.*smoke_Decompose2DConv.*)", + // TODO: Issue: 123306 + R"(smoke_convert_matmul_to_fc/ConvertMatmulToFcWithTransposesPass.CompareWithRefImpl/netPRC=FP(32|16)_targetDevice=GNA__configItem=GNA_COMPACT_MODE_NO_configItem=GNA_DEVICE_MODE_GNA_SW_(FP32|EXACT)_IS=\(8.*)", }; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp new file mode 100644 index 00000000000000..a6b7432ac28c51 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { +/// \brief Operator performing Root Mean Square Normalization +/// +/// \note Normalizes the summed input according to its RMS statistics, making the result invariant to re-scaling of the input +class RMS : public ov::op::Op { +public: + OPENVINO_OP("RMS", "gpu_opset"); + + RMS() = default; + /// \brief Constructs an RMS operation. + /// + /// \param data Input tensor with data + /// \param gamma Gamma values for weight + /// \param epsilon Epsilon for avoiding division by zero while normalizing the value + /// \param output_type Output element type + RMS(const Output<Node>& data, + const Output<Node>& gamma, + double epsilon, + const ov::element::Type output_type = ov::element::undefined); + + bool visit_attributes(ov::AttributeVisitor& visitor) override; + + void validate_and_infer_types() override; + + std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + double get_epsilon() const { return m_epsilon; } + + void set_epsilon(double epsilon) { m_epsilon = epsilon; } + +private: + double m_epsilon{0}; + ov::element::Type m_output_type; +}; + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 17e62ca926397b..ceba5be5a5dd53 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -268,3 +268,4 @@ REGISTER_FACTORY(internal, NmsStaticShapeIE8); REGISTER_FACTORY(internal, MulticlassNmsIEInternal); REGISTER_FACTORY(internal, FullyConnected); REGISTER_FACTORY(internal, FullyConnectedCompressed); +REGISTER_FACTORY(internal, RMS); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp index b9614cd47258c0..a0c5c7138764bf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp @@ -17,6 +17,12 @@ namespace cldnn { struct non_max_suppression : public primitive_base<non_max_suppression> { CLDNN_DECLARE_PRIMITIVE(non_max_suppression) + enum Rotation { + NONE, + CLOCKWISE, + COUNTERCLOCKWISE + }; + non_max_suppression() : primitive_base("", 
{}), selected_indices_num(0), center_point_box(false), @@ -68,6 +74,7 @@ struct non_max_suppression : public primitive_base { primitive_id soft_nms_sigma; primitive_id second_output; primitive_id third_output; + Rotation rotation{Rotation::NONE}; size_t hash() const override { size_t seed = primitive::hash(); @@ -79,6 +86,7 @@ struct non_max_suppression : public primitive_base { seed = hash_combine(seed, soft_nms_sigma.empty()); seed = hash_combine(seed, second_output.empty()); seed = hash_combine(seed, third_output.empty()); + seed = hash_combine(seed, rotation); return seed; } @@ -97,7 +105,8 @@ struct non_max_suppression : public primitive_base { cmp_fields(score_threshold.empty()) && cmp_fields(soft_nms_sigma.empty()) && cmp_fields(second_output.empty()) && - cmp_fields(third_output.empty()); + cmp_fields(third_output.empty()) && + cmp_fields(rotation); #undef cmp_fields } @@ -130,6 +139,7 @@ struct non_max_suppression : public primitive_base { ob << soft_nms_sigma; ob << second_output; ob << third_output; + ob << make_data(&rotation, sizeof(rotation)); } void load(BinaryInputBuffer& ib) override { @@ -143,6 +153,7 @@ struct non_max_suppression : public primitive_base { ib >> soft_nms_sigma; ib >> second_output; ib >> third_output; + ib >> make_data(&rotation, sizeof(rotation)); } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp new file mode 100644 index 00000000000000..cf2ad7af502da9 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "primitive.hpp" + +namespace cldnn { + +/// @brief Root Mean Square Normalization primitive +/// @details Performs re-scaling invariance and regularizes the summed input according to RMS statistics +struct rms : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(rms); + + rms() : primitive_base("", {}) {} + + /// @brief Constructs rms primitive + /// @param id This primitive id + /// @param input Input primitive id + /// @param gamma Gamma values for weight + /// @param epsilon Epsilon for not dividing by zero while normalizing + rms(const primitive_id& id, + const input_info& input, + const input_info& gamma, + const float epsilon, + const padding& output_padding = padding()) + : primitive_base(id, {input, gamma}, {output_padding}), + epsilon(epsilon) {} + + /// @brief Epsilon for not dividing by zero while normalizing + float epsilon; + + size_t hash() const override { + size_t seed = primitive::hash(); + seed = hash_combine(seed, epsilon); + return seed; + } + + bool operator==(const primitive& rhs) const override { + if (!compare_common_params(rhs)) + return false; + + auto rhs_casted = downcast(rhs); + + return epsilon == rhs_casted.epsilon; + } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << epsilon; + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> epsilon; + } +}; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 45739c78a1e36a..cc9d8602316fa5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -1221,6 +1221,8 @@ void 
prepare_primitive_fusing::fuse_constant_transposes(program& p) { return format::find_format(new_order, fmt.block_sizes()); }; + std::vector> to_replace_nodes; + auto& proc_order = p.get_processing_order(); auto itr = proc_order.begin(); while (itr != proc_order.end()) { @@ -1285,9 +1287,7 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { auto new_reorder = std::make_shared(next_node->id() + "_reorder_fmt", new_const_node.id(), reorder_layout); auto& new_reorder_node = p.get_or_create(new_reorder); - p.replace(*next_node, new_reorder_node); - new_reorder_node.recalc_output_layout(false); - itr = std::find(proc_order.begin(), proc_order.end(), &new_reorder_node); + to_replace_nodes.emplace_back(std::make_pair(next_node, &new_reorder_node)); } else { layout reorder_layout = new_const_node.get_output_layout(); reorder_layout.format = format::bfyx; @@ -1299,6 +1299,11 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { } } } + + for (auto& nodes : to_replace_nodes) { + p.replace(*nodes.first, *nodes.second); + nodes.second->recalc_output_layout(false); + } } void prepare_primitive_fusing::optimize_fused_ops(program& p) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp index 7405729120bfbd..f89980a3f936d4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp @@ -143,6 +143,17 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl params.sort_result_descending = primitive->sort_result_descending; params.box_encoding = primitive->center_point_box ? kernel_selector::BoxEncodingType::BOX_ENCODING_CENTER : kernel_selector::BoxEncodingType::BOX_ENCODING_CORNER; + switch (primitive->rotation) { + case non_max_suppression::Rotation::CLOCKWISE: + params.rotation = kernel_selector::NMSRotationType::CLOCKWISE; + break; + case non_max_suppression::Rotation::COUNTERCLOCKWISE: + params.rotation = kernel_selector::NMSRotationType::COUNTERCLOCKWISE; + break; + default: + params.rotation = kernel_selector::NMSRotationType::NONE; + } + if (impl_param.get_program().get_node(primitive->id).is_dynamic()) { params.reuse_internal_buffer = true; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 6b35b9cdfb16ce..3cc96ee00735ce 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -65,6 +65,7 @@ void register_implementations() { REGISTER_OCL(reshape); REGISTER_OCL(reverse); REGISTER_OCL(reverse_sequence); + REGISTER_OCL(rms); REGISTER_OCL(roi_align); REGISTER_OCL(roi_pooling); REGISTER_OCL(roll); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index 45f4018bf90dac..d591499148e77a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -59,6 +59,7 @@ #include "intel_gpu/primitives/resample.hpp" #include "intel_gpu/primitives/reshape.hpp" #include "intel_gpu/primitives/reverse_sequence.hpp" +#include "intel_gpu/primitives/rms.hpp" #include "intel_gpu/primitives/roi_align.hpp" #include "intel_gpu/primitives/roi_pooling.hpp" #include "intel_gpu/primitives/roll.hpp" @@ -146,6 +147,7 @@ REGISTER_OCL(reorg_yolo); REGISTER_OCL(reshape); REGISTER_OCL(reverse); 
REGISTER_OCL(reverse_sequence); +REGISTER_OCL(rms); REGISTER_OCL(roi_align); REGISTER_OCL(roi_pooling); REGISTER_OCL(roll); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp new file mode 100644 index 00000000000000..71f44e685157b0 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp @@ -0,0 +1,65 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "primitive_base.hpp" + +#include "rms_inst.h" +#include "rms/rms_kernel_selector.h" +#include "rms/rms_kernel_ref.h" + +namespace cldnn { +namespace ocl { + +struct rms_impl : typed_primitive_impl_ocl { + using parent = typed_primitive_impl_ocl; + using parent::parent; + using kernel_selector_t = kernel_selector::rms_kernel_selector; + using kernel_params_t = std::pair; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::rms_impl); + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { + const auto& primitive = impl_param.typed_desc(); + auto params = get_default_params(impl_param, is_shape_agnostic); + auto optional_params = get_default_optional_params(impl_param.get_program()); + + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); + params.epsilon = primitive->epsilon; + return {params, optional_params}; + } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_kernel_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + } +}; + +namespace detail { + +attach_rms_impl::attach_rms_impl() { + auto types = { + data_types::f32, + data_types::f16, + data_types::i32 + }; + + auto formats = { + format::bfyx, + format::bfzyx + }; + + implementation_map::add(impl_types::ocl, + shape_types::any, + typed_primitive_impl_ocl::create, + types, + formats); +} + +} // namespace detail +} // namespace ocl +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/rms_inst.h b/src/plugins/intel_gpu/src/graph/include/rms_inst.h new file mode 100644 index 00000000000000..a7800249f40421 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/rms_inst.h @@ -0,0 +1,44 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "intel_gpu/primitives/rms.hpp" +#include "primitive_inst.h" + +#include + +namespace cldnn { + +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {}; } +}; + +using rms_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + +public: + template + static std::vector calc_output_layouts(rms_node const& /*node*/, const kernel_impl_params& impl_params) { + return forward_input0_shape(impl_params); + } + static layout calc_output_layout(rms_node const& node, kernel_impl_params const& impl_params); + static std::string to_string(rms_node const& node); + + typed_primitive_inst(network& network, rms_node const& node); +}; + +using rms_inst = typed_primitive_inst; + +} // namespace cldnn diff --git 
a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 69b1e12fa3b4ae..ca4569a7df7099 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1484,17 +1484,21 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format if (blocked_formats.find(node.get_input_layout(0).format) != blocked_formats.end()) { preferred_impl = impl_types::ocl; } else { - auto& nms_node = node.as(); - auto scores_layout = nms_node.input_scores().get_output_layout(); - if (scores_layout.is_dynamic()) { + const auto& nms_node = node.as(); + if (nms_node.get_primitive()->rotation != non_max_suppression::Rotation::NONE) { preferred_impl = impl_types::ocl; } else { - const size_t kBatchNum = scores_layout.batch(); - const size_t kClassNum = scores_layout.feature(); - const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); - const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; - preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu; + const auto scores_layout = nms_node.input_scores().get_output_layout(); + if (scores_layout.is_dynamic()) { + preferred_impl = impl_types::ocl; + } else { + const size_t kBatchNum = scores_layout.batch(); + const size_t kClassNum = scores_layout.feature(); + const size_t kNStreams = + static_cast(node.get_program().get_config().get_property(ov::streams::num)); + const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; + preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu; + } } } } else if (node.is_type()) { diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 6c1e88de349115..92f9f60743b9f5 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -242,6 +242,7 @@ event::ptr primitive_inst::set_output_memory(memory::ptr mem_new, bool check, si } void primitive_inst::update_shape() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_shape: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::shape_inference); if (update_shape_done_by_other) { update_shape_done_by_other = false; // reset @@ -341,6 +342,7 @@ void primitive_inst::update_shape() { } if (has_runtime_deps) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_shape_sync: " + id())); if (!dependencies_events.empty() && queue_type == QueueTypes::out_of_order) { _network.get_stream().wait_for_events(dependencies_events); } else if (queue_type == QueueTypes::in_order) { @@ -380,6 +382,7 @@ void primitive_inst::update_shape() { } event::ptr primitive_inst::realloc_if_needed() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::memory_allocation); @@ -493,6 +496,7 @@ bool primitive_inst::use_async_compilation() { } bool primitive_inst::update_impl() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_impl: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_implementation); auto prev_impl_str = _impl != nullptr ? 
_impl->get_kernel_name() : "nullptr"; @@ -656,6 +660,7 @@ bool primitive_inst::update_impl() { } void primitive_inst::do_runtime_skip_reorder() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_skip_reorder: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->disable_runtime_skip_reorder) { return; @@ -713,6 +718,7 @@ void primitive_inst::do_runtime_skip_reorder() { } void primitive_inst::do_runtime_in_place_concat() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { return; @@ -780,6 +786,7 @@ bool primitive_inst::has_inner_networks() const { } event::ptr primitive_inst::execute(const std::vector& events) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("primitive_inst::execute: " + id())); const auto primitive_id = id(); OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input"); GPU_DEBUG_GET_INSTANCE(debug_config); @@ -802,6 +809,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } if (!is_valid_fusion()) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("unfused_subgraph_exec: " + id())); auto subgraph = get_unfused_subgraph(); for (auto& d : _deps) { @@ -859,16 +867,16 @@ event::ptr primitive_inst::execute(const std::vector& events) { GPU_DEBUG_TRACE << id() << ": execute " << _impl->get_kernel_name() << " (is_dynamic=" << _impl->is_dynamic() << ", " << "can_be_optimized=" << can_be_optimized() << ")" << std::endl; + const bool out_of_order_queue = get_network().get_stream().get_queue_type() == QueueTypes::out_of_order; if (_exec_deps.empty() && dependencies.empty()) { dependencies = events; } else { - auto queue_type = get_network().get_stream().get_queue_type(); // Prepare dependencies events in case of OOO queue, CPU implementation, // or optimized_out impl which has CPU users (needs_completion_event() && !is_output() condition) - if (queue_type == QueueTypes::out_of_order || _impl->is_cpu() || (can_be_optimized() && needs_completion_event() && !is_output())) { + if (out_of_order_queue || _impl->is_cpu() || (can_be_optimized() && needs_completion_event() && !is_output())) { dependencies.reserve(dependencies.size() + _exec_deps.size()); for (auto& input : _exec_deps) { - if (input->is_input() && queue_type != QueueTypes::out_of_order) + if (input->is_input() && !out_of_order_queue) continue; auto id = input->id(); try { @@ -883,6 +891,13 @@ event::ptr primitive_inst::execute(const std::vector& events) { } } + // Replace multiple events with single grouped event in case of barriers synchronization to prevent `_last_barrier_ev` usage as a dependency + // event of optimized_out instance's users, which may lead to unwanted extra synchronization of CPU impls with GPU kernels + if (_node && _node->is_in_shape_of_subgraph() && can_be_optimized() && dependencies.size() > 1 && out_of_order_queue) { + auto grouped_ev = get_network().get_stream().group_events(dependencies); + dependencies = {grouped_ev}; + } + { GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::inference); auto ev = _impl->execute(dependencies, *this); @@ -905,6 +920,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } void primitive_inst::set_arguments() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, 
openvino::itt::handle("set_arguments: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::set_arguments); OPENVINO_ASSERT(_has_valid_input, id(), " has invalid/unset input"); _impl->set_arguments(*this); @@ -1138,6 +1154,7 @@ void primitive_inst::allocate_internal_buffers(bool reset) { } event::ptr primitive_inst::update_weights() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_weights: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_weights); if (!_impl) return nullptr; diff --git a/src/plugins/intel_gpu/src/graph/rms.cpp b/src/plugins/intel_gpu/src/graph/rms.cpp new file mode 100644 index 00000000000000..5002417df57394 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/rms.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_inst.h" + +#include "primitive_type_base.h" +#include "json_object.h" +#include + +namespace cldnn { +GPU_DEFINE_PRIMITIVE_TYPE_ID(rms); + +layout rms_inst::calc_output_layout(rms_node const& node, kernel_impl_params const& impl_param) { + auto desc = impl_param.typed_desc(); + auto input_layout = impl_param.get_input_layout(); + auto output_type = desc->output_data_types[0].value_or(input_layout.data_type); + auto output_format = input_layout.format; + + return layout(output_type, output_format, input_layout.get_tensor()); +} + +std::string rms_inst::to_string(rms_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + + std::stringstream primitive_description; + + json_composite rms_info; + rms_info.add("input_id", node.input(0).id()); + rms_info.add("epsilon", desc->epsilon); + + node_info->add("rms_info", rms_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +rms_inst::typed_primitive_inst(network& network, rms_node const& node) : parent(network, node) {} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl index 36651d8773fe6c..cf26d0cbc276c0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl @@ -62,7 +62,7 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const boxes[INPUT0_GET_INDEX(batch, boxId, 2, 0)], boxes[INPUT0_GET_INDEX(batch, boxId, 3, 0)]); -#if BOX_ENCODING == 0 +#if !defined(ROTATION) && BOX_ENCODING == 0 const COORD_TYPE ax1 = min(coords[1], coords[3]); const COORD_TYPE ax2 = max(coords[1], coords[3]); const COORD_TYPE ay1 = min(coords[0], coords[2]); @@ -76,9 +76,331 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const return coords; } +#ifdef ROTATION + +typedef struct { + float x, y; +} FUNC(Point2D); +#define POINT_2D FUNC(Point2D) + +inline void FUNC(getRotatedVertices)(const COORD_TYPE_4 box, const INPUT0_TYPE angle, POINT_2D* pts) { + const float theta = angle + #if ROTATION == 2 + * -1.0f + #endif + ; + float cosTheta2 = cos(theta) * 0.5f; + float sinTheta2 = sin(theta) * 0.5f; + + // y: top --> down; x: left --> right + // Left-Down + pts[0].x = box[0]/*.x_ctr*/ - sinTheta2 * box[3]/*.h*/ - cosTheta2 * box[2]/*.w*/; + pts[0].y = box[1]/*.y_ctr*/ + cosTheta2 * box[3]/*.h*/ - sinTheta2 * box[2]/*.w*/; + // Left-Top + pts[1].x = box[0]/*.x_ctr*/ + 
sinTheta2 * box[3]/*.h*/ - cosTheta2 * box[2]/*.w*/; + pts[1].y = box[1]/*.y_ctr*/ - cosTheta2 * box[3]/*.h*/ - sinTheta2 * box[2]/*.w*/; + // Right-Top + pts[2].x = 2 * box[0]/*.x_ctr*/ - pts[0].x; + pts[2].y = 2 * box[1]/*.y_ctr*/ - pts[0].y; + // Right-Down + pts[3].x = 2 * box[0]/*.x_ctr*/ - pts[1].x; + pts[3].y = 2 * box[1]/*.y_ctr*/ - pts[1].y; +} + +inline float FUNC(dot2D)(const POINT_2D A, const POINT_2D B) { + return A.x * B.x + A.y * B.y; +} + +inline float FUNC(cross2D)(const POINT_2D A, const POINT_2D B) { + return A.x * B.y - B.x * A.y; +} + +inline int FUNC(getIntersectionPoints)(const POINT_2D* pts1, const POINT_2D* pts2, POINT_2D* intersections) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + POINT_2D vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i].x = pts1[(i + 1) % 4].x - pts1[i].x; + vec1[i].y = pts1[(i + 1) % 4].y - pts1[i].y; + vec2[i].x = pts2[(i + 1) % 4].x - pts2[i].x; + vec2[i].y = pts2[(i + 1) % 4].y - pts2[i].y; + } + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + float det = FUNC_CALL(cross2D)(vec2[j], vec1[i]); + // This takes care of parallel lines + if (fabs(det) <= 1e-14f) { + continue; + } + + POINT_2D vec12; + vec12.x= pts2[j].x - pts1[i].x; + vec12.y= pts2[j].y - pts1[i].y; + + float t1 = FUNC_CALL(cross2D)(vec2[j], vec12) / det; + float t2 = FUNC_CALL(cross2D)(vec1[i], vec12) / det; + + if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { + intersections[num].x = pts1[i].x + vec1[i].x * t1; + intersections[num].y = pts1[i].y + vec1[i].y * t1; + ++num; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const POINT_2D AB = vec2[0]; + const POINT_2D DA = vec2[3]; + float ABdotAB = FUNC_CALL(dot2D)(AB, AB); + float ADdotAD = FUNC_CALL(dot2D)(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. 
P's projection on AB lies within AB + // and P's projection on AD lies within AD + + POINT_2D AP; + AP.x = pts1[i].x - pts2[0].x; + AP.y = pts1[i].y - pts2[0].y; + + float APdotAB = FUNC_CALL(dot2D)(AP, AB); + float APdotAD = -FUNC_CALL(dot2D)(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num].x = pts1[i].x; + intersections[num].y = pts1[i].y; + ++num; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const POINT_2D AB = vec1[0]; + const POINT_2D DA = vec1[3]; + float ABdotAB = FUNC_CALL(dot2D)(AB, AB); + float ADdotAD = FUNC_CALL(dot2D)(DA, DA); + for (int i = 0; i < 4; i++) { + POINT_2D AP; + AP.x = pts2[i].x - pts1[0].x; + AP.y = pts2[i].y - pts1[0].y; + + float APdotAB = FUNC_CALL(dot2D)(AP, AB); + float APdotAD = -FUNC_CALL(dot2D)(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num].x = pts2[i].x; + intersections[num].y = pts2[i].y; + ++num; + } + } + } + + return num; +} + +inline void FUNC(swapPoints)(POINT_2D* a, POINT_2D* b) +{ + POINT_2D temp = *a; + *a = *b; + *b = temp; +} + +inline void FUNC(sortPoints)(POINT_2D* arr, int l, int h) +{ + for (int i = 0; i < h-l; i++) { + bool swapped = false; + + for (int j = l; j < h-i; j++) { + bool is_less = false; + const float temp = FUNC_CALL(cross2D)(arr[j], arr[j+1]); + if (fabs(temp) < 1e-6f) { + is_less = FUNC_CALL(dot2D)(arr[j], arr[j]) < FUNC_CALL(dot2D)(arr[j+1], arr[j+1]); + } else { + is_less = temp > 0; + } + + if (is_less) { + continue; + } + + FUNC_CALL(swapPoints)(&arr[j], &arr[j+1]); + swapped = true; + } + + if (!swapped) { + break; + } + } +} + +inline int FUNC(convex_hull_graham)(const POINT_2D* p, const int num_in, POINT_2D* q, bool shift_to_zero) { + if (num_in < 2) { + return -1; + } + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. + int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + const POINT_2D start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i].x = p[i].x - start.x; + q[i].y = p[i].y - start.y; + } + + // Swap the starting point to position 0 + FUNC_CALL(swapPoints)(&q[t], &q[0]); + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + float dist[24]; + for (int i = 0; i < num_in; i++) { + dist[i] = FUNC_CALL(dot2D)(q[i], q[i]); + } + + FUNC_CALL(sortPoints)(q, 1, num_in - 1); + + // compute distance to origin after sort, since the points are now different. + for (int i = 0; i < num_in; i++) { + dist[i] = FUNC_CALL(dot2D)(q[i], q[i]); + } + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8f) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0].x = p[t].x; + q[0].y = p[t].y; + return 1; + } + + q[1].x = q[k].x; + q[1].y = q[k].y; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. 
+ // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + POINT_2D diff1, diff2; + diff1.x = q[i].x - q[m - 2].x; + diff1.y = q[i].y - q[m - 2].y; + diff2.x = q[m - 1].x - q[m - 2].x; + diff2.y = q[m - 1].y - q[m - 2].y; + + float cross2d_diff = FUNC_CALL(cross2D)(diff1, diff2); + + while (m > 1 && cross2d_diff >= 0) { + m--; + } + q[m].x = q[i].x; + q[m].y = q[i].y; + ++m; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. + if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i].x += start.x; + q[i].y += start.y; + } + } + + return m; +} + +inline float FUNC(polygon_area)(const POINT_2D* q, const int m) { + if (m <= 2) { + return 0.f; + } + + float area = 0.f; + for (int i = 1; i < m - 1; i++) { + POINT_2D diff1, diff2; + diff1.x = q[i].x - q[0].x; + diff1.y = q[i].y - q[0].y; + diff2.x = q[i + 1].x - q[0].x; + diff2.y = q[i + 1].y - q[0].y; + float cross_result = FUNC_CALL(cross2D)(diff1, diff2); + + area += fabs(cross_result); + } + + return area / 2.0f; +} + +inline float FUNC(rotatedBoxesIntersection)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA, + const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from get_intersection_points + POINT_2D intersectPts[24], orderedPts[24]; + POINT_2D pts1[4]; + POINT_2D pts2[4]; + FUNC_CALL(getRotatedVertices)(boxA, angleA, pts1); + FUNC_CALL(getRotatedVertices)(boxB, angleB, pts2); + // Find points defining area of the boxes intersection + int num = FUNC_CALL(getIntersectionPoints)(pts1, pts2, intersectPts); + + if (num <= 2) { + return 0.f; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. 
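+ // convex_hull_graham starts from the lowest point, sorts the rest around it, and
+ // prunes concave turns; polygon_area then accumulates fabs(cross2D)/2 over the
+ // triangle fan rooted at the first hull vertex.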
+ int num_convex = FUNC_CALL(convex_hull_graham)(intersectPts, num, orderedPts, true); + return FUNC_CALL(polygon_area)(orderedPts, num_convex); +} + + +inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA, + const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB) +{ + const float areaA = convert_float(boxA[3]) * convert_float(boxA[2]); + const float areaB = convert_float(boxB[3]) * convert_float(boxB[2]); + + if (areaA <= 0.0f || areaB <= 0.0f) + return 0.0f; + + const float intersection_area = FUNC_CALL(rotatedBoxesIntersection)(boxA, angleA, boxB, angleB); + const float union_area = areaA + areaB - intersection_area; + return intersection_area / union_area; +} + +#else + inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TYPE_4 boxB) { -#if BOX_ENCODING == 0 +#if !defined(ROTATION) && BOX_ENCODING == 0 /// CORNER const float areaA = convert_float(boxA[3] - boxA[1]) * convert_float(boxA[2] - boxA[0]); const float areaB = convert_float(boxB[3] - boxB[1]) * convert_float(boxB[2] - boxB[0]); @@ -110,6 +432,7 @@ inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TY const float union_area = areaA + areaB - intersection_area; return intersection_area / union_area; } +#endif // ROTATION inline float FUNC(scaleIOU)(float iou, float iou_threshold, float scale) { @@ -240,6 +563,16 @@ inline void FUNC(swap)(__global BOX_INFO* a, __global BOX_INFO* b) *b = temp; } +#ifdef ROTATION +inline void FUNC(reverseOutputBoxList)(__global BOX_INFO *outBoxes, int boxNum) +{ + for (int i = 0; i < boxNum / 2; ++i) { + FUNC_CALL(swap)(&outBoxes[i], &outBoxes[boxNum - 1 - i]); + } +} + +#else + inline void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNum) { for (int i = 0; i < boxNum - 1; ++i) { @@ -261,6 +594,7 @@ inline void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNu break; } } +#endif // ROTATION #ifdef NMS_STAGE_0 @@ -427,9 +761,11 @@ KERNEL (non_max_suppression_ref_stage_2)( const ushort classId = get_global_id(1); float scale = 0.0f; + #ifndef ROTATION if (SOFT_NMS_SIGMA_VAL > 0.0f) { scale = -0.5f / SOFT_NMS_SIGMA_VAL; } + #endif __global SBOX_INFO *sortedBoxList = (__global SBOX_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE]; const int kSortedBoxNum = buffer2[batchId * NUM_CLASSES + classId]; @@ -442,12 +778,22 @@ KERNEL (non_max_suppression_ref_stage_2)( SBOX_INFO next_candidate = sortedBoxList[i]; INPUT1_TYPE original_score = next_candidate.score; const COORD_TYPE_4 next_candidate_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, next_candidate.boxId); + #ifdef ROTATION + const INPUT0_TYPE next_candidate_angle = boxes[INPUT0_GET_INDEX(batchId, next_candidate.boxId, 4, 0)]; + #endif + ++i; bool should_hard_suppress = false; for (int j = selectedBoxNum - 1; j >= next_candidate.suppress_begin_index; --j) { const COORD_TYPE_4 selected_box_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, selectedBoxList[j].boxId); + #ifdef ROTATION + const INPUT0_TYPE selected_box_angle = boxes[INPUT0_GET_INDEX(batchId, selectedBoxList[j].boxId, 4, 0)]; + const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, next_candidate_angle, + selected_box_coord, selected_box_angle); + #else const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, selected_box_coord); + #endif next_candidate.score *= FUNC_CALL(scaleIOU)(iou, IOU_THRESHOLD_VAL, scale); if (iou >= IOU_THRESHOLD_VAL && !(SOFT_NMS_SIGMA_VAL > 0.0f)) { @@ -531,7 +877,11 @@ KERNEL 
(non_max_suppression_ref_stage_3)( } #if SORT_RESULT_DESCENDING == 1 +#ifdef ROTATION + FUNC_CALL(reverseOutputBoxList)(sortedBoxList, outputIdx); +#else FUNC_CALL(sortOutputBoxList)(sortedBoxList, outputIdx); +#endif #endif unroll_for (int i = 0; i < outputIdx; i++) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl new file mode 100644 index 00000000000000..77d16d06405aa5 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl @@ -0,0 +1,72 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/batch_headers/fetch_data.cl" + +KERNEL(rms_gpu_bfyx_opt)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + const __global INPUT1_TYPE* gamma, + __global OUTPUT_TYPE* output) +{ + const uint in_data_idx = get_global_id(0); + const uint data_idx = get_global_id(1); + const uint lws_size = LWS; + const uint items_num = VEC_SIZE; + const uint data_size = DATA_SIZE; + const uint total_items_num = lws_size * items_num; +#if !IS_DYNAMIC + const uint leftovers = LEFTOVERS; +#else + const uint leftovers = data_size % items_num; +#endif + + const uint data_offset = data_idx * data_size; + const uint in_data_offset = data_offset + in_data_idx * items_num; + const uint gamma_offset = in_data_idx * items_num; + + ACCUMULATOR_TYPE rms = ACCUMULATOR_VAL_ZERO; + + __local ACCUMULATOR_TYPE slm_buf[SLM_SIZE]; + + INPUT_VEC_TYPE inputs = AS_INPUT_VEC_TYPE(VLOAD(0, input + in_data_offset)); + ACCUMULATOR_VEC_TYPE square = native_powr(TO_ACCUMULATOR_VEC_TYPE(inputs), (ACCUMULATOR_VEC_TYPE)(2)); + unroll_for (uint i = 0; i < VEC_SIZE; ++i) { + rms += square[i]; + } + + if (in_data_idx < leftovers) { + const uint input_idx = data_offset + total_items_num + in_data_idx; + rms += native_powr(TO_ACCUMULATOR_TYPE(input[input_idx]), 2); + } + + slm_buf[in_data_idx] = rms; + + barrier(CLK_LOCAL_MEM_FENCE); + if (in_data_idx == 0) { +#if !IS_DYNAMIC + unroll_for (uint i = 1; i < LWS; ++i) +#else + for (uint i = 1; i < lws_size; ++i) +#endif + rms += slm_buf[i]; + + rms = rms / data_size; + slm_buf[0] = native_powr(sqrt(rms + TO_ACCUMULATOR_TYPE(EPSILON)), -1); + } + barrier(CLK_LOCAL_MEM_FENCE); + + rms = slm_buf[0]; + + OUTPUT_VEC_TYPE results = TO_OUTPUT_VEC_TYPE((ACCUMULATOR_VEC_TYPE)(rms) * TO_ACCUMULATOR_VEC_TYPE(inputs) * AS_ACCUMULATOR_VEC_TYPE(VLOAD(0, gamma + gamma_offset))); + VSTORE(results, 0, output + in_data_offset); + + if (in_data_idx < leftovers) { + const uint input_idx = data_offset + total_items_num + in_data_idx; + const uint output_idx = data_offset + total_items_num + in_data_idx; + const uint gamma_idx = total_items_num + in_data_idx; + OUTPUT_TYPE result = TO_OUTPUT_TYPE(rms * TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(gamma[gamma_idx])); + output[output_idx] = result; + } +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl new file mode 100644 index 00000000000000..88c5eb520d33e3 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl @@ -0,0 +1,45 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/fetch_utils.cl" + +KERNEL(rms_gpu_ref)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + const __global INPUT1_TYPE* gamma, + __global OUTPUT_TYPE* output) +{ + const 
uint b = get_global_id(0); + const uint f = get_global_id(1); + const uint w = 0; + + ACCUMULATOR_TYPE rms = ACCUMULATOR_VAL_ZERO; + for (uint z = 0; z < INPUT0_SIZE_Z; z++) { + for (uint y = 0; y < INPUT0_SIZE_Y; y++) { + for (uint x = 0; x < INPUT0_SIZE_X; x++) { + const uint input_idx = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); + rms += pow(TO_ACCUMULATOR_TYPE(input[input_idx]), 2); + } + } + } + + rms /= INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z; + rms = pow(sqrt(rms + TO_ACCUMULATOR_TYPE(EPSILON)), -1); + + for (uint z = 0; z < INPUT0_SIZE_Z; z++) { + for (uint y = 0; y < INPUT0_SIZE_Y; y++) { + for (uint x = 0; x < INPUT0_SIZE_X; x++) { + const uint input_idx = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); + const uint output_idx = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); +#if INPUT0_DIMS == 4 + const uint gamma_idx = y; +#elif INPUT0_DIMS == 5 + const uint gamma_idx = z; +#endif + OUTPUT_TYPE result = TO_OUTPUT_TYPE(rms) * TO_OUTPUT_TYPE(input[input_idx]) * TO_OUTPUT_TYPE(gamma[gamma_idx]); + output[output_idx] = result; + } + } + } +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 7706da6003fe74..1acc0aa89e6af6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -97,6 +97,7 @@ enum class KernelType { MULTICLASS_NMS, UNIQUE_COUNT, UNIQUE_GATHER, + RMS, }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -569,6 +570,15 @@ enum class BoxEncodingType { BOX_ENCODING_CENTER, }; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// NMSRotationType +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +enum class NMSRotationType { + NONE, + CLOCKWISE, + COUNTERCLOCKWISE +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // ConvertColor //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp index 28a6b2fa9e0bb6..fc85b23005ec84 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp @@ -149,11 +149,17 @@ JitConstants NonMaxSuppressionKernelRef::GetJitConstants(const non_max_suppressi jit.AddConstant(MakeJitConstant("SCORE_THRESHOLD_VAL", params.score_threshold)); } - if (params.soft_nms_sigma_type == base_params::ArgType::Input) { - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma()))); - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])")); + if (params.rotation == NMSRotationType::NONE) { + if (params.soft_nms_sigma_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma()))); + 
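// the kernel then reads sigma at execution time: SOFT_NMS_SIGMA_VAL expands to the runtime input element below + 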
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])")); + } else { + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma)); + } } else { - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma)); + jit.AddConstant(MakeJitConstant("ROTATION", static_cast(params.rotation))); + // for NMSRotated it is always zero + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", 0.0f)); } if (params.has_second_output) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h index 5ace6fbebffac3..8fc2dc2724a9bd 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h @@ -36,6 +36,7 @@ struct non_max_suppression_params : public base_params { bool has_third_output; bool use_multiple_outputs; bool reuse_internal_buffer = false; + NMSRotationType rotation = NMSRotationType::NONE; uint32_t GetIndexNumSelectPerClass() const { uint32_t input_idx = 2; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp new file mode 100644 index 00000000000000..f93cee2876de93 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_kernel_base.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { +bool RMSKernelBase::Validate(const Params& p, const optional_params& o) const { + if (!KernelBaseOpenCL::Validate(p, o)) + return false; + + const rms_params& params = static_cast(p); + auto supported_dyn_layouts = { DataLayout::bfyx, DataLayout::bfzyx }; + if (params.has_dynamic_tensors() && (!layout_is_one_of(params.inputs, supported_dyn_layouts) || !layout_is_one_of(params.outputs, supported_dyn_layouts))) + return false; + + return true; +} + +JitConstants RMSKernelBase::GetJitConstants(const rms_params& params, RMSKernelBase::DispatchData) const { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstant(MakeJitConstant("EPSILON", params.epsilon)); + jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); + + return jit; +} + +RMSKernelBase::DispatchData RMSKernelBase::SetDefault(const rms_params& params) const { + DispatchData dispatchData; + const auto& output = params.outputs[0]; + + dispatchData.gws = {output.Batch().v, output.Feature().v, 1}; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + + return dispatchData; +} + +KernelsData RMSKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const { + assert(params.GetType() == KernelType::RMS); + + if (!Validate(params, options)) + return {}; + + const rms_params& orgParams = static_cast(params); + auto dispatchData = SetDefault(orgParams); + + KernelData kd = KernelData::Default(params); + + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); + auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) { + const 
auto& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + kd.kernels[0].params.workGroups.local = dispatchData.lws; + kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params); + }; + + auto& kernel = kd.kernels[0]; + FillCLKernelData(kernel, + dispatchData, + params.engineInfo, + kernelName, + jit, + entry_point, + EXE_MODE_DEFAULT, + false, + false, + 2, + GetFusedPrimitiveInputsCount(params), + 1, + orgParams.outputs[0].is_dynamic()); + + return {kd}; +} + +Datatype RMSKernelBase::GetAccumulatorType(const rms_params& params) const { + const auto& input_dt = params.inputs[0].GetDType(); + + switch (input_dt) { + case Datatype::F32: + case Datatype::F16: + return Datatype::F32; + case Datatype::INT8: return Datatype::INT32; + case Datatype::UINT8: return Datatype::INT32; + default: return Datatype::F32; + } +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h new file mode 100644 index 00000000000000..546c209bf03d77 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h @@ -0,0 +1,50 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_base_opencl.h" + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// rms_params +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct rms_params : public base_params { + rms_params() : base_params(KernelType::RMS) {} + float epsilon = 0.0f; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// rms_optional_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct rms_optional_params : optional_params { + rms_optional_params() : optional_params(KernelType::RMS) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// RMSKernelBase +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class RMSKernelBase : public KernelBaseOpenCL { +public: + using KernelBaseOpenCL::KernelBaseOpenCL; + virtual ~RMSKernelBase() {} + + struct DispatchData : public CommonDispatchData { + size_t dataSize; + size_t dataCount; + size_t slmSize; + size_t maxSlmSize; + size_t leftovers; + + DispatchData() : dataSize(0), dataCount(0), slmSize(0), maxSlmSize(0), leftovers(0) {} + }; + +protected: + bool Validate(const Params&, const optional_params&) const override; + virtual JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const; + virtual DispatchData SetDefault(const rms_params& params) const; + KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const; + Datatype GetAccumulatorType(const rms_params& params) const; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp 
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp
new file mode 100644
index 00000000000000..ad49fd86370e0a
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp
@@ -0,0 +1,120 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_kernel_bfyx_opt.h"
+#include "kernel_selector_utils.h"
+#include <string>
+
+namespace kernel_selector {
+ParamsKey RMSKernelBfyxOpt::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBatching();
+    k.EnableDifferentTypes();
+    k.EnableDynamicShapesSupport();
+    return k;
+}
+
+JitConstants RMSKernelBfyxOpt::GetJitConstants(const rms_params& params, DispatchData dispatchData) const {
+    auto jit = Parent::GetJitConstants(params, dispatchData);
+
+    if (params.has_dynamic_tensors()) {
+        const auto& input = params.inputs[0];
+        DimensionAccessHelper dims(input);
+        const std::string data_size = toVectorMulString({dims.x(), dims.y(), dims.z()});
+        const std::string lws_0 = "get_local_size(0)";
+        jit.AddConstants({
+            MakeJitConstant("DATA_SIZE", data_size),
+            MakeJitConstant("LWS", lws_0),
+            MakeJitConstant("SLM_SIZE", dispatchData.maxSlmSize)
+        });
+    } else {
+        jit.AddConstants({
+            MakeJitConstant("DATA_SIZE", dispatchData.dataSize),
+            MakeJitConstant("LWS", dispatchData.slmSize),
+            MakeJitConstant("SLM_SIZE", dispatchData.slmSize),
+            MakeJitConstant("LEFTOVERS", dispatchData.leftovers)
+        });
+    }
+    jit.AddConstants({
+        MakeJitConstant("VEC_SIZE", 8),
+        MakeJitConstant("VLOAD", "CAT(vload, VEC_SIZE)"),
+        MakeJitConstant("VSTORE", "CAT(vstore, VEC_SIZE)"),
+        MakeJitConstant("INPUT_VEC_TYPE", "MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE)"),
+        MakeJitConstant("ACCUMULATOR_VEC_TYPE", "MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, VEC_SIZE)"),
+        MakeJitConstant("OUTPUT_VEC_TYPE", "MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)"),
+        MakeJitConstant("AS_INPUT_VEC_TYPE", "CAT(as_, INPUT_VEC_TYPE)"),
+        MakeJitConstant("AS_ACCUMULATOR_VEC_TYPE", "CAT(as_, ACCUMULATOR_VEC_TYPE)"),
+        MakeJitConstant("TO_ACCUMULATOR_VEC_TYPE", "CAT(convert_, ACCUMULATOR_VEC_TYPE)"),
+        MakeJitConstant("TO_OUTPUT_VEC_TYPE", "CAT(convert_, OUTPUT_VEC_TYPE)"),
+    });
+
+    return jit;
+}
+
+RMSKernelBase::DispatchData RMSKernelBfyxOpt::SetDefault(const rms_params& params) const {
+    DispatchData dispatchData;
+    const auto& input = params.inputs[0];
+
+    auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType());
+    auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi);
+    dispatchData.maxSlmSize = max_lws;
+
+    if (!params.has_dynamic_tensors()) {
+        dispatchData.dataSize = input.X().v * input.Y().v * input.Z().v;
+        dispatchData.dataCount = input.Batch().v * input.Feature().v;
+        dispatchData.slmSize = dispatchData.dataSize / 8;
+        dispatchData.leftovers = dispatchData.dataSize % 8;
+
+        dispatchData.gws[0] = dispatchData.slmSize;
+        dispatchData.gws[1] = dispatchData.dataCount;
+        dispatchData.gws[2] = 1;
+
+        dispatchData.lws[0] = dispatchData.slmSize;
+        dispatchData.lws[1] = 1;
+        dispatchData.lws[2] = 1;
+    }
+    return dispatchData;
+}
+
+bool RMSKernelBfyxOpt::Validate(const Params& p, const optional_params& o) const {
+    if (!Parent::Validate(p, o))
+        return false;
+
+    const rms_params& params = static_cast<const rms_params&>(p);
+    const auto& gamma = params.inputs[1];
+
+    if (!gamma.is_dynamic()) {
+        size_t data_size = gamma.LogicalSize();
+        if (data_size < 8) {
+            return false;
+        }
+        auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType());
+        auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi);
+        auto slm_size = data_size / 8;
+        if (slm_size > max_lws) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+KernelsData RMSKernelBfyxOpt::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetCommonKernelsData(params, options);
+}
+
+KernelsPriority RMSKernelBfyxOpt::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
+    return FORCE_PRIORITY_7;
+}
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h
new file mode 100644
index 00000000000000..a9b49c4c1cc654
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h
@@ -0,0 +1,25 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "rms_kernel_base.h"
+
+namespace kernel_selector {
+class RMSKernelBfyxOpt : public RMSKernelBase {
+public:
+    using Parent = RMSKernelBase;
+    RMSKernelBfyxOpt() : RMSKernelBase("rms_gpu_bfyx_opt") {}
+    virtual ~RMSKernelBfyxOpt() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    bool Validate(const Params&, const optional_params&) const override;
+    DispatchData SetDefault(const rms_params& params) const override;
+    JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const override;
+};
+}  // namespace kernel_selector
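The Validate override above encodes the dispatch scheme of the optimized kernel: with VEC_SIZE 8, each work-item handles one 8-wide vector, so the local work-group size is data_size / 8 and must respect both the device work-group limit and the local memory budget. A standalone restatement of that feasibility check (hypothetical helper; assumes f16 inputs, i.e. 2 bytes per element, matching BytesPerElement in the patch):

#include <algorithm>
#include <cstddef>

// Sketch of the bfyx_opt feasibility check, mirroring the derivation in
// RMSKernelBfyxOpt::Validate above.
bool fits_bfyx_opt(size_t data_size, size_t max_work_group_size, size_t max_local_mem_size) {
    if (data_size < 8)                      // need at least one full 8-wide vector
        return false;
    const size_t local_mem_per_wi = 2 * 2;  // two f16 staging values per work-item
    const size_t max_lws = std::min(max_work_group_size, max_local_mem_size / local_mem_per_wi);
    return data_size / 8 <= max_lws;        // one work-item per 8-element chunk
}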
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp
new file mode 100644
index 00000000000000..9dbdf30154aea9
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp
@@ -0,0 +1,35 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_kernel_ref.h"
+#include "kernel_selector_utils.h"
+#include <string>
+
+namespace kernel_selector {
+ParamsKey RMSKernelRef::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBatching();
+    k.EnableDifferentTypes();
+    k.EnableDynamicShapesSupport();
+    return k;
+}
+
+KernelsData RMSKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetCommonKernelsData(params, options);
+}
+
+KernelsPriority RMSKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
+    return FORCE_PRIORITY_9;
+}
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h
new file mode 100644
index 00000000000000..7c2e3dd512e8f7
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h
@@ -0,0 +1,20 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "rms_kernel_base.h"
+
+namespace kernel_selector {
+class RMSKernelRef : public RMSKernelBase {
+public:
+    using Parent = RMSKernelBase;
+    RMSKernelRef() : RMSKernelBase("rms_gpu_ref") {}
+    virtual ~RMSKernelRef() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+};
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp
new file mode 100644
index 00000000000000..13cabf77011d48
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp
@@ -0,0 +1,18 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_kernel_selector.h"
+#include "rms_kernel_ref.h"
+#include "rms_kernel_bfyx_opt.h"
+
+namespace kernel_selector {
+rms_kernel_selector::rms_kernel_selector() {
+    Attach<RMSKernelRef>();
+    Attach<RMSKernelBfyxOpt>();
+}
+
+KernelsData rms_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
+    return GetNaiveBestKernel(params, options, KernelType::RMS);
+}
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h
new file mode 100644
index 00000000000000..f951264c7f5c6b
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h
@@ -0,0 +1,23 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector {
+class rms_kernel_selector : public kernel_selector_base {
+public:
+    static rms_kernel_selector& Instance() {
+        static rms_kernel_selector instance_;
+        return instance_;
+    }
+
+    rms_kernel_selector();
+
+    virtual ~rms_kernel_selector() {}
+
+    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+};
+}  // namespace kernel_selector
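Both implementations register with static priorities: FORCE_PRIORITY_7 for rms_gpu_bfyx_opt and FORCE_PRIORITY_9 for rms_gpu_ref. In the kernel selector a lower value wins, so the optimized kernel is chosen whenever its Validate passes and the reference kernel serves as the fallback. A toy model of that choice (illustrative only; the real GetNaiveBestKernel operates on KernelsData, not plain structs):

#include <vector>

// Toy restatement of the naive selection rule: among candidates that pass
// Validate, the smallest priority value wins.
struct Candidate { const char* name; int priority; bool valid; };

const char* pick(const std::vector<Candidate>& kernels) {
    const Candidate* best = nullptr;
    for (const auto& k : kernels)
        if (k.valid && (!best || k.priority < best->priority))
            best = &k;
    return best ? best->name : "none";
}
// pick({{"rms_gpu_bfyx_opt", 7, true}, {"rms_gpu_ref", 9, true}}) yields "rms_gpu_bfyx_opt".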
diff --git a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp
index fd36533e2a5d47..6e91cc7db9fe2f 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp
@@ -17,7 +17,18 @@
 namespace ov {
 namespace intel_gpu {
 
 static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::shared_ptr<ov::op::internal::NonMaxSuppressionIEInternal>& op) {
-    validate_inputs_count(op, {2, 3, 4, 5, 6});
+    cldnn::non_max_suppression::Rotation rotation = cldnn::non_max_suppression::Rotation::NONE;
+    const bool is_nms_rotated = op->m_rotation != ov::op::internal::NonMaxSuppressionIEInternal::Rotation_None;
+    if (is_nms_rotated) {
+        // For NMSRotated threshold inputs are mandatory, and soft_nms_sigma input is absent
+        validate_inputs_count(op, {5});
+
+        rotation = op->m_rotation == ov::op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise ?
+                        cldnn::non_max_suppression::Rotation::CLOCKWISE
+                        : cldnn::non_max_suppression::Rotation::COUNTERCLOCKWISE;
+    } else {
+        validate_inputs_count(op, {2, 3, 4, 5, 6});
+    }
     auto inputs = p.GetInputInfo(op);
     std::vector<cldnn::input_info> reordered_inputs;
     reordered_inputs.resize(inputs.size());
@@ -75,6 +86,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh
         prim.output_paddings = get_output_paddings();
         prim.output_data_types = get_output_data_types();
+        prim.rotation = rotation;
 
         switch (reordered_inputs.size()) {
         case 6:
             prim.soft_nms_sigma = reordered_inputs[5].pid;
@@ -142,6 +154,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh
                    "", "", "", "", "", "");
         prim.output_data_types = get_output_data_types();
+        prim.rotation = rotation;
 
         switch (reordered_inputs.size()) {
         case 6:
             prim.soft_nms_sigma = reordered_inputs[5].pid;
diff --git a/src/plugins/intel_gpu/src/plugin/ops/rms.cpp b/src/plugins/intel_gpu/src/plugin/ops/rms.cpp
new file mode 100644
index 00000000000000..01289bd5022d6d
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/ops/rms.cpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/op/rms.hpp"
+#include "intel_gpu/plugin/program_builder.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
+#include "intel_gpu/primitives/rms.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+using RMS = ov::intel_gpu::op::RMS;
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
+
+namespace ov {
+namespace intel_gpu {
+
+static void CreateRMSOp(ProgramBuilder& p, const std::shared_ptr<ov::op::internal::RMS>& op) {
+    validate_inputs_count(op, {2});
+    auto inputs = p.GetInputInfo(op);
+    std::string primitive_name = layer_type_name_ID(op);
+
+    auto get_output_data_types = [&]() {
+        std::vector<cldnn::data_types> output_data_types;
+        auto type = op->get_output_element_type(0);
+        output_data_types.push_back(cldnn::element_type_to_data_type(type));
+        return output_data_types;
+    };
+    auto rms = cldnn::rms(primitive_name,
+                          inputs[0],
+                          inputs[1],
+                          op->get_epsilon());
+    rms.output_data_types = get_output_data_types();
+    p.add_primitive(*op, rms);
+}
+
+REGISTER_FACTORY_IMPL(internal, RMS);
+
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp
new file mode 100644
index 00000000000000..5dcd12071d1712
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/op/rms.hpp"
+
+namespace ov {
+namespace intel_gpu {
+namespace op {
+
+RMS::RMS(const Output<Node>& data,
+         const Output<Node>& gamma,
+         double epsilon,
+         const ov::element::Type output_type)
+    : Op({data, gamma}), m_epsilon(epsilon), m_output_type(output_type) {
+    validate_and_infer_types();
+}
+
+bool RMS::visit_attributes(ov::AttributeVisitor& visitor) {
+    visitor.on_attribute("epsilon", m_epsilon);
+    visitor.on_attribute("output_type", m_output_type);
+    return true;
+}
+
+void RMS::validate_and_infer_types() {
+    auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type;
+    set_output_type(0, output_type, get_input_partial_shape(0));
+}
+
+std::shared_ptr<Node> RMS::clone_with_new_inputs(const ov::OutputVector& new_args) const {
+    check_new_args_count(this, new_args);
+    return std::make_shared<RMS>(new_args.at(0),
+                                 new_args.at(1),
+                                 m_epsilon,
+                                 m_output_type);
+}
+
+}  // namespace op
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp
new file mode 100644
index 00000000000000..bcd192454f3d3a
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp
@@ -0,0 +1,103 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_fusion.hpp"
+
+#include "intel_gpu/op/rms.hpp"
+
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/divide.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/power.hpp"
+#include "openvino/op/reduce_mean.hpp"
+#include "openvino/op/sqrt.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "transformations/utils/utils.hpp"
+
+namespace ov {
+namespace intel_gpu {
+
+static std::function<bool(ov::Output<ov::Node>)> constant_value(const float target_value) {
+    return [=](const ov::Output<ov::Node>& output) -> bool {
+        auto node = std::dynamic_pointer_cast<ov::op::v0::Constant>(output.get_node_shared_ptr());
+        if (!node) {
+            return false;
+        }
+        float value;
+        if (!ov::op::util::get_single_value(node, value)) {
+            return false;
+        }
+        return value == target_value;
+    };
+}
+
+RMSFusion::RMSFusion() {
+    using namespace ov::pass::pattern;
+
+    // Detect RMS decomposition pattern
+    //  x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma
+    auto x = any_input();
+
+    // x^2
+    auto const_power = wrap_type<ov::op::v0::Constant>(constant_value(2));
+    auto power = wrap_type<ov::op::v1::Power>({x, const_power});
+
+    // ReduceMean(x^2,axes)
+    auto mean_axes = wrap_type<ov::op::v0::Constant>(constant_value(-1));
+    auto mean = wrap_type<ov::op::v1::ReduceMean>({power, mean_axes});
+
+    // ReduceMean(x^2,axes)+eps
+    auto eps = wrap_type<ov::op::v0::Constant>();
+    auto add_eps = wrap_type<ov::op::v1::Add>({mean, eps});
+
+    // Sqrt(ReduceMean(x^2,axes)+eps)
+    auto sqrt = wrap_type<ov::op::v0::Sqrt>({add_eps});
+
+    // 1/Sqrt(ReduceMean(x^2,axes)+eps)
+    auto const_div = wrap_type<ov::op::v0::Constant>(constant_value(-1));
+    auto div = wrap_type<ov::op::v1::Power>({sqrt, const_div});
+
+    // x * 1/Sqrt(ReduceMean(x^2,axes)+eps)
+    auto mul1 = wrap_type<ov::op::v1::Multiply>({x, div});
+
+    // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma
+    auto gamma = wrap_type<ov::op::v0::Constant>(type_matches(element::f32));
+    auto mul2 = wrap_type<ov::op::v1::Multiply>({gamma, mul1});
+
+    // compress RMS result
+    auto comp = wrap_type<ov::op::v0::Convert>({mul2});
+
+    ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        auto x_output = pattern_map.at(x);
+
+        auto const_eps_node =
+            std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(eps).get_node_shared_ptr());
+        float eps_value;
+        if (!ov::op::util::get_single_value(const_eps_node, eps_value)) {
+            return false;
+        }
+
+        const auto& gamma_node = pattern_map.at(gamma).get_node_shared_ptr();
+        auto output_type = m.get_match_root()->get_output_element_type(0);
+
+        auto rms = std::make_shared<op::RMS>(x_output,
+                                             gamma_node,
+                                             eps_value,
+                                             output_type);
+        rms->set_friendly_name(m.get_match_root()->get_friendly_name());
+        ov::copy_runtime_info(m.get_matched_nodes(), rms);
+        ov::replace_node(m.get_match_root(), rms);
+
+        return true;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(comp, "RMSFusion");
+    this->register_matcher(m, callback);
+}
+
+}  // namespace intel_gpu
+}  // namespace ov
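For reference, the subgraph matched above is the standard RMS normalization. With eps and gamma bound as in the pattern, the fused op computes

\[ \mathrm{RMSNorm}(x)_i = \frac{x_i}{\sqrt{\tfrac{1}{n}\sum_{j=1}^{n} x_j^{2} + \epsilon}} \cdot \gamma_i \]

where the mean runs over the last axis (the ReduceMean axes constant is -1). The Power node with exponent -1 supplies the reciprocal of the square root, and the trailing Convert is the optional down-cast of the fused result to the target output precision.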
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp
new file mode 100644
index 00000000000000..66f236f3f26c38
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+
+namespace ov {
+namespace intel_gpu {
+
+class RMSFusion : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("RMSFusion", "0");
+    RMSFusion();
+};
+
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 10275dae95d729..ac567cd998f9a2 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -88,6 +88,7 @@
 #include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
 #include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp"
 #include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp"
+#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp"
 #include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp"
 #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
 #include "transformations/op_conversions/convert_gather_downgrade.hpp"
@@ -113,6 +114,7 @@
 #include "plugin/transformations/convert_matmul_to_fc.hpp"
 #include "plugin/transformations/move_fc_reshape_to_weights.hpp"
 #include "plugin/transformations/convert_fc_to_compressed.hpp"
+#include "plugin/transformations/rms_fusion.hpp"
 
 #include "transformations/low_precision/mark_dequantization_subgraph.hpp"
 #include "low_precision/pull_reshape_through_dequantization.hpp"
@@ -271,6 +273,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
+        manager.register_pass<ov::pass::ConvertNMSRotatedToNMSIEInternal>();
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
@@ -642,6 +645,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
+        manager.register_pass<RMSFusion>();
 
         manager.run_passes(func);
     }
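(The unchanged register_pass context lines above lost their template arguments in extraction and are left as-is rather than guessed.) Registered at the end of the pipeline, the fusion runs over the whole model like any other matcher pass. A minimal sketch of invoking it in isolation, e.g. from a unit test — the include path is illustrative and fuse_rms_only is a hypothetical helper:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "rms_fusion.hpp"  // illustrative path to the header added above

// Runs only RMSFusion on a model; any x * 1/sqrt(mean(x^2) + eps) * gamma
// subgraph is replaced with a single ov::intel_gpu::op::RMS node.
void fuse_rms_only(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::intel_gpu::RMSFusion>();
    manager.run_passes(model);
}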
diff --git a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
index 964e6dcad3dcc5..9f4c18ef5d2ce4 100644
--- a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
+++ b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
@@ -7,6 +7,7 @@
 #include "common_test_utils/common_utils.hpp"
 #include "common_test_utils/file_utils.hpp"
 #include "functional_test_utils/skip_tests_config.hpp"
+#include "functional_test_utils/ov_plugin_cache.hpp"
 #include "ov_models/subgraph_builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
@@ -59,8 +60,11 @@ class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchShapeParams>,
         std::tie(inputShape, netPrecision, configuration) = this->GetParam();
 
         init_input_shapes(inputShape);
@@ -73,6 +77,7 @@ class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchShapeParams>,
         src_func->reshape(dynShape);
     }
 
+    std::shared_ptr<ov::Model> src_func;
     // std::map<std::string, std::string> configuration;
     std::vector<ov::test::InputShape> inputShape;
@@ -81,7 +86,6 @@ class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchShapeParams>,
-    core = std::make_shared<ov::Core>();
     run();
 }
@@ -97,12 +101,12 @@ TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound_cached) {
     ov::test::utils::removeFilesWithExt(cacheFolderName, "cl_cache");
     ov::test::utils::removeDir(cacheFolderName);
-    core = std::make_shared<ov::Core>();
     core->set_property(ov::cache_dir(cacheFolderName));
     run();
     }
     {
-    core = std::make_shared<ov::Core>();
+    core.reset();
+    core = ov::test::utils::PluginCache::get().core();
     core->set_property(ov::cache_dir(cacheFolderName));
     run();
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp
index 63a57786fb3b79..5f74409c75d2f9 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp
@@ -4,12 +4,11 @@
 
 #include <vector>
 
-#include "single_layer_tests/convert_color_nv12.hpp"
+#include "single_op_tests/convert_color_nv12.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::ConvertColorNV12LayerTest;
 
 const std::vector<ov::Shape> inShapes_nhwc = {
     {1, 10, 10, 1}
@@ -20,27 +19,60 @@ const std::vector<ov::element::Type> inTypes = {
     ov::element::f32
 };
 
-INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12,
+auto generate_input_static_shapes = [] (const std::vector<ov::Shape>& original_shapes, bool single_plane) {
+    std::vector<std::vector<ov::Shape>> result_shapes;
+    for (const auto& original_shape : original_shapes) {
+        std::vector<ov::Shape> one_result_shapes;
+        if (single_plane) {
+            auto shape = original_shape;
+            shape[1] = shape[1] * 3 / 2;
+            one_result_shapes.push_back(shape);
+        } else {
+            auto shape = original_shape;
+            one_result_shapes.push_back(shape);
+            auto uvShape = ov::Shape{shape[0], shape[1] / 2, shape[2] / 2, 2};
+            one_result_shapes.push_back(uvShape);
+        }
+        result_shapes.push_back(one_result_shapes);
+    }
+    return result_shapes;
+};
+
+auto in_shapes_single_plane_static = generate_input_static_shapes(inShapes_nhwc, true);
+auto in_shapes_two_planes_static = generate_input_static_shapes(inShapes_nhwc, false);
+
+INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12SinglePlane,
                          ConvertColorNV12LayerTest,
-                         ::testing::Combine(::testing::ValuesIn(inShapes_nhwc),
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_single_plane_static)),
                                             ::testing::ValuesIn(inTypes),
                                             ::testing::Bool(),
+                                            ::testing::Values(true),
+                                            ::testing::Values(ov::test::utils::DEVICE_GPU)),
+                         ConvertColorNV12LayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12TwoPlane,
+                         ConvertColorNV12LayerTest,
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_two_planes_static)),
+                                            ::testing::ValuesIn(inTypes),
                                             ::testing::Bool(),
+                                            ::testing::Values(false),
                                             ::testing::Values(ov::test::utils::DEVICE_GPU)),
                          ConvertColorNV12LayerTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12_acc,
-                         ConvertColorNV12AccuracyTest,
-                         ::testing::Combine(::testing::Values(ov::Shape{1, 16 * 6, 16, 1}),
+INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12SinglePlane_acc,
+                         ConvertColorNV12LayerTest,
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+                                                generate_input_static_shapes({{1, 16 * 6, 16, 1}}, true))),
                                             ::testing::Values(ov::element::u8),
-                                            ::testing::Bool(),
-                                            ::testing::Bool(),
+                                            ::testing::Values(false),
+                                            ::testing::Values(true),
                                             ::testing::Values(ov::test::utils::DEVICE_GPU)),
                          ConvertColorNV12LayerTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorNV12_acc,
-                         ConvertColorNV12AccuracyTest,
-                         ::testing::Combine(::testing::Values(ov::Shape{1, 256 * 256, 256, 1}),
+INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorNV12SinglePlane_acc,
+                         ConvertColorNV12LayerTest,
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+                                                generate_input_static_shapes({{1, 256 * 256, 256, 1}}, true))),
                                             ::testing::Values(ov::element::u8),
                                             ::testing::Values(false),
                                             ::testing::Values(true),
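A quick worked example of generate_input_static_shapes above, read straight off the code: for the base NHWC shape {1, 10, 10, 1}, single-plane mode stacks Y and interleaved UV along the height axis, producing one input of {1, 15, 10, 1} (H * 3/2); two-plane mode keeps the Y plane {1, 10, 10, 1} and adds a half-resolution UV plane {1, 5, 5, 2}.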
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp
index 13293d3f6dc42a..2f00b4e38e7090 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp
@@ -4,15 +4,14 @@
 
 #include <vector>
 
-#include "single_layer_tests/convolution.hpp"
+#include "single_op_tests/convolution.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+using ov::test::ConvolutionLayerTest;
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
+    ov::element::f16
 };
 
 /* ============= 2D Convolution ============= */
@@ -27,9 +26,9 @@ const std::vector<std::vector<size_t>> padEnds = {{0, 0},
 const std::vector<std::vector<size_t>> dilations = {{1, 1}, {3, 1}};
 const std::vector<size_t> numOutChannels = {1, 5};
-const std::vector<ngraph::op::PadType> padTypes = {
-    ngraph::op::PadType::EXPLICIT,
-    ngraph::op::PadType::VALID
+const std::vector<ov::op::PadType> padTypes = {
+    ov::op::PadType::EXPLICIT,
+    ov::op::PadType::VALID
 };
 const auto conv2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(kernels),
@@ -38,7 +37,7 @@ const auto conv2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(padEnds),
     ::testing::ValuesIn(dilations),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 const auto conv2DParams_AutoPadValid = ::testing::Combine(
     ::testing::ValuesIn(kernels),
@@ -54,11 +53,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_ExplicitPadding, ConvolutionLayerTe
         ::testing::Combine(
             conv2DParams_ExplicitPadding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
+            ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionLayerTest::getTestCaseName);
 
@@ -66,11 +61,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_AutoPadValid, ConvolutionLayerTest,
         ::testing::Combine(
             conv2DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
+            ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionLayerTest::getTestCaseName);
 
/* ============= 3D Convolution ============= */
@@ -94,18 +85,14 @@ const auto conv3DParams = ::testing::Combine(
     ::testing::ValuesIn(paddings3d),
     ::testing::ValuesIn(dilations3d),
     ::testing::ValuesIn(numOutChannels3d),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_Convolution3D_Basic1, ConvolutionLayerTest,
         ::testing::Combine(
             conv3DParams,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(std::vector<size_t>({1, 3, 10, 10, 10})),
+            ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 10, 10, 10}})),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionLayerTest::getTestCaseName);
 }  // namespace
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp
index f80df8897d6e84..1488f7cbf6358b 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp
@@ -4,32 +4,32 @@
 
 #include <vector>
 
-#include "single_layer_tests/convolution_backprop_data.hpp"
+#include "single_op_tests/convolution_backprop_data.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::ConvolutionBackpropDataLayerTest;
+using ov::test::convBackpropDataLayerTestParamsSet;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
+    ov::element::f16
 };
 
 const std::vector<size_t> numOutChannels = {1, 5, 16};
-const std::vector<std::vector<size_t>> emptyOutputShape = {{}};
+const std::vector<ov::Shape> emptyOutputShape = {{}};
 const std::vector<std::vector<ptrdiff_t>> emptyOutputPadding = {{}};
 
 /* ============= 2D ConvolutionBackpropData ============= */
-const std::vector<InferenceEngine::Precision> netPrecisions2D = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+const std::vector<ov::element::Type> netPrecisions2D = {
+    ov::element::f32,
+    ov::element::f16
 };
 
-const std::vector<std::vector<size_t>> inputShapes2D = {{1, 3, 30, 30},
-                                                        {1, 16, 10, 10},
-                                                        {1, 32, 10, 10}};
-const std::vector<std::vector<size_t>> kernels2D = {{1, 1}, {3, 3}, {3, 5}};
+const std::vector<std::vector<ov::Shape>> inputShapes2D = {{{1, 3, 30, 30}},
+                                                           {{1, 16, 10, 10}},
+                                                           {{1, 32, 10, 10}}};
+const std::vector<std::vector<size_t>> kernels2D = {/*{1, 1},*/ {3, 3}, {3, 5}};
 const std::vector<std::vector<size_t>> strides2D = {{1, 3}};
 const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
 const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}, {1, 1}};
@@ -42,7 +42,7 @@ const auto conv2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(padEnds2D),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 const auto conv2DParams_AutoPadValid = ::testing::Combine(
@@ -52,7 +52,7 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine(
     ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 
@@ -60,11 +60,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, Convol
         ::testing::Combine(
             conv2DParams_ExplicitPadding,
             ::testing::ValuesIn(netPrecisions2D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -73,11 +69,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, Convoluti
         ::testing::Combine(
             conv2DParams_AutoPadValid,
             ::testing::ValuesIn(netPrecisions2D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -92,7 +84,7 @@ const auto conv2DParams_ExplicitPadding_output_padding = ::testing::Combine(
     ::testing::ValuesIn(padEnds2D),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(outputPadding2D)
 );
 const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine(
@@ -102,7 +94,7 @@ const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine(
     ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(outputPadding2D)
 );
 
@@ -110,11 +102,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputP
         ::testing::Combine(
             conv2DParams_AutoPadValid_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -123,27 +111,24 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddi
         ::testing::Combine(
             conv2DParams_ExplicitPadding_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
 /* ============= 3D ConvolutionBackpropData ============= */
-const std::vector<InferenceEngine::Precision> netPrecisions3D = {
-    InferenceEngine::Precision::FP32,
+
+const std::vector<ov::element::Type> netPrecisions3D = {
+    ov::element::f32,
 };
 
-const std::vector<std::vector<size_t>> inputShapes3D = {{1, 3, 10, 10, 10},
-                                                        {1, 16, 5, 5, 5},
-                                                        {1, 32, 5, 5, 5}};
-const std::vector<std::vector<size_t>> kernels3D = {{1, 1, 1}, {3, 3, 3}};
-const std::vector<std::vector<size_t>> strides3D = {{1, 1, 1}};
+const std::vector<std::vector<ov::Shape>> inputShapes3D = {{{1, 3, 10, 10, 10}},
+                                                           {{1, 16, 5, 5, 5}},
+                                                           {{1, 32, 5, 5, 5}}};
+const std::vector<std::vector<size_t>> kernels3D = {/*{1, 1, 1}, */{3, 3, 3}};
+const std::vector<std::vector<size_t>> strides3D = {{1, 1, 1}};
 const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
 const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}, {1, 1, 1}};
-const std::vector<std::vector<size_t>> dilations3D = {{1, 1, 1}};
+const std::vector<std::vector<size_t>> dilations3D = {{1, 1, 1}};
 
 const auto conv3DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(kernels3D),
@@ -170,11 +155,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, Convol
         ::testing::Combine(
             conv3DParams_ExplicitPadding,
             ::testing::ValuesIn(netPrecisions3D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -183,11 +164,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, Convoluti
         ::testing::Combine(
             conv3DParams_AutoPadValid,
             ::testing::ValuesIn(netPrecisions3D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -202,7 +179,7 @@ const auto conv3DParams_ExplicitPadding_output_padding = ::testing::Combine(
     ::testing::ValuesIn(padEnds3D),
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(outputPadding3D)
 );
 const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine(
@@ -212,7 +189,7 @@ const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine(
     ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(outputPadding3D)
 );
 
@@ -220,11 +197,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputP
         ::testing::Combine(
             conv3DParams_AutoPadValid_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -233,11 +206,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddi
         ::testing::Combine(
             conv3DParams_ExplicitPadding_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp
index 9d3c5b80145d0f..2990b49ee6e44e 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp
@@ -3,30 +3,28 @@
 //
 
 #include <vector>
-#include "single_layer_tests/grn.hpp"
+#include "single_op_tests/grn.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-using namespace ngraph::helpers;
-
 namespace {
-    // Common params
-    const std::vector<InferenceEngine::Precision> netPrecisions = {
-        InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::FP16
-    };
+using ov::test::GrnLayerTest;
+// Common params
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
+    ov::element::f16
+};
+
+std::vector<std::vector<ov::Shape>> input_shapes_static = {
+    {{1, 3, 30, 30}},
+    {{2, 16, 15, 20}}
+};
 
-    INSTANTIATE_TEST_SUITE_P(smoke_Grn_Basic,
-                             GrnLayerTest,
-                             ::testing::Combine(::testing::ValuesIn(netPrecisions),
-                                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                                ::testing::Values(InferenceEngine::Layout::ANY),
-                                                ::testing::Values(InferenceEngine::Layout::ANY),
-                                                ::testing::Values(std::vector<size_t>({1, 3, 30, 30}),
-                                                                  std::vector<size_t>({2, 16, 15, 20})),
-                                                ::testing::Values(0.33f, 1.1f),
-                                                ::testing::Values(ov::test::utils::DEVICE_GPU)),
-                             GrnLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_Grn_Basic,
+                         GrnLayerTest,
+                         ::testing::Combine(::testing::ValuesIn(netPrecisions),
+                                            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)),
+                                            ::testing::ValuesIn({0.33f, 1.1f}),
+                                            ::testing::Values(ov::test::utils::DEVICE_GPU)),
+                         GrnLayerTest::getTestCaseName);
 }  // namespace
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp
index 446b0bc46336d0..e643f5716e5696 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp
@@ -4,16 +4,16 @@
 
 #include <vector>
 
-#include "single_layer_tests/group_convolution.hpp"
+#include "single_op_tests/group_convolution.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::GroupConvolutionLayerTest;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32
 };
+
 /* ============= 1D GroupConvolution ============= */
 // 1D group convolution is not working correctly
 const std::vector<std::vector<size_t>> kernels1D = {{3}};
@@ -33,18 +33,15 @@ const auto groupConv1DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations1D),
     ::testing::ValuesIn(numOutChannels1D),
     ::testing::ValuesIn(numGroups1D),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution1D_ExplicitPadding_Disabled, GroupConvolutionLayerTest,
        ::testing::Combine(
            groupConv1DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -56,18 +53,15 @@ const auto dwConv1DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations1D),
     ::testing::ValuesIn(numOutChannels1D),
     ::testing::ValuesIn(numDWGroups1D),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_DwGroupConvolution1D_ExplicitPadding, GroupConvolutionLayerTest,
        ::testing::Combine(
            dwConv1DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -105,11 +99,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution2D_ExplicitPadding, GroupConvolut
        ::testing::Combine(
            groupConv2DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -117,11 +108,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution2D_AutoPadValid, GroupConvolution
        ::testing::Combine(
            groupConv2DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -157,11 +145,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution3D_ExplicitPadding, GroupConvolut
        ::testing::Combine(
            groupConv3DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 4, 10, 10, 10})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 4, 10, 10, 10}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -169,11 +154,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution3D_AutoPadValid, GroupConvolution
        ::testing::Combine(
            groupConv3DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 4, 10, 10, 10})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 4, 10, 10, 10}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
index d7baff2c2e0d7f..40d364385fe7d9 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
@@ -4,26 +4,28 @@
 
 #include <vector>
 
-#include "single_layer_tests/group_convolution_backprop_data.hpp"
+#include "single_op_tests/group_convolution_backprop_data.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::GroupConvBackpropLayerTest;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32
 };
 
-const std::vector<std::vector<size_t>> emptyOutputShape = {{}};
+const std::vector<ov::Shape> emptyOutputShape = {{}};
 const std::vector<std::vector<ptrdiff_t>> emptyOutputPadding = {{}};
 
 const std::vector<size_t> numOutChannels = {16, 32};
 const std::vector<size_t> numGroups = {2, 8, 16};
 
 /* ============= 2D GroupConvolution ============= */
-const std::vector<std::vector<size_t>> inputShapes2D = {{1, 16, 10, 10},
-                                                        {1, 32, 10, 10}};
+const std::vector<std::vector<ov::Shape>> inputShapes2D = {
+    {{1, 16, 10, 10}},
+    {{1, 32, 10, 10}}
+};
+
 const std::vector<std::vector<size_t>> kernels2D = {{1, 1}, {3, 3}};
 const std::vector<std::vector<size_t>> strides2D = {{1, 1}};
 const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
@@ -38,7 +40,7 @@ const auto groupConvBackpropData2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine(
@@ -49,7 +51,7 @@ const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine(
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 
@@ -57,11 +59,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData2D_ExplicitPadding, GroupCon
        ::testing::Combine(
            groupConvBackpropData2DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes2D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
 
@@ -70,18 +68,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData2D_AutoPadValid, GroupConvBa
        ::testing::Combine(
            groupConvBackpropData2DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes2D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
 
 /* ============= 3D GroupConvolution ============= */
-const std::vector<std::vector<size_t>> inputShapes3D = {{1, 16, 5, 5, 5},
-                                                        {1, 32, 5, 5, 5}};
+const std::vector<std::vector<ov::Shape>> inputShapes3D = {
+    {{1, 16, 5, 5, 5}},
+    {{1, 32, 5, 5, 5}}
+};
+
 const std::vector<std::vector<size_t>> kernels3D = {{1, 1, 1}, {3, 3, 3}};
 const std::vector<std::vector<size_t>> strides3D = {{1, 1, 1}};
 const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
@@ -96,7 +93,7 @@ const auto groupConvBackpropData3DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine(
@@ -107,7 +104,7 @@ const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine(
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 
@@ -115,11 +112,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData3D_ExplicitPadding, GroupCon
        ::testing::Combine(
            groupConvBackpropData3DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes3D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
 
@@ -128,11 +121,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData3D_AutoPadValid, GroupConvBa
        ::testing::Combine(
            groupConvBackpropData3DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes3D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp
index df58f1314d6346..70fe6f0d8fd995 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp
@@ -4,28 +4,34 @@
 
 #include <vector>
 
-#include "single_layer_tests/gru_cell.hpp"
+#include "single_op_tests/gru_cell.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
-    std::vector<bool> should_decompose{false, true};
-    std::vector<size_t> batch{5};
-    std::vector<size_t> hidden_size{1, 10};
-    std::vector<size_t> input_size{1, 30};
-    std::vector<std::vector<std::string>> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"},
-                                                         {"tanh", "relu"}};
-    std::vector<float> clip = {0.0f, 0.7f};
-    std::vector<bool> linear_before_reset = {true, false};
-    std::vector<ngraph::helpers::InputLayerType> layer_types = {
-        ngraph::helpers::InputLayerType::CONSTANT,
-        ngraph::helpers::InputLayerType::PARAMETER
-    };
-    std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
-                                                             InferenceEngine::Precision::FP16};
-
-    INSTANTIATE_TEST_SUITE_P(GRUCellCommon, GRUCellTest,
+using ov::test::GRUCellTest;
+
+std::vector<bool> should_decompose{false, true};
+std::vector<size_t> batch{5};
+std::vector<size_t> hidden_size{1, 10};
+std::vector<size_t> input_size{1, 30};
+std::vector<std::vector<std::string>> activations = {
+    {"relu", "tanh"},
+    {"tanh", "sigmoid"},
+    {"sigmoid", "tanh"},
+    {"tanh", "relu"}
+};
+
+std::vector<float> clip = {0.0f, 0.7f};
+std::vector<bool> linear_before_reset = {true, false};
+std::vector<ov::test::utils::InputLayerType> layer_types = {
+    ov::test::utils::InputLayerType::CONSTANT,
+    ov::test::utils::InputLayerType::PARAMETER
+};
+
+std::vector<ov::element::Type> netPrecisions = {ov::element::f32,
+                                                ov::element::f16};
+
+INSTANTIATE_TEST_SUITE_P(GRUCellCommon, GRUCellTest,
        ::testing::Combine(
            ::testing::ValuesIn(should_decompose),
            ::testing::ValuesIn(batch),
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp
index 6614ab44b7e294..5f4e9c3f1acd84 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp
@@ -2,17 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "shared_test_classes/single_layer/is_inf.hpp"
-
-#include <array>
-#include <vector>
-
-using namespace ov::test;
-using namespace ov::test::subgraph;
+#include "single_op_tests/is_inf.hpp"
 
 namespace {
+using ov::test::IsInfLayerTest;
 
-const std::vector<std::vector<InputShape>> inShapesStatic = {
+const std::vector<std::vector<ov::test::InputShape>> inShapesStatic = {
     {{{}, {{2}}}},
     {{{}, {{10, 200}}}},
     {{{}, {{4, 4, 16}}}},
@@ -21,7 +16,7 @@ const std::vector<std::vector<ov::test::InputShape>> inShapesStatic = {
     {{{}, {{16, 16, 16, 16, 16, 16}}}},
 };
 
-constexpr std::array netPrecisions = {ov::element::f32, ov::element::f16};
+constexpr std::array netPrecisions = {ov::element::f32, ov::element::f16};
 
 constexpr std::array detectNegative = {true, false};
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp
index 1f27e97f8f97c3..57359e6d928776 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp
@@ -4,21 +4,20 @@
 
 #include <vector>
 
-#include "single_layer_tests/log_softmax.hpp"
+#include "single_op_tests/log_softmax.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::LogSoftmaxLayerTest;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
 };
 
-const std::vector<InferenceEngine::SizeVector> inputShapes2D = {
-    InferenceEngine::SizeVector {1, 100},
-    InferenceEngine::SizeVector {100, 1},
-    InferenceEngine::SizeVector {10, 10},
+const std::vector<std::vector<ov::Shape>> inputShapes2D = {
+    {{1, 100}},
+    {{100, 1}},
+    {{10, 10}},
 };
 
 const std::vector<int64_t> axis2D = {
@@ -28,20 +27,15 @@ const std::vector<int64_t> axis2D = {
 INSTANTIATE_TEST_SUITE_P(smoke_LogSoftmax2D,
                          LogSoftmaxLayerTest,
                          testing::Combine(testing::ValuesIn(netPrecisions),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::ValuesIn(inputShapes2D),
+                                          testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
                                           testing::ValuesIn(axis2D),
-                                          testing::Values(ov::test::utils::DEVICE_GPU),
-                                          testing::Values(std::map<std::string, std::string>())),
+                                          testing::Values(ov::test::utils::DEVICE_GPU)),
                          LogSoftmaxLayerTest::getTestCaseName);
 
-const std::vector<InferenceEngine::SizeVector> inputShapes4D = {
-    InferenceEngine::SizeVector {1, 100, 1, 1},
-    InferenceEngine::SizeVector {1, 3, 4, 3},
-    InferenceEngine::SizeVector {2, 3, 4, 5},
+const std::vector<std::vector<ov::Shape>> inputShapes4D = {
+    {{1, 100, 1, 1}},
+    {{1, 3, 4, 3}},
+    {{2, 3, 4, 5}},
 };
 
 const std::vector<int64_t> axis4D = {
@@ -51,14 +45,9 @@ const std::vector<int64_t> axis4D = {
 INSTANTIATE_TEST_SUITE_P(smoke_LogSoftmax4D,
                          LogSoftmaxLayerTest,
                          testing::Combine(testing::ValuesIn(netPrecisions),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::ValuesIn(inputShapes4D),
+                                          testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes4D)),
                                           testing::ValuesIn(axis4D),
-                                          testing::Values(ov::test::utils::DEVICE_GPU),
-                                          testing::Values(std::map<std::string, std::string>())),
+                                          testing::Values(ov::test::utils::DEVICE_GPU)),
                          LogSoftmaxLayerTest::getTestCaseName);
 
 }  // namespace
-using namespace LayerTestsDefinitions; -using namespace LayerTestsDefinitions::LogicalParams; - namespace { +using ov::test::LogicalLayerTest; + +std::vector> combine_shapes(const std::map>& input_shapes_static) { + std::vector> result; + for (const auto& input_shape : input_shapes_static) { + for (auto& item : input_shape.second) { + result.push_back({input_shape.first, item}); + } + + if (input_shape.second.empty()) { + result.push_back({input_shape.first, {}}); + } + } + return result; +} -std::map, std::vector>> inputShapes = { +std::map> inputShapes = { {{1}, {{1}, {17}, {1, 1}, {2, 18}, {1, 1, 2}, {2, 2, 3}, {1, 1, 2, 3}}}, {{5}, {{1}, {1, 1}, {2, 5}, {1, 1, 1}, {2, 2, 5}}}, {{2, 200}, {{1}, {200}, {1, 200}, {2, 200}, {2, 2, 200}}}, @@ -20,7 +32,7 @@ std::map, std::vector>> inputShapes = { {{2, 1, 1, 3, 1}, {{1}, {1, 3, 4}, {2, 1, 3, 4}, {1, 1, 1, 1, 1}}}, }; -std::map, std::vector>> inputShapesNot = { +std::map> inputShapesNot = { {{1}, {}}, {{5}, {}}, {{2, 200}, {}}, @@ -29,51 +41,39 @@ std::map, std::vector>> inputShapesNot {{2, 1, 1, 3, 1}, {}}, }; -std::vector inputsPrecisions = { - InferenceEngine::Precision::BOOL, -}; - -std::vector logicalOpTypes = { - ngraph::helpers::LogicalTypes::LOGICAL_AND, - ngraph::helpers::LogicalTypes::LOGICAL_OR, - ngraph::helpers::LogicalTypes::LOGICAL_XOR, +std::vector logicalOpTypes = { + ov::test::utils::LogicalTypes::LOGICAL_AND, + ov::test::utils::LogicalTypes::LOGICAL_OR, + ov::test::utils::LogicalTypes::LOGICAL_XOR, }; -std::vector secondInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +std::vector secondInputTypes = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER, }; -std::vector netPrecisions = { - InferenceEngine::Precision::FP32, +std::vector netPrecisions = { + ov::element::boolean, }; std::map additional_config = {}; INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, LogicalLayerTest, - ::testing::Combine(::testing::ValuesIn(LogicalLayerTest::combineShapes(inputShapes)), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(combine_shapes(inputShapes))), ::testing::ValuesIn(logicalOpTypes), ::testing::ValuesIn(secondInputTypes), ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(inputsPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), LogicalLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNot, LogicalLayerTest, - ::testing::Combine(::testing::ValuesIn(LogicalLayerTest::combineShapes(inputShapesNot)), - ::testing::Values(ngraph::helpers::LogicalTypes::LOGICAL_NOT), - ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(combine_shapes(inputShapesNot))), + ::testing::Values(ov::test::utils::LogicalTypes::LOGICAL_NOT), + ::testing::Values(ov::test::utils::InputLayerType::CONSTANT), ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(inputsPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), LogicalLayerTest::getTestCaseName); diff --git 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp index 916690bc246a1e..776095cb34a4cd 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/lrn.hpp" +#include "single_op_tests/lrn.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -const std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; +using ov::test::LrnLayerTest; + +const std::vector netPrecisions = {ov::element::f32, + ov::element::f16}; const std::vector> axes = {{1}, {2, 3}}; @@ -28,9 +28,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_LrnCheck, LrnLayerTest, ::testing::Values(size), ::testing::ValuesIn(axes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(std::vector({10, 10, 3, 2})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(std::vector>( + {{{10, 10, 3, 2}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), LrnLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp index 99bd6279d2f141..37db834db0e465 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp @@ -4,12 +4,12 @@ #include -#include "single_layer_tests/lstm_cell.hpp" +#include "single_op_tests/lstm_cell.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::LSTMCellTest; + std::vector should_decompose{false, true}; std::vector batch{5}; std::vector hidden_size{1, 10}; @@ -20,12 +20,12 @@ std::vector> activations = {{"relu", "sigmoid", "tanh"} {"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"}, {"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}}; std::vector clip{0.f, 0.7f}; -std::vector layer_types = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER +std::vector layer_types = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER }; -std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; +std::vector netPrecisions = {ov::element::f32, + ov::element::f16}; INSTANTIATE_TEST_SUITE_P(LSTMCellCommon, LSTMCellTest, ::testing::Combine( diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp index 026a97154cee4a..95f9f58da6fcd8 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp @@ -4,65 +4,102 @@ #include -#include "single_layer_tests/mat_mul.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/mat_mul.hpp" namespace { 
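The mat_mul hunk that follows splits the single flag-annotated shapeRelatedParams table into four shape vectors, one per (transpose_a, transpose_b) combination, each paired with a fixed std::make_pair(...) value in its own instantiation. A standalone sketch, not part of the suite, of why for example the first_transpose entry { {2, 1, 2, 3}, {3, 2, 4} } is well-formed: MatMul's transpose flags swap the two innermost dimensions of the corresponding input before the (m x k) * (k x n) contraction, and the leading batch dimensions broadcast.

#include <iostream>
#include <memory>
#include "openvino/op/matmul.hpp"
#include "openvino/op/parameter.hpp"

int main() {
    auto a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 1, 2, 3});
    auto b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{3, 2, 4});
    // transpose_a=true: A contributes (3 x 2) matrices; its batch dims {2, 1}
    // broadcast against B's batch dim {3}, so the result batch is {2, 3}.
    auto matmul = std::make_shared<ov::op::v0::MatMul>(a, b, /*transpose_a=*/true, /*transpose_b=*/false);
    std::cout << matmul->get_output_partial_shape(0) << std::endl;  // prints [2,3,3,4]
    return 0;
}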
+using ov::test::MatMulLayerTest; +using ov::test::utils::InputLayerType; + +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::f16, +}; + +std::vector> no_transpose_shapeRelatedParams = { + { {2, 1, 1, 5, 6}, {1, 1, 6, 4} }, + { {2, 1, 2, 3, 5, 6}, {1, 1, 6, 4} }, + { {1, 4, 5, 6}, {1, 4, 6, 4} }, + { {4, 5, 6}, {6, 3} }, + { {9, 9, 9}, {9, 9} }, + { {1, 2, 3}, {1, 1, 3, 2} }, + { {1, 3, 2, 4}, {2, 1, 4, 2} }, + { {2, 1, 2, 4}, {1, 3, 4, 2} }, + { {3, 2, 4}, {2, 1, 4, 2} }, + { {2, 1, 4, 2}, {3, 2, 4} }, + { {3}, {2, 2, 3, 1} }, + { {2, 2, 1, 3}, {3} }, + { {1, 5}, {5, 1} }, + { {1, 5}, {5} }, + { {5}, {5, 1} }, + { {5}, {5} }, +}; + +std::vector> first_transpose_shapeRelatedParams = { + { {2, 1, 2, 3}, {3, 2, 4} }, + { {100, 65}, {100, 73} }, + { {5, 1}, {5, 1} }, +}; -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +std::vector> second_transpose_shapeRelatedParams = { + { {1, 16, 128}, {1, 64, 128} }, + { {2, 1, 3, 2}, {3, 4, 2} }, + { {1, 64, 80}, {1, 77, 80} }, + { {65, 100}, {73, 100} }, + { {1, 5}, {1, 5} }, }; -const std::vector shapeRelatedParams = { - { { {2, 1, 1, 5, 6}, false }, { {1, 1, 6, 4}, false } }, - { { {2, 2, 4, 16}, true }, { {1, 1, 1, 4}, true } }, - { { {2, 1, 2, 3, 5, 6}, false }, { {1, 1, 6, 4}, false } }, - { { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } }, - { { {1, 16, 128}, false }, { {1, 64, 128}, true } }, - { { {4, 5, 6}, false }, { {6, 3}, false } }, - { { {9, 9, 9}, false }, { {9, 9}, false } }, - { { {1, 2, 3}, false }, { {1, 1, 3, 2}, false } }, - { { {1, 3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, - { { {2, 1, 2, 4}, false }, { {1, 3, 4, 2}, false } }, - { { {3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, - { { {2, 1, 4, 2}, false }, { {3, 2, 4}, false } }, - { { {2, 1, 2, 3}, true }, { {3, 2, 4}, false } }, - { { {2, 1, 3, 2}, false }, { {3, 4, 2}, true } }, - { { {2, 1, 2, 3}, true }, { {3, 4, 2}, true } }, - { { {1, 64, 80}, false }, { {1, 77, 80}, true } }, - { { {3}, false }, { {2, 2, 3, 1}, false } }, - { { {2, 2, 1, 3}, false }, { {3}, false } }, - { { {65, 100}, false }, { {73, 100}, true } }, - { { {100, 65}, true }, { {100, 73}, false } }, - { { {100, 65}, true }, { {73, 100}, true } }, - { { {1, 5}, false }, { {5, 1}, false } }, - { { {5, 1}, true }, { {5, 1}, false } }, - { { {1, 5}, false }, { {1, 5}, true } }, - { { {1, 5}, false }, { {5}, false } }, - { { {5}, false }, { {5, 1}, false } }, - { { {5}, false }, { {5}, false } }, - { { {5}, true }, { {5}, true } } +std::vector> both_transpose_shapeRelatedParams = { + { {2, 2, 4, 16}, {1, 1, 1, 4} }, + { {2, 1, 2, 3}, {3, 4, 2} }, + { {100, 65}, {73, 100} }, + { {5}, {5} }, }; -std::vector secondaryInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +std::vector secondaryInputTypes = { + InputLayerType::CONSTANT, + InputLayerType::PARAMETER, }; std::map additional_config = {}; -INSTANTIATE_TEST_SUITE_P(smoke_MatMul, MatMulTest, +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_NoTranspose, MatMulLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(no_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(false, false)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_FirstTranspose, MatMulLayerTest, + 
::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(first_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(true, false)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_SecondTranspose, MatMulLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(second_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(false, true)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_BothTranspose, MatMulLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapeRelatedParams), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(both_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(true, true)), ::testing::ValuesIn(inputPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(secondaryInputTypes), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), - MatMulTest::getTestCaseName); + MatMulLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp new file mode 100644 index 00000000000000..80224b57ebcff7 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/nms_rotated.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace InferenceEngine; +using namespace ngraph; + +const std::vector inShapeParams = { + InputShapeParams{2, 50, 50}, + InputShapeParams {9, 10, 10} +}; + +const std::vector maxOutBoxPerClass = {5, 20}; +const std::vector threshold = {0.3f, 0.7f}; +const std::vector sortResDesc = {true, false}; +const std::vector outType = {element::i32, element::i64}; +const std::vector clockwise = {true, false}; + +const std::vector inputPrecisions = {Precision::FP32, Precision::FP16}; + +INSTANTIATE_TEST_SUITE_P(smoke_NmsRotatedLayerTest, + NmsRotatedLayerTest, + ::testing::Combine(::testing::ValuesIn(inShapeParams), + ::testing::Combine(::testing::ValuesIn(inputPrecisions), + ::testing::Values(Precision::I32), + ::testing::Values(Precision::FP32)), + ::testing::ValuesIn(maxOutBoxPerClass), + ::testing::ValuesIn(threshold), + ::testing::ValuesIn(threshold), + ::testing::ValuesIn(sortResDesc), + ::testing::ValuesIn(outType), + ::testing::ValuesIn(clockwise), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + NmsRotatedLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 798282680dbccd..25b679cf22cc82 100644 --- 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -115,5 +115,7 @@ std::vector disabledTestPatterns() { R"(.*smoke_LPT.*ElementwiseBranchSelectionTransformation.*)", // Dynamic state unsupported for now R"(.*MemoryDynamicBatch.*)", + // Issue: 123493 + R"(.*GroupNormalizationTest.*CompareWithRefs.*NetType=f16.*)", }; } diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..6e95d1e29a15af --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/transpose_matmul_fusion.hpp" + +using namespace ov::test; + +namespace { +INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion, + ::testing::Values(ov::test::utils::DEVICE_GPU), + TransposeMatMulFusion::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp index d394eb7d05de17..a36426cd84c373 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp @@ -89,6 +89,13 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -106,18 +113,6 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp index fb955a63a837de..531c97fa218a16 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp +++ 
b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp @@ -90,6 +90,13 @@ class GroupDeconvolutionLayerGPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -107,18 +114,6 @@ class GroupDeconvolutionLayerGPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp index 440924fd0a541e..755371e1b0a548 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp @@ -140,12 +140,6 @@ class RandomUnifromDynamicGPUTest : public testing::WithParamInterface(results, params, "random_uniform_test"); } - precisions_map get_ref_precisions_convert_map() override { - // Do not convert reference function from FP16 to FP32 precision, since in case of RandomUniform operation - // data type is matter - return {}; - } - private: std::pair min_max_values; }; diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp new file mode 100644 index 00000000000000..2ea4fc415b52f0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_models/builders.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +using namespace ngraph; +using namespace ov::test; + +namespace SubgraphTestsDefinitions { +/* + * Input(F32) Const(F32) + * | \ / + * | Power(F32) Const(I64) + * | \ / + * | ReduceMean(F32) + * | | Const(F32) + * | | / + * | Add(F32) + * | | + * | Sqrt(F32) Const(F32) + * | | / + * | Divide(F32) + * | / + * Const(F32) Multiply(F32) + * \ | + * Multiply(F32) + * | + * Convert(F16) + */ +using RMSNormDecompositionParams = std::tuple, // input shapes + ov::test::ElementType, // input precision + std::map>; // additional config + +class RMSNormDecomposition : public testing::WithParamInterface, 
public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::vector input_shapes; + ElementType input_precision; + std::map additional_config; + + std::tie(input_shapes, input_precision, additional_config) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (const auto& shape : input_shapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + result << ")_TS="; + for (const auto& shape : input_shapes) { + result << "("; + if (!shape.second.empty()) { + auto itr = shape.second.begin(); + do { + result << ov::test::utils::vec2str(*itr); + } while (++itr != shape.second.end() && result << "_"); + } + result << ")_"; + } + result << "input_precision=" << input_precision << "_"; + + result << "config=("; + for (const auto& configEntry : additional_config) { + result << configEntry.first << ", " << configEntry.second << ":"; + } + result << ")"; + + return result.str(); + } + +protected: + std::shared_ptr init_subgraph(std::vector& input_shapes, + const ov::Shape& target_shape, + const ov::element::Type input_precision) { + ov::ParameterVector params{std::make_shared(input_precision, input_shapes[0])}; + + // x^2 + auto power_const = ov::opset10::Constant::create(input_precision, {}, {2.f}); + auto power = std::make_shared(params[0], power_const); + + // ReduceMean(x^2,axes) + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + + // ReduceMean(x^2,axes)+eps + auto eps = ov::opset10::Constant::create(input_precision, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + + // Sqrt(ReduceMean(x^2,axes)+eps) + auto sqrt = std::make_shared(add_eps); + + // 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto div_const = ov::opset10::Constant::create(input_precision, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto mul1 = std::make_shared(params[0], div); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma + auto dim = *target_shape.rbegin(); + auto gamma = ngraph::builder::makeConstant(input_precision, ov::Shape{dim}, std::vector{}, true); + auto mul2 = std::make_shared(gamma, mul1); + + auto comp = std::make_shared(mul2, ov::element::f16); + + return std::make_shared(NodeVector{comp}, params, "RMSNormDecomposition"); + } + + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector input_shapes; + ElementType input_precision; + std::map additional_config; + + std::tie(input_shapes, input_precision, additional_config) = GetParam(); + + configuration.insert(additional_config.begin(), additional_config.end()); + init_input_shapes(input_shapes); + + inType = outType = input_precision; + + function = init_subgraph(inputDynamicShapes, targetStaticShapes.front().front(), input_precision); + } +}; + +TEST_P(RMSNormDecomposition, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +namespace { + +const std::vector input_precisions = {ov::element::f32, ov::element::f16}; + +const std::vector> input_shapes_basic = { + {{{-1, -1, 96}, {{1, 4, 96}}}}, + {{{-1, -1, -1}, {{1, 2, 16}}}}, + {{{}, {{1, 2, 6}}}}, + {{{}, {{1, 2, 18}}}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_RMSNormDecomposition_basic, + RMSNormDecomposition, + ::testing::Combine(::testing::ValuesIn(input_shapes_basic), + ::testing::ValuesIn(input_precisions), + ::testing::Values(std::map())), + RMSNormDecomposition::getTestCaseName); +} // namespace + +} // namespace 
SubgraphTestsDefinitions
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp
index 909149b05e32fa..d14c0cab8d69a3 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp
@@ -709,3 +709,250 @@ TYPED_TEST(non_max_suppression_basic, soft_nms_sigma_cached) {
 TYPED_TEST(non_max_suppression_basic, multiple_outputs_cached) {
     this->test_multiple_outputs(true);
 }
+
+namespace {
+template<typename T, typename T_IND>
+struct NmsRotatedParams {
+    std::string test_name;
+    int num_batches;
+    int num_boxes;
+    int num_classes;
+    std::vector<T> boxes;
+    std::vector<T> scores;
+    int max_output_boxes_per_class;
+    float iou_threshold;
+    float score_threshold;
+    bool sort_result_descending;
+    bool clockwise;
+    std::vector<T_IND> expected_indices;
+    std::vector<T> expected_scores;
+};
+
+template<typename T> float getError();
+
+template<>
+float getError<float>() {
+    return 0.001;
+}
+
+template<>
+float getError<ov::float16>() {
+    return 0.1;
+}
+
+template<typename T, typename T_IND>
+struct nms_rotated_test : public ::testing::TestWithParam<NmsRotatedParams<T, T_IND>> {
+public:
+    void test(bool is_caching_test = false) {
+        const NmsRotatedParams<T, T_IND> param = testing::TestWithParam<NmsRotatedParams<T, T_IND>>::GetParam();
+        const auto data_type = ov::element::from<T>();
+
+        auto& engine = tests::get_test_engine();
+
+        const auto boxes_layout = layout(ov::PartialShape{param.num_batches, param.num_boxes, 5}, data_type,
+                                         format::bfyx);
+        const auto scores_layout = layout(ov::PartialShape{param.num_batches, param.num_classes, param.num_boxes},
+                                          data_type, format::bfyx);
+
+        const int selected_indices_num = param.num_batches * param.num_classes * param.num_boxes;
+        const auto selected_scores_layout = layout(ov::PartialShape{selected_indices_num/*expected_indices_count*/, 3},
+                                                   data_type, format::bfyx);
+        const auto valid_outputs_layout = layout(ov::PartialShape{1}, cldnn::data_types::i32, format::bfyx);
+
+        const auto boxes_mem = engine.allocate_memory(boxes_layout);
+        tests::set_values(boxes_mem, param.boxes);
+
+        const auto scores_mem = engine.allocate_memory(scores_layout);
+        tests::set_values(scores_mem, param.scores);
+
+        const auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
+        tests::set_values(num_per_class_mem, {1.f * param.max_output_boxes_per_class});
+
+        const auto iou_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
+        tests::set_values(iou_threshold_mem, {param.iou_threshold});
+
+        const auto score_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
+        tests::set_values(score_threshold_mem, {param.score_threshold});
+
+        const auto selected_scores_mem = engine.allocate_memory(selected_scores_layout);
+        const auto valid_outputs_mem = engine.allocate_memory(valid_outputs_layout);
+
+        topology topo;
+        topo.add(input_layout("boxes", boxes_layout));
+        topo.add(input_layout("scores", scores_layout));
+        topo.add(data("num_per_class", num_per_class_mem));
+        topo.add(data("iou_threshold", iou_threshold_mem));
+        topo.add(data("score_threshold", score_threshold_mem));
+        topo.add(mutable_data("selected_scores", selected_scores_mem));
+        topo.add(mutable_data("valid_outputs", valid_outputs_mem));
+        auto nms = non_max_suppression("nms",
+                                       input_info("boxes"),
+                                       input_info("scores"),
+                                       selected_indices_num,
+                                       false,
+                                       param.sort_result_descending,
+                                       "num_per_class",
+                                       "iou_threshold",
+                                       "score_threshold",
+                                       "",
+                                       "selected_scores",
+                                       "valid_outputs");
+        nms.rotation = param.clockwise ? non_max_suppression::Rotation::CLOCKWISE :
+                                         non_max_suppression::Rotation::COUNTERCLOCKWISE;
+
+        topo.add(nms);
+
+        ExecutionConfig config = get_test_default_config(engine);
+        config.set_property(ov::intel_gpu::optimize_data(true));
+
+        cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
+        net->set_input_data("boxes", boxes_mem);
+        net->set_input_data("scores", scores_mem);
+        const auto result = net->execute();
+        const auto indices_mem = result.at("nms").get_memory();
+        const cldnn::mem_lock<T_IND> indices_ptr(indices_mem, get_test_stream());
+        const cldnn::mem_lock<T> selected_scores_ptr(selected_scores_mem, get_test_stream());
+        const cldnn::mem_lock<int> valid_outputs_ptr(valid_outputs_mem, get_test_stream());
+
+        const auto expected_valid_outputs = param.expected_indices.size() / 3;
+        const size_t num_valid_outputs = static_cast<size_t>(valid_outputs_ptr[0]);
+
+        EXPECT_EQ(num_valid_outputs, expected_valid_outputs);
+        ASSERT_GE(indices_ptr.size(), param.expected_indices.size());
+        ASSERT_GE(selected_scores_ptr.size(), param.expected_scores.size());
+
+        for (size_t i = 0; i < indices_ptr.size(); ++i) {
+            if (i < num_valid_outputs * 3) {
+                EXPECT_EQ(param.expected_indices[i], indices_ptr[i]) << "at i = " << i;
+                EXPECT_NEAR(param.expected_scores[i], selected_scores_ptr[i], getError<T>()) << "at i = " << i;
+            } else {
+                EXPECT_EQ(indices_ptr[i], -1) << "at i = " << i;
+                EXPECT_NEAR(selected_scores_ptr[i], -1, getError<T>()) << "at i = " << i;
+            }
+        }
+    }
+};
+
+
+struct PrintToStringParamName {
+    template<typename T, typename T_IND>
+    std::string operator()(const testing::TestParamInfo<NmsRotatedParams<T, T_IND>>& info) {
+        const auto& p = info.param;
+        std::ostringstream result;
+        result << p.test_name << "_";
+        result << "DataType=" << ov::element::Type(ov::element::from<T>());
+        result << "_IndexType=" << ov::element::Type(ov::element::from<T_IND>());
+        return result.str();
+    }
+};
+
+
+using nms_rotated_test_f32_i32 = nms_rotated_test<float, int32_t>;
+using nms_rotated_test_f16_i32 = nms_rotated_test<ov::float16, int32_t>;
+
+TEST_P(nms_rotated_test_f32_i32, basic) {
+    ASSERT_NO_FATAL_FAILURE(test());
+}
+
+TEST_P(nms_rotated_test_f16_i32, basic) {
+    ASSERT_NO_FATAL_FAILURE(test());
+}
+
+template<typename T, typename T_IND>
+std::vector<NmsRotatedParams<T, T_IND>> getNmsRotatedParams() {
+    const std::vector<NmsRotatedParams<T, T_IND>> params = {
+        {"basic",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         5000, 0.5f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 1, 0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7, 0.0, 0.0, 0.65},
+        },
+        {"max_out_2",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         2, 0.5f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7},
+        },
+        {"score_threshold",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         5000, 0.5f, 0.67f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7},
+        },
+        {"iou_threshold_2",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         5000, 0.3f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.65},
+        },
+        {"negative_cw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 0, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.8, 0.0, 0.0, 0.7}
+        },
+        {"negative_ccw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, false,
+         std::vector<T_IND>{0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.8}
+        },
+        {"positive_ccw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, false,
+         std::vector<T_IND>{0, 0, 0, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.8, 0.0, 0.0, 0.7}
+        },
+        {"positive_cw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.8}
+        }
+    };
+
+    return params;
+}
+INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
+                         nms_rotated_test_f32_i32,
+                         ::testing::ValuesIn(getNmsRotatedParams<float, int32_t>()),
+                         PrintToStringParamName());
+
+INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
+                         nms_rotated_test_f16_i32,
+                         ::testing::ValuesIn(getNmsRotatedParams<ov::float16, int32_t>()),
+                         PrintToStringParamName());
+} // namespace
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp
new file mode 100644
index 00000000000000..deee8418e23fae
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp
@@ -0,0 +1,184 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+
+#include
+#include
+#include "rms_inst.h"
+
+using namespace cldnn;
+using namespace ::tests;
+
+class rms_gpu_test : public ::testing::TestWithParam {};
+
+template<typename T>
+void rms_ref(const memory::ptr input, const memory::ptr gamma, memory::ptr output, float epsilon) {
+    auto input_layout = input->get_layout();
+    auto gamma_layout = gamma->get_layout();
+
+    uint32_t batch_size = input_layout.batch();
+    uint32_t feature_size = input_layout.feature();
+    uint32_t y_size = input_layout.spatial(1);
+    uint32_t x_size = input_layout.spatial(0);
+
+    cldnn::mem_lock<T> src(input, get_test_stream());
+    cldnn::mem_lock<T> weight(gamma, get_test_stream());
+    cldnn::mem_lock<T> dst(output, get_test_stream());
+
+    for (uint32_t b = 0; b < batch_size; ++b) {
+        for (uint32_t f = 0; f < feature_size; ++f) {
+            float rms = 0.f;
+            for (uint32_t y = 0; y < y_size; ++y) {
+                for (uint32_t x = 0; x < x_size; ++x) {
+                    auto tensor_src = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    size_t src_offset = input_layout.get_linear_offset(tensor_src);
+                    rms += std::pow(static_cast<float>(src[src_offset]), 2);
+                }
+            }
+            rms /= y_size * x_size;
+            rms += epsilon;
+            rms = std::pow(std::sqrt(rms), -1);
+
+            for (uint32_t y = 0; y < y_size; ++y) {
+                for (uint32_t x = 0; x < x_size; ++x) {
+                    auto tensor_src = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    auto tensor_weight = tensor(batch(b), feature(0), spatial(x, y, 0, 0));
+                    auto tensor_dst = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    size_t src_offset = input_layout.get_linear_offset(tensor_src);
+                    size_t weight_offset = input_layout.get_linear_offset(tensor_weight);
+                    size_t dst_offset = input_layout.get_linear_offset(tensor_dst);
+                    float result = rms * static_cast<float>(src[src_offset]) * static_cast<float>(weight[weight_offset]);
+                    dst[dst_offset] = static_cast<T>(result);
+                }
+            }
+        }
+    }
+}
+
+TEST(rms_gpu_test,
rms_test_bfyx_ref) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx}); + auto gamma = engine.allocate_memory({ov::PartialShape{1, 6}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, + 0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} + +TEST(rms_gpu_test, rms_test_bfyx_opt) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx}); + auto gamma = engine.allocate_memory({ov::PartialShape{1, 16}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, -0.005402f, -0.002945f, + 0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f, 0.000568f, 0.002579f, + 0.003098f, -0.006989f, -0.000244f, 0.010193f, 0.002899f, -0.005798f, -0.026978f, 0.008789f, + 0.002258f, 0.006500f, 0.003159f, -0.012329f, 0.026245f, -0.001839f, 0.000259f, 0.002670f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f, 0.008362f, 0.008179f, + 0.018188f, 0.021973f, 0.005249f, 0.004639f, 0.004272f, 0.020264f, 0.013489f, 0.008789f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} + +TEST(rms_gpu_test, rms_test_bfyx_opt_leftovers) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 18}, data_types::f32, format::bfyx}); + auto gamma = 
engine.allocate_memory({ov::PartialShape{1, 18}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 18}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, -0.005402f, -0.002945f, 0.006744f, + -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f, 0.000568f, 0.002579f, 0.003098f, -0.006989f, + -0.000244f, 0.010193f, 0.002899f, -0.005798f, -0.026978f, 0.008789f, 0.002258f, 0.006500f, 0.003159f, + -0.012329f, 0.026245f, -0.001839f, 0.000259f, 0.002670f, 0.001419f, 0.001617f,-0.006622f, 0.010864f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f, 0.008362f, 0.008179f, 0.018188f, + 0.021973f, 0.005249f, 0.004639f, 0.004272f, 0.020264f, 0.013489f, 0.008789f, 0.006653f, 0.010315f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} diff --git a/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp new file mode 100644 index 00000000000000..26d8638d2b904e --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp @@ -0,0 +1,144 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +TEST_F(TransformationTestsF, RMSNormFusionTest1) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {-1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto rms_const = 
ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto rms = std::make_shared(input, rms_const, 1e-5f, ov::element::f16); + + model_ref = std::make_shared(ov::NodeVector{rms}, ov::ParameterVector{input}); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest2) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest3) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest4) { + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + 
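Every fixture in this file builds a variant of the same decomposed RMS-norm expression (the reciprocal of the square root is taken either through a division or through a power of -1; the exact op types are elided in the make_shared calls above), and the pass registered via register_pass is expected to collapse the matching variants into a single rms node. In math form, with the mean taken over the last axis as the {-1} reduction axes above:

\mathrm{RMSNorm}(x) = \gamma \odot \frac{x}{\sqrt{\tfrac{1}{n}\sum_{i=1}^{n} x_i^{2} + \varepsilon}}, \qquad \varepsilon = 10^{-5}

Fixtures that construct a model_ref assert that the fused rms op replaces this whole subgraph; fixtures without one rely on the TransformationTestsF default of comparing against the unmodified graph, i.e. they pin down variants that are expected to stay untouched.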
+TEST_F(TransformationTestsF, RMSNormFusionTest5) { + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {-1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto rms_const = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto rms = std::make_shared(input, rms_const, 1e-5f, ov::element::f16); + + model_ref = std::make_shared(ov::NodeVector{rms}, ov::ParameterVector{input}); + } +} diff --git a/src/plugins/template/backend/ops/bitwise_and.cpp b/src/plugins/template/backend/ops/bitwise_and.cpp index d0e5d05b11360d..b6686175377aac 100644 --- a/src/plugins/template/backend/ops/bitwise_and.cpp +++ b/src/plugins/template/backend/ops/bitwise_and.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_and(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/backend/ops/bitwise_or.cpp b/src/plugins/template/backend/ops/bitwise_or.cpp index fe163edeccb3a1..69f45d2916731d 100644 --- a/src/plugins/template/backend/ops/bitwise_or.cpp +++ b/src/plugins/template/backend/ops/bitwise_or.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_or(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/backend/ops/bitwise_xor.cpp b/src/plugins/template/backend/ops/bitwise_xor.cpp index 3fa98775a05e18..43a15c60b5e0a8 100644 --- a/src/plugins/template/backend/ops/bitwise_xor.cpp +++ b/src/plugins/template/backend/ops/bitwise_xor.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), 
inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_xor(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/tests/functional/op_reference/convert.cpp b/src/plugins/template/tests/functional/op_reference/convert.cpp index 0f3e47148790be..b6195744c9c6f3 100644 --- a/src/plugins/template/tests/functional/op_reference/convert.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert.cpp @@ -103,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -134,7 +134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -245,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -261,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -269,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -277,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -285,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -301,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -309,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -317,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -325,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -333,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -341,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -349,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -364,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( 
ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -395,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -452,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -483,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -540,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -571,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -628,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -659,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -718,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -758,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -825,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -841,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -849,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -857,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -865,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -881,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -889,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 
0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -897,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -905,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -913,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -921,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -929,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -945,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -976,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1034,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1065,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1123,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1154,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1211,7 +1211,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1242,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, diff --git a/src/plugins/template/tests/functional/op_reference/convert_like.cpp b/src/plugins/template/tests/functional/op_reference/convert_like.cpp index b46fe98af030c2..4ddf3dda276b92 100644 --- a/src/plugins/template/tests/functional/op_reference/convert_like.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert_like.cpp @@ -6,6 +6,8 @@ #include +#include + #include "conversion.hpp" using namespace ov; @@ -101,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -132,7 
+134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -243,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -259,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -267,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -275,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -283,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -299,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -307,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -315,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -323,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -331,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -339,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -347,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -362,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -393,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -436,7 +438,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i8, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i16 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -450,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, 
ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -481,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -524,7 +526,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i16, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i32 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -538,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -569,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -612,7 +614,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i32, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -626,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -657,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -700,7 +702,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i64, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination u1 @@ -716,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -756,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -823,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -839,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -847,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -855,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -863,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -879,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( 
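// [Editor's note] The expected-byte updates in these conversion tests reflect a
// changed 4-bit packing convention: element 2*i now lands in the LOW nibble of
// byte i and element 2*i+1 in the HIGH nibble (previously the order was
// reversed). A minimal self-contained sketch of the new convention; the helper
// name is illustrative, not an OpenVINO API:
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> pack_4bit_low_nibble_first(const std::vector<uint8_t>& values) {
    std::vector<uint8_t> packed((values.size() + 1) / 2, 0);
    for (size_t i = 0; i < values.size(); ++i) {
        const uint8_t nibble = values[i] & 0x0F;  // i4 values are two's-complement nibbles
        packed[i / 2] |= (i % 2 == 0) ? nibble : static_cast<uint8_t>(nibble << 4);
    }
    return packed;
}

int main() {
    assert(pack_4bit_low_nibble_first({1, 2, 0, 3}) == (std::vector<uint8_t>{0x21, 0x30}));
    // i4 {-1, -2, 2, 3} -> nibbles 0xF, 0xE, 0x2, 0x3 -> bytes {0xEF, 0x32}
    assert(pack_4bit_low_nibble_first({0xF, 0xE, 0x2, 0x3}) == (std::vector<uint8_t>{0xEF, 0x32}));
    return 0;
}
// The same low-bit-first ordering explains the u1 cases (0xA0, i.e. bits
// 1,0,1,0, now converts to 4-bit bytes {0x01, 0x01}), and the f32 -> integer
// cases switch to fractional inputs (2.2, 3.8) so truncation toward zero is
// exercised as well.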
ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -887,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -895,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -903,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -911,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -919,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -927,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -943,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -974,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1017,7 +1019,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u8, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u16 @@ -1032,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1063,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1106,7 +1108,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u16, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u32 @@ -1121,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1152,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1195,7 +1197,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u32, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1209,7 +1211,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, 
ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1240,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1283,7 +1285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u64, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3})), ReferenceConversionLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp index 67a57298da3d95..0ec25023f3801c 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp @@ -132,7 +132,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 3); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-1_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-0_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -149,7 +149,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 1); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-1_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-0_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -162,7 +162,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { ASSERT_TRUE(this->serialize_op({convert_node, test_meta})); ASSERT_TRUE(ov::util::directory_exists(test_artifacts_dir)); auto serialized_model_path = ov::util::path_join({test_artifacts_dir, - "operation", "static", "Convert-1", "f16", "Convert-1_0.xml"}); + "operation", "static", "Convert-0", "f16", "Convert-0_0.xml"}); ASSERT_TRUE(ov::util::file_exists(serialized_model_path)); auto core = ov::Core(); auto serialized_model = core.read_model(serialized_model_path); @@ -171,7 +171,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { } TEST_F(OpCacheUnitTest, get_rel_serilization_dir) { - auto ref_path = ov::util::path_join({"operation", "static", "Convert-1", "f16"}); + auto ref_path = ov::util::path_join({"operation", "static", "Convert-0", "f16"}); auto original_path = this->get_rel_serilization_dir(convert_node); ASSERT_EQ(ref_path, original_path); } diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp new file mode 100644 index 00000000000000..d02a115acaeb18 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
"shared_test_classes/single_layer/nms_rotated.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(NmsRotatedLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp new file mode 100644 index 00000000000000..f253419ca924f4 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp" + +namespace ov { +namespace test { + +TEST_P(TransposeMatMulFusion, CompareWithRefs){ + run(); +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/CMakeLists.txt b/src/tests/functional/shared_test_classes/CMakeLists.txt index a4f46b241437b0..0aa8d0f33592c1 100644 --- a/src/tests/functional/shared_test_classes/CMakeLists.txt +++ b/src/tests/functional/shared_test_classes/CMakeLists.txt @@ -12,6 +12,8 @@ ov_add_target( INCLUDES PUBLIC "$" + PRIVATE + "${OpenVINO_SOURCE_DIR}/src/plugins/template/include" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES @@ -27,4 +29,4 @@ ov_build_target_faster(${TARGET_NAME} # install & export ov_developer_package_export_targets(TARGET ${TARGET_NAME} - INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index 5ca0b6531a39f3..c76cd8fbc1bc72 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -34,15 +34,14 @@ class SubgraphBaseTest : public ov::test::TestsCommon { protected: virtual void compare(const std::vector& expected, const std::vector& actual); - - virtual void configure_model(); virtual void compile_model(); - virtual void init_ref_function(std::shared_ptr& funcRef, - const std::vector& targetInputStaticShapes); - virtual void generate_inputs(const std::vector& targetInputStaticShapes); virtual void infer(); virtual void validate(); + virtual void configure_model();; + virtual void generate_inputs(const std::vector& targetInputStaticShapes); + void update_ref_model(); + void match_parameters(); void init_input_shapes(const std::vector& shapes); void TearDown() override { @@ -65,6 +64,10 @@ class SubgraphBaseTest : public ov::test::TestsCommon { ov::CompiledModel compiledModel; ov::InferRequest inferRequest; + // to provide correct inputs for reference function + std::map, std::shared_ptr> matched_parameters; + precisions_map convert_precisions; + constexpr static const double disable_threshold = std::numeric_limits::max(); double abs_threshold = disable_threshold, rel_threshold = disable_threshold; @@ -75,7 +78,6 @@ class SubgraphBaseTest : public ov::test::TestsCommon { virtual std::vector calculate_refs(); virtual std::vector get_plugin_outputs(); - virtual precisions_map get_ref_precisions_convert_map(); friend void core_configuration(SubgraphBaseTest* test); }; diff --git 
a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp new file mode 100644 index 00000000000000..3d36cf3a2e0439 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" + + +namespace LayerTestsDefinitions { + +using InputShapeParams = std::tuple; // Number of classes + +using InputPrecisions = + std::tuple; // iou_threshold, score_threshold, soft_nms_sigma precisions + +using NmsRotatedParams = std::tuple; // Device name + +class NmsRotatedLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + void GenerateInputs() override; + void Compare(const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) override; + +protected: + void SetUp() override; + InputShapeParams inShapeParams; +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp new file mode 100644 index 00000000000000..c94383725f47ce --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +class TransposeMatMulFusion : public testing::WithParamInterface, + public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; + void TearDown() override; +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 016dc26cccdfc5..aca76c4e9e65ab 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -18,6 +18,8 @@ #include "openvino/pass/serialize.hpp" #include "transformations/convert_precision.hpp" +#include "template/properties.hpp" + #include "common_test_utils/graph_comparator.hpp" #include "ov_models/utils/ov_helpers.hpp" @@ -72,18 +74,7 @@ void SubgraphBaseTest::run() { try { compile_model(); for (const auto& targetStaticShapeVec : targetStaticShapes) { - try { - if (!inputDynamicShapes.empty()) { - // resize ngraph function according new target shape - // Note: output shapes of some nodes depend on the input data - // so for some tests we need to override this function and replace parameter with constant node to get correct output shapes - init_ref_function(functionRefs, targetStaticShapeVec); - } - generate_inputs(targetStaticShapeVec); - } catch (const std::exception& ex) { - throw std::runtime_error("[IE TEST INFRA] Impossible to reshape ov::Model using the shape: " + - ov::test::utils::vec2str(targetStaticShapeVec) + " " + ex.what()); - } + 
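// [Editor's note] From this change on, reference outputs are produced by
// compiling functionRefs on the TEMPLATE plugin (with plugin-side
// transformations disabled) and running an ordinary inference, instead of
// calling the ngraph function interpreter; see calculate_refs() further below.
// A condensed, hedged sketch of that flow, assuming the Template plugin is
// already registered in the ov::Core (the helper name is illustrative):
#include <map>
#include <memory>
#include <vector>
#include "openvino/runtime/core.hpp"
#include "template/properties.hpp"

std::vector<ov::Tensor> calc_template_refs(const std::shared_ptr<ov::Model>& ref_model,
                                           const std::map<std::shared_ptr<ov::op::v0::Parameter>, ov::Tensor>& ref_inputs) {
    ov::Core core;
    // Disable graph transformations so the reference model is executed as-is
    auto compiled = core.compile_model(ref_model, "TEMPLATE",
                                       {{ov::template_plugin::disable_transformations(true)}});
    auto request = compiled.create_infer_request();
    for (const auto& param : ref_model->get_parameters())
        request.set_tensor(param->get_default_output(), ref_inputs.at(param));
    request.infer();
    std::vector<ov::Tensor> outputs;
    for (const auto& output : ref_model->outputs())
        outputs.push_back(request.get_tensor(output));
    return outputs;
}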
generate_inputs(targetStaticShapeVec); validate(); } status = ov::test::utils::PassRate::Statuses::PASSED; @@ -208,9 +199,6 @@ void SubgraphBaseTest::compile_model() { auto start_time = std::chrono::system_clock::now(); configure_model(); - if (functionRefs == nullptr) { - functionRefs = function->clone(); - } core_configuration(this); compiledModel = core->compile_model(function, targetDevice, configuration); if (is_report_stages) { @@ -220,10 +208,6 @@ void SubgraphBaseTest::compile_model() { } } -void SubgraphBaseTest::init_ref_function(std::shared_ptr &funcRef, const std::vector& targetInputStaticShapes) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); -} - void SubgraphBaseTest::generate_inputs(const std::vector& targetInputStaticShapes) { inputs.clear(); auto inputMap = utils::getInputMap(); @@ -255,44 +239,21 @@ void SubgraphBaseTest::infer() { inferRequest.infer(); } -precisions_map SubgraphBaseTest::get_ref_precisions_convert_map() { - //TODO: remove this conversions as soon as function interpreter fully support bf16 and f16 - precisions_map precisions = { - { ngraph::element::bf16, ngraph::element::f32 } - }; - - auto convert_added = false; - for (const auto ¶m : function->get_parameters()) { - for (size_t i = 0; i < param->get_output_size(); i++) { - for (const auto &node : param->get_output_target_inputs(i)) { - std::shared_ptr nodePtr = node.get_node()->shared_from_this(); - if (std::dynamic_pointer_cast(nodePtr)) { - convert_added = true; - break; - } - } - } - } - - if (!convert_added) { - precisions.insert({ ngraph::element::f16, ngraph::element::f32}); +void SubgraphBaseTest::update_ref_model() { + if (functionRefs == nullptr) { + functionRefs = function->clone(); } - - return precisions; -} - -std::vector SubgraphBaseTest::calculate_refs() { using InputsMap = std::map, ov::Tensor>; - auto functionToProcess = functionRefs->clone(); - precisions_map convert_precisions = get_ref_precisions_convert_map(); - pass::Manager manager; - manager.register_pass(convert_precisions, type_to_fuse_map{}, false, false); - manager.run_passes(functionToProcess); - functionToProcess->validate_nodes_and_infer_types(); + if (!convert_precisions.empty()) { + pass::Manager manager; + manager.register_pass(convert_precisions, type_to_fuse_map{}, false, false); + manager.run_passes(functionRefs); + functionRefs->validate_nodes_and_infer_types(); + } - ov::preprocess::PrePostProcessor p(functionToProcess); - const auto& inputNodes = functionToProcess->inputs(); + ov::preprocess::PrePostProcessor p(functionRefs); + const auto& inputNodes = functionRefs->inputs(); for (size_t i = 0; i < inputNodes.size(); ++i) { auto itr = std::find_if(inputs.begin(), inputs.end(), [&](const InputsMap::value_type& item) { @@ -310,18 +271,80 @@ std::vector SubgraphBaseTest::calculate_refs() { throw std::runtime_error(errMsg.str()); } } - - const auto& outputs = functionToProcess->outputs(); + const auto& outputs = functionRefs->outputs(); for (size_t i = 0; i < outputs.size(); ++i) { if (outType != ElementType::undefined && outType != outputs[i].get_element_type()) { p.output(i).tensor().set_element_type(outType); } } + functionRefs = p.build(); +} + +void SubgraphBaseTest::match_parameters() { + matched_parameters.clear(); + const auto& ref_params = functionRefs->get_parameters(); + const auto& params = function->get_parameters(); + size_t param_size = params.size(), ref_param_size = ref_params.size(); + if (params.size() < ref_params.size()) { + throw std::runtime_error("Incompatible 
parameters in original and reference model!"); + } + if (params.size() == ref_params.size()) { + for (size_t in_idx = 0; in_idx < params.size(); ++in_idx) { + matched_parameters.insert({ ref_params[in_idx], params[in_idx] }); + } + } else { + auto it = params.begin(); + auto it_ref = ref_params.begin(); + while (it_ref != ref_params.end() && it != params.end()) { + bool is_match_in = true; + if ((*it_ref)->get_output_partial_shape(0).is_static()) { + if (inputs.at(*it).get_shape() != (*it_ref)->get_output_shape(0)) { + is_match_in = false; + } + } else if ((*it)->get_output_partial_shape(0) != (*it_ref)->get_output_partial_shape(0)) { + is_match_in = false; + } + if ((*it)->get_output_element_type(0) != ((*it_ref)->get_output_element_type(0))) { + is_match_in = false; + } + if (is_match_in) { + matched_parameters.insert({ *it_ref, *it }); + ++it_ref; + } + ++it; + } + if (matched_parameters.size() != ref_params.size()) { + throw std::runtime_error("Incompatible parameters in original and reference model!"); + } + } +} + +std::vector SubgraphBaseTest::calculate_refs() { + if (is_report_stages) { + std::cout << "[ REFERENCE ] `SubgraphBaseTest::calculate_refs()` is started"<< std::endl; + } + auto start_time = std::chrono::system_clock::now(); - functionToProcess = p.build(); + update_ref_model(); + match_parameters(); - auto results = ngraph::helpers::interpretFunction(functionToProcess, inputs); - return results; + auto compiledModelRef = core->compile_model(functionRefs, ov::test::utils::DEVICE_TEMPLATE, {{ ov::template_plugin::disable_transformations(true) }}); + auto inferRequestRef = compiledModelRef.create_infer_request(); + for (const auto& param : functionRefs->get_parameters()) { + inferRequestRef.set_tensor(param->get_default_output(), inputs.at(matched_parameters[param])); + } + inferRequestRef.infer(); + + auto outputs = std::vector{}; + for (const auto& output : functionRefs->outputs()) { + outputs.push_back(inferRequestRef.get_tensor(output)); + } + if (is_report_stages) { + auto end_time = std::chrono::system_clock::now(); + std::chrono::duration duration = end_time - start_time; + std::cout << "[ REFERENCE ] `SubgraphBaseTest::calculate_refs()` is finished successfully. 
Duration is " << duration.count() << "s" << std::endl; + } + return outputs; } std::vector SubgraphBaseTest::get_plugin_outputs() { @@ -361,7 +384,7 @@ void SubgraphBaseTest::validate() { } ASSERT_EQ(actualOutputs.size(), expectedOutputs.size()) - << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size(); + << "TEMPLATE plugin has " << expectedOutputs.size() << " outputs, while " << targetDevice << " " << actualOutputs.size(); if (is_report_stages) { std::cout << "[ COMPARATION ] `ov_tensor_utils.hpp::compare()` is started"<< std::endl; } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp b/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp new file mode 100644 index 00000000000000..80e6cc98db203f --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/nms_rotated.hpp" +#include "openvino/op/nms_rotated.hpp" + +#include + +namespace LayerTestsDefinitions { + +using namespace InferenceEngine; +using namespace FuncTestUtils::PrecisionUtils; + +std::string NmsRotatedLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + InputShapeParams inShapeParams; + InputPrecisions inPrecisions; + int32_t maxOutBoxesPerClass; + float iouThr, scoreThr; + bool sortResDescend, clockwise; + ov::element::Type outType; + std::string targetDevice; + std::tie(inShapeParams, + inPrecisions, + maxOutBoxesPerClass, + iouThr, + scoreThr, + sortResDescend, + outType, + clockwise, + targetDevice) = obj.param; + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision inputPrec, maxBoxPrec, thrPrec; + std::tie(inputPrec, maxBoxPrec, thrPrec) = inPrecisions; + + std::ostringstream result; + result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_"; + result << "inputPrec=" << inputPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_"; + result << "maxOutBoxesPerClass=" << maxOutBoxesPerClass << "_"; + result << "iouThr=" << iouThr << "_scoreThr=" << scoreThr << "_"; + result << "sortResDescend=" << sortResDescend << "_outType=" << outType << "_"; + result << "clockwise=" << clockwise << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void NmsRotatedLayerTest::GenerateInputs() { + size_t it = 0; + for (const auto& input : cnnNetwork.getInputsInfo()) { + const auto& info = input.second; + Blob::Ptr blob; + + if (it == 1) { + blob = make_blob_with_precision(info->getTensorDesc()); + blob->allocate(); + if (info->getTensorDesc().getPrecision() == Precision::FP32) { + ov::test::utils::fill_data_random_float(blob, 1, 0, 1000); + } else { + ov::test::utils::fill_data_random_float(blob, 1, 0, 1000); + } + } else { + blob = GenerateInput(*info); + } + inputs.push_back(blob); + it++; + } +} + +void NmsRotatedLayerTest::Compare( + const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) { + size_t num_batches, num_boxes, num_classes; + std::tie(num_batches, num_boxes, num_classes) = inShapeParams; + + struct OutBox { + OutBox() = default; + + OutBox(int32_t batchId, int32_t classId, int32_t boxId, float score) { + this->batchId = batchId; + this->classId = classId; + this->boxId = boxId; + this->score = score; + } + + bool operator==(const OutBox& rhs) const { + 
return batchId == rhs.batchId && classId == rhs.classId && boxId == rhs.boxId; + } + + int32_t batchId; + int32_t classId; + int32_t boxId; + float score; + }; + + std::vector expected; + { + const auto selected_indices_size = expectedOutputs[0].second.size() / expectedOutputs[0].first.size(); + const auto selected_scores_size = expectedOutputs[1].second.size() / expectedOutputs[1].first.size(); + + ASSERT_EQ(selected_indices_size, selected_scores_size); + + const auto boxes_count = selected_indices_size / 3; + expected.resize(boxes_count); + + if (expectedOutputs[0].first.size() == 4) { + auto selected_indices_data = reinterpret_cast(expectedOutputs[0].second.data()); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + expected[i / 3].batchId = selected_indices_data[i + 0]; + expected[i / 3].classId = selected_indices_data[i + 1]; + expected[i / 3].boxId = selected_indices_data[i + 2]; + } + } else { + auto selected_indices_data = reinterpret_cast(expectedOutputs[0].second.data()); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + expected[i / 3].batchId = static_cast(selected_indices_data[i + 0]); + expected[i / 3].classId = static_cast(selected_indices_data[i + 1]); + expected[i / 3].boxId = static_cast(selected_indices_data[i + 2]); + } + } + + if (expectedOutputs[1].first.size() == 4) { + auto selected_scores_data = reinterpret_cast(expectedOutputs[1].second.data()); + for (size_t i = 0; i < selected_scores_size; i += 3) { + expected[i / 3].score = selected_scores_data[i + 2]; + } + } else { + auto selected_scores_data = reinterpret_cast(expectedOutputs[1].second.data()); + for (size_t i = 0; i < selected_scores_size; i += 3) { + expected[i / 3].score = static_cast(selected_scores_data[i + 2]); + } + } + } + + std::vector actual; + { + const auto selected_indices_size = actualOutputs[0]->byteSize() / sizeof(float); + const auto selected_indices_memory = as(actualOutputs[0]); + IE_ASSERT(selected_indices_memory); + const auto selected_indices_lockedMemory = selected_indices_memory->rmap(); + const auto selected_indices_data = selected_indices_lockedMemory.as(); + + const auto selected_scores_memory = as(actualOutputs[1]); + IE_ASSERT(selected_scores_memory); + const auto selected_scores_lockedMemory = selected_scores_memory->rmap(); + const auto selected_scores_data = selected_scores_lockedMemory.as(); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + const int32_t batchId = selected_indices_data[i + 0]; + const int32_t classId = selected_indices_data[i + 1]; + const int32_t boxId = selected_indices_data[i + 2]; + const float score = selected_scores_data[i + 2]; + if (batchId == -1 || classId == -1 || boxId == -1) + break; + + actual.emplace_back(batchId, classId, boxId, score); + } + } + + ASSERT_EQ(expected.size(), actual.size()); + for (size_t i = 0; i < expected.size(); ++i) { + ASSERT_EQ(expected[i], actual[i]) << ", i=" << i; + ASSERT_NEAR(expected[i].score, actual[i].score, abs_threshold) << ", i=" << i; + } +} + +void NmsRotatedLayerTest::SetUp() { + InputPrecisions inPrecisions; + size_t maxOutBoxesPerClass; + float iouThr, scoreThr; + bool sortResDescend, clockwise; + ov::element::Type outType; + std::tie(inShapeParams, + inPrecisions, + maxOutBoxesPerClass, + iouThr, + scoreThr, + sortResDescend, + outType, + clockwise, + targetDevice) = this->GetParam(); + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision inputPrec, maxBoxPrec, thrPrec; + std::tie(inputPrec, maxBoxPrec, 
thrPrec) = inPrecisions; + + if (inputPrec == Precision::FP16) { + abs_threshold = 0.1; + } else { + abs_threshold = std::numeric_limits::epsilon(); + } + + ov::ParameterVector params; + + const std::vector boxesShape{numBatches, numBoxes, 5}, scoresShape{numBatches, numClasses, numBoxes}; + const auto ngPrc = convertIE2nGraphPrc(inputPrec); + + const auto boxesNode = std::make_shared(ngPrc, ov::Shape(boxesShape)); + params.push_back(boxesNode); + const auto scoresNode = std::make_shared(ngPrc, ov::Shape(scoresShape)); + params.push_back(scoresNode); + + const auto maxOutputBoxesPerClassNode = std::make_shared(ov::element::Type_t::u32, + ov::Shape{}, + std::vector{maxOutBoxesPerClass}); + const auto iouThresholdNode = std::make_shared(ov::element::Type_t::f32, + ov::Shape{}, + std::vector{iouThr}); + const auto scoreTresholdNode = std::make_shared(ov::element::Type_t::f32, + ov::Shape{}, + std::vector{scoreThr}); + + const auto nmsNode = std::make_shared(params[0], + params[1], + maxOutputBoxesPerClassNode, + iouThresholdNode, + scoreTresholdNode, + sortResDescend, + outType, + clockwise); + + function = std::make_shared(nmsNode, params, "NMS"); +} +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp b/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..dc95fe704400f9 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp" + +namespace ov { +namespace test { + +std::string TransposeMatMulFusion::getTestCaseName(const testing::TestParamInfo &obj) { + return "device=" + std::string(obj.param); +} + +void TransposeMatMulFusion::SetUp() { + targetDevice = GetParam(); + + ov::PartialShape shape1{1, 3, 128, 64}; + ov::PartialShape shape2{1, 3, 64, 128}; + + InputShape input_shape1 = {shape1, {Shape{1, 3, 128, 64}}}; + InputShape input_shape2 = {shape2, {Shape{1, 3, 64, 128}}}; + init_input_shapes({input_shape1, input_shape2}); + + const auto param1 = std::make_shared(ov::element::f32, shape1); + const auto param2 = std::make_shared(ov::element::f32, shape2); + const auto order = ov::op::v0::Constant::create(ov::element::i32, Shape{4}, {0, 1, 3, 2}); + const auto transpose1 = std::make_shared(param1, order); + const auto transpose2 = std::make_shared(param2, order); + const auto matmul = std::make_shared(transpose1, transpose2, false, false); + const auto constant = op::v0::Constant::create(element::f32, Shape{1}, {9}); + const auto mul = std::make_shared(matmul, constant); + function = std::make_shared(mul, ov::ParameterVector{param1, param2}); +} + +void TransposeMatMulFusion::TearDown() { + const auto model = compiledModel.get_runtime_model(); + + int num_ops = 0; + for (const auto& node : model->get_ordered_ops()) { + const auto& rt_info = node->get_rt_info(); + const auto layer_type = rt_info.find("layerType")->second.as(); + if (layer_type != "Reorder" && layer_type != "Const") + num_ops++; + EXPECT_NE(layer_type, "Transpose"); + EXPECT_NE(layer_type, "Permute"); + } + ASSERT_EQ(num_ops, 5); // two Inputs, one Eltwise, one MatMul and one Output +} + +} // namespace test +} // namespace ov diff --git a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp 
b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp index 5d0f3cd4ac7d0b..862ff798efcf30 100644 --- a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp +++ b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp @@ -163,6 +163,7 @@ std::vector convertOutputPrecision(const std::vector std::ostream& operator<<(std::ostream& os, MemoryTransformation type); +// todo: remove the following function from the source code after cleaning up VPU repo void resize_function(std::shared_ptr function, const std::vector& targetInputStaticShapes); using ov::test::utils::operator<<; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp index cb90c0699a126d..22373d55292d2a 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp @@ -14,6 +14,7 @@ #include "openvino/op/loop.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/op/util/sub_graph_base.hpp" +#include "openvino/runtime/aligned_buffer.hpp" class FunctionsComparator { public: @@ -945,9 +946,7 @@ class ReadAndCompareAttributes : public ov::AttributeVisitor { template void verify(const std::string& name, const AttrValue& attr_value); - OPENVINO_SUPPRESS_DEPRECATED_START - void verify_mem_buf(const std::string& name, const std::shared_ptr& buffer); - OPENVINO_SUPPRESS_DEPRECATED_END + void verify_mem_buf(const std::string& name, const std::shared_ptr& buffer); using ModelAccessor = ov::ValueAccessor>; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp index 70778266d8f1b2..c8026f4ef2d7e0 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp @@ -23,6 +23,8 @@ const char API_REPORT_FILENAME[] = "report_api"; const char REPORT_EXTENSION[] = ".xml"; const char LST_EXTENSION[] = ".lst"; +const char TEMPLATE_LIB[] = "openvino_template_plugin"; + const char DEVICE_SUFFIX_SEPARATOR = '.'; const unsigned int maxFileNameLength = 140; diff --git a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp index e3c5a8b2ec1a55..f6cc70ed10a625 100644 --- a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp +++ b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp @@ -895,7 +895,6 @@ void check_rt_info(const std::shared_ptr& f) { namespace attributes { namespace detail { -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { insert(name, inputs->get()); @@ -904,7 +903,7 @@ void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccess } else if (ov::is_type>(&adapter)) { // drop comparison, no more info than port indexes which will be check in // subgraph::compare_io - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { const auto beg = static_cast(a->get()->get_ptr()); const auto end = beg + a->get()->size(); insert(name, storage::MemoryChunk{storage::MemoryChunk::Data(beg, end)}); @@ -923,7 +922,6 
@@ void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccess adapter.get_type_info().name + "']"; } } -OPENVINO_SUPPRESS_DEPRECATED_END template void ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& attr_value) { if (should_return()) { @@ -942,9 +940,8 @@ void ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& } } -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, - const std::shared_ptr& buffer) { + const std::shared_ptr& buffer) { if (should_return()) { return; } @@ -961,7 +958,6 @@ void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, return; } } -OPENVINO_SUPPRESS_DEPRECATED_END void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAccessor& adapter) { if (should_return()) { @@ -980,7 +976,6 @@ void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAcc } } -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { verify(name, inputs->get()); @@ -989,7 +984,7 @@ void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueA } else if (ov::is_type>(&adapter)) { // drop comparison, no more info than port indexes which will be check in // subgraph::compare_io - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { verify_mem_buf(name, a->get()); } else if (auto attrs = ov::as_type>(&adapter)) { verify(name, attrs->get()); @@ -1005,7 +1000,6 @@ void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueA adapter.get_type_info().name + "']"; } } -OPENVINO_SUPPRESS_DEPRECATED_END } // namespace detail @@ -1042,7 +1036,6 @@ AccuracyCheckResult accuracy_check(const std::shared_ptr& ref_functio auto ref_outputs = ngraph::helpers::interpretFunction(ref_function, ref_input_data); auto outputs = ngraph::helpers::interpretFunction(cur_function, cur_input_data); - IE_ASSERT(ref_outputs.size() == outputs.size()); for (int i = 0; i < ref_outputs.size(); i++) { diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp index df4377d5cf9ad4..ef76694caf9691 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp @@ -4,45 +4,95 @@ #pragma once +#include "openvino/op/ops.hpp" #include "openvino/openvino.hpp" namespace ov { namespace test { namespace functional { +// {{ type_info, real_version }} +const std::map not_aligned_op_version = { + // opset 1 + {ov::op::v0::Abs::get_type_info_static(), 0}, + {ov::op::v0::Acos::get_type_info_static(), 0}, + {ov::op::v0::Asin::get_type_info_static(), 0}, + {ov::op::v0::Atan::get_type_info_static(), 0}, + {ov::op::v0::BatchNormInference::get_type_info_static(), 0}, + {ov::op::v0::CTCGreedyDecoder::get_type_info_static(), 0}, + {ov::op::v0::Ceiling::get_type_info_static(), 0}, + {ov::op::v0::Clamp::get_type_info_static(), 0}, + {ov::op::v0::Concat::get_type_info_static(), 0}, + {ov::op::v0::Constant::get_type_info_static(), 0}, + {ov::op::v0::Convert::get_type_info_static(), 0}, + {ov::op::v0::Cos::get_type_info_static(), 0}, + {ov::op::v0::Cosh::get_type_info_static(), 0}, + 
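// [Editor's note] Each entry in this map pins the version suffix that reports
// should use for ops whose C++ type version differs from the opset that first
// shipped them. For example, Abs is part of opset1 but its type is
// ov::op::v0::Abs, so get_node_version() reports "Abs-0" rather than "Abs-1".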
{ov::op::v0::DepthToSpace::get_type_info_static(), 0}, + {ov::op::v0::DetectionOutput::get_type_info_static(), 0}, + {ov::op::v0::Elu::get_type_info_static(), 0}, + {ov::op::v0::Erf::get_type_info_static(), 0}, + {ov::op::v0::Exp::get_type_info_static(), 0}, + {ov::op::v0::FakeQuantize::get_type_info_static(), 0}, + {ov::op::v0::Floor::get_type_info_static(), 0}, + {ov::op::v0::GRN::get_type_info_static(), 0}, + {ov::op::v0::HardSigmoid::get_type_info_static(), 0}, + {ov::op::v0::Interpolate::get_type_info_static(), 0}, + {ov::op::v0::Log::get_type_info_static(), 0}, + {ov::op::v0::LRN::get_type_info_static(), 0}, + {ov::op::v0::LSTMCell::get_type_info_static(), 0}, + {ov::op::v0::LSTMSequence::get_type_info_static(), 0}, + {ov::op::v0::MatMul::get_type_info_static(), 0}, + {ov::op::v0::Negative::get_type_info_static(), 0}, + {ov::op::v0::NormalizeL2::get_type_info_static(), 0}, + {ov::op::v0::PRelu::get_type_info_static(), 0}, + {ov::op::v0::PSROIPooling::get_type_info_static(), 0}, + {ov::op::v0::Parameter::get_type_info_static(), 0}, + {ov::op::v0::PriorBox::get_type_info_static(), 0}, + {ov::op::v0::PriorBoxClustered::get_type_info_static(), 0}, + {ov::op::v0::Proposal::get_type_info_static(), 0}, + {ov::op::v0::Range::get_type_info_static(), 0}, + {ov::op::v0::Relu::get_type_info_static(), 0}, + {ov::op::v0::RegionYolo::get_type_info_static(), 0}, + {ov::op::v0::Result::get_type_info_static(), 0}, + {ov::op::v0::ReverseSequence::get_type_info_static(), 0}, + {ov::op::v0::RNNCell::get_type_info_static(), 0}, + {ov::op::v0::Selu::get_type_info_static(), 0}, + {ov::op::v0::ShapeOf::get_type_info_static(), 0}, + {ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, + {ov::op::v0::Sign::get_type_info_static(), 0}, + {ov::op::v0::Sigmoid::get_type_info_static(), 0}, + {ov::op::v0::Sin::get_type_info_static(), 0}, + {ov::op::v0::Sinh::get_type_info_static(), 0}, + {ov::op::v0::Sqrt::get_type_info_static(), 0}, + {ov::op::v0::SpaceToDepth::get_type_info_static(), 0}, + {ov::op::v0::SquaredDifference::get_type_info_static(), 0}, + {ov::op::v0::Squeeze::get_type_info_static(), 0}, + {ov::op::v0::Tan::get_type_info_static(), 0}, + {ov::op::v0::Tanh::get_type_info_static(), 0}, + {ov::op::v0::TensorIterator::get_type_info_static(), 0}, + {ov::op::v0::Tile::get_type_info_static(), 0}, + {ov::op::v0::Unsqueeze::get_type_info_static(), 0}, + {ov::op::v0::Xor::get_type_info_static(), 0}, + // opset 2 + {ov::op::v0::MVN::get_type_info_static(), 0}, + {ov::op::v0::ReorgYolo::get_type_info_static(), 0}, + {ov::op::v0::ROIPooling::get_type_info_static(), 0}, + {ov::op::v0::Gelu::get_type_info_static(), 0}, + {ov::op::v1::BatchToSpace::get_type_info_static(), 1}, + {ov::op::v1::SpaceToBatch::get_type_info_static(), 1}, + // opset 3 + {ov::op::v0::RNNCell::get_type_info_static(), 0}, + {ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, + // opset 4 + {ov::op::v3::Acosh::get_type_info_static(), 3}, + {ov::op::v3::Asinh::get_type_info_static(), 3}, + {ov::op::v3::Atanh::get_type_info_static(), 3}, +}; + // todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix = ""); +std::string get_node_version(const ov::NodeTypeInfo& node_type_info); } // namespace functional } // namespace test } // namespace ov - -// todo: remove these structure after remove old subgraphs dumper -namespace LayerTestsUtils { - -struct ModelInfo { - size_t unique_op_cnt; - // model_path, op_cnt - std::map model_paths; - - ModelInfo(size_t _op_cnt = 0, const std::map& 
_model_paths = {{}}); -}; - -struct PortInfo { - double min; - double max; - bool convert_to_const; - - PortInfo(double min, double max, bool convert_to_const); - PortInfo(); -}; - -struct OPInfo { - std::map found_in_models; - std::map ports_info; - - OPInfo(const std::string& source_model, const std::string& model_path, size_t total_op_cnt = 0); - - OPInfo() = default; -}; -} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp index cc97fb45cd6949..44cf995f7184c8 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp @@ -32,7 +32,7 @@ class OpSummary : public virtual Summary { std::map opsStats = {}; unsigned short int downgrade_coefficient; - std::string getOpVersion(const std::string& version); + std::string get_opset_number(const std::string& opset_full_name); protected: OpSummary(unsigned short int downgrade_coefficient = 1); diff --git a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp index 118368f7a1180f..483b9b98c38db2 100644 --- a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp +++ b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp @@ -55,21 +55,20 @@ std::shared_ptr PluginCache::core(const std::string& deviceToCheck) { } assert(0 != ov_core.use_count()); - // register template plugin if it is needed - try { - std::string pluginName = "openvino_template_plugin"; - pluginName += OV_BUILD_POSTFIX; + // Register Template plugin as a reference provider + const auto devices = ov_core->get_available_devices(); + if (std::find(devices.begin(), devices.end(), std::string(ov::test::utils::DEVICE_TEMPLATE)) == devices.end()) { ov_core->register_plugin( - ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), pluginName), - "TEMPLATE"); - } catch (...) 
{ + ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), + std::string(ov::test::utils::TEMPLATE_LIB) + OV_BUILD_POSTFIX), + ov::test::utils::DEVICE_TEMPLATE); } if (!deviceToCheck.empty()) { auto properties = ov_core->get_property(deviceToCheck, ov::supported_properties); if (std::find(properties.begin(), properties.end(), ov::available_devices) != properties.end()) { - auto availableDevices = ov_core->get_property(deviceToCheck, ov::available_devices); + const auto availableDevices = ov_core->get_property(deviceToCheck, ov::available_devices); if (availableDevices.empty()) { std::cerr << "No available devices for " << deviceToCheck << std::endl; diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp index 0082cba61a343f..358be2d424b177 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp @@ -8,45 +8,30 @@ namespace ov { namespace test { namespace functional { -// todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix) { - std::string op_name = node->get_type_info().name; - std::string opset_version = node->get_type_info().get_version(); - std::string opset_name = "opset"; - auto pos = opset_version.find(opset_name); - if (pos != std::string::npos) { - op_name += "-" + opset_version.substr(pos + opset_name.size()); - } + const auto& node_type_info = node->get_type_info(); + auto op_name = get_node_version(node_type_info); if (!postfix.empty()) { op_name += "_" + postfix; } return op_name; } +std::string get_node_version(const ov::NodeTypeInfo& node_type_info) { + std::string op_name = node_type_info.name + std::string("-"); + std::string opset_version = node_type_info.get_version(); + if (not_aligned_op_version.count(node_type_info)) { + op_name += std::to_string(not_aligned_op_version.at(node_type_info)); + } else { + std::string opset_name = "opset"; + auto pos = opset_version.find(opset_name); + if (pos != std::string::npos) { + op_name += opset_version.substr(pos + opset_name.size()); + } + } + return op_name; +} + } // namespace functional } // namespace test } // namespace ov - -namespace LayerTestsUtils { - -ModelInfo::ModelInfo(size_t _op_cnt, const std::map& _model_paths) - : unique_op_cnt(_op_cnt), - model_paths(_model_paths) {} - -PortInfo::PortInfo(double min, double max, bool convert_to_const) - : min(min), - max(max), - convert_to_const(convert_to_const) {} - -PortInfo::PortInfo() { - min = std::numeric_limits::min(); - max = std::numeric_limits::max(); - convert_to_const = false; -} - -OPInfo::OPInfo(const std::string& source_model, const std::string& model_path, size_t total_op_cnt) { - found_in_models = {{source_model, ModelInfo(1, {{model_path, total_op_cnt}})}}; - ports_info = {}; -} - -} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp index b895d14ab26ca6..fbaaf4e16629c3 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp @@ -8,6 +8,7 @@ #include #include "common_test_utils/file_utils.hpp" +#include "functional_test_utils/summary/op_info.hpp" using namespace ov::test::utils; @@ -109,13 +110,13 @@ void OpSummary::updateOPsImplStatus(const ov::NodeTypeInfo& op, 
const bool implS } } -std::string OpSummary::getOpVersion(const std::string& version) { +std::string OpSummary::get_opset_number(const std::string& opset_full_name) { std::string opset_name = "opset"; - auto pos = version.find(opset_name); + auto pos = opset_full_name.find(opset_name); if (pos == std::string::npos) { return "undefined"; } else { - return version.substr(pos + opset_name.size()); + return opset_full_name.substr(pos + opset_name.size()); } } @@ -259,7 +260,7 @@ void OpSummary::saveReport() { const auto& type_info_set = opset.get_type_info_set(); for (const auto& type_info : type_info_set) { auto it = opsInfo.find(type_info); - std::string op_version = getOpVersion(opset_version); + std::string op_version = get_opset_number(opset_version); if (it == opsInfo.end()) { opsInfo.insert({type_info, op_version}); } else { @@ -304,7 +305,7 @@ void OpSummary::saveReport() { pugi::xml_node opsNode = root.append_child("ops_list"); for (const auto& op : opsInfo) { - std::string name = std::string(op.first.name) + "-" + getOpVersion(op.first.version_id); + std::string name = functional::get_node_version(op.first); opsNode.append_child(name.c_str()).append_attribute("opsets").set_value(op.second.c_str()); } @@ -315,7 +316,7 @@ void OpSummary::saveReport() { it.second.rel_passed /= downgrade_coefficient; it.second.rel_all /= downgrade_coefficient; - std::string name = std::string(it.first.name) + "-" + getOpVersion(it.first.version_id); + std::string name = functional::get_node_version(it.first); opList.insert(name); pugi::xml_node entry = currentDeviceNode.append_child(name.c_str()); entry.append_attribute("implemented").set_value(it.second.isImplemented); diff --git a/tests/layer_tests/common/utils/tf_utils.py b/tests/layer_tests/common/utils/tf_utils.py index fb02c3f0a1b298..913048acf2e762 100644 --- a/tests/layer_tests/common/utils/tf_utils.py +++ b/tests/layer_tests/common/utils/tf_utils.py @@ -98,7 +98,7 @@ def summarize_graph(model_path, output_nodes_for_freeze=None, reshape_net=None): variables = list() outputs = list() graph = load_graph(model_path, output_nodes_for_freeze) - unlikely_output_types = ['Const', 'Assign', 'NoOp', 'Placeholder', 'Assert', 'switch_t', 'switch_f'] + unlikely_output_types = ['Const', 'Assign', 'NoOp', 'Placeholder', 'Assert', 'switch_t', 'switch_f', 'TensorArrayCloseV3'] control_dependents_map = collect_control_dependencies(graph) for node in graph.as_graph_def().node: if node.op == 'Placeholder': diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py index 6eab63bf682bd0..60bb92b85b5920 100644 --- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py +++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import unittest +import platform from typing import Tuple import numpy as np @@ -283,8 +284,8 @@ def scripted_fn(x: torch.Tensor, y: torch.Tensor): return torch.sigmoid(torch.relu(x * y)) inp_shape = PartialShape([Dimension(1, -1), Dimension(-1, 5), 10]) - ref_model = make_ref_pt_model_two_inputs(inp_shape, dtype=Type.dynamic) - return scripted_fn, ref_model, {'input': [(inp_shape), (inp_shape)]} + ref_model = make_ref_pt_model_two_inputs(inp_shape) + return scripted_fn, ref_model, {'input': [(inp_shape, Type.f32), (inp_shape, Type.f32)]} def create_pytorch_nn_module_layout_list(tmp_dir): @@ -471,9 +472,9 @@ def create_pytorch_nn_module_scale_list_compression_enabled(tmp_dir): def 
create_pytorch_nn_module_shapes_list_static(tmp_dir): pt_model = make_pt_model_two_inputs() - ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20], dtype=Type.dynamic) + ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20]) - return pt_model, ref_model, {'input': [[1, 3, 20, 20], [1, 3, 20, 20]]} + return pt_model, ref_model, {'input': [([1, 3, 20, 20], Type.f32), ([1, 3, 20, 20], Type.f32)]} def create_pytorch_nn_module_shapes_list_static_via_input(tmp_dir): @@ -489,17 +490,16 @@ def create_pytorch_nn_module_shapes_list_dynamic(tmp_dir): [-1, 3, 20, Dimension(-1, 20)]] param1 = ov.opset8.parameter(PartialShape( - inp_shapes[0]), name="x", dtype=Type.dynamic) + inp_shapes[0]), name="x", dtype=Type.f32) param2 = ov.opset8.parameter(PartialShape( - inp_shapes[1]), name="y", dtype=Type.dynamic) - cl = ov.opset8.convert_like(param2, param1) - mul = ov.opset8.multiply(param1, cl) + inp_shapes[1]), name="y", dtype=Type.f32) + mul = ov.opset8.multiply(param1, param2) relu = ov.opset8.relu(mul) sigm = ov.opset8.sigmoid(relu) parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input': inp_shapes} + return pt_model, ref_model, {'input': [(inp_shapes[0], Type.f32), (inp_shapes[1], Type.f32)]} def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): @@ -522,8 +522,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): def create_pytorch_nn_module_shapes_list_dynamic_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -536,8 +536,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_single_input_via_input(tmp_dir) def create_pytorch_nn_module_shapes_list_static_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[1, 3, 20, 20]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[1, 3, 20, 20], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -1236,6 +1236,8 @@ class TestPrecisionSensitive(): @pytest.mark.parametrize("create_model", test_data) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122714') def test_precision_sensitive(self, create_model, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): import numpy.testing as npt from pathlib import Path diff --git a/tests/layer_tests/onnx_tests/test_reduce_lp.py b/tests/layer_tests/onnx_tests/test_reduce_lp.py index e64929a680c20d..73cd86a2bbbc6f 100644 --- a/tests/layer_tests/onnx_tests/test_reduce_lp.py +++ b/tests/layer_tests/onnx_tests/test_reduce_lp.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest from common.layer_test_class import check_ir_version @@ -232,6 +234,8 @@ def create_reduce_lp_const(self, shape, axes, keep_dims, reduce_p, ir_version): @pytest.mark.parametrize("keep_dims", [True, False]) @pytest.mark.parametrize("reduce_p", [1, 2]) @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + 
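The conditional xfail being added here (and repeatedly below) always takes the same shape; a reusable marker would be one way to avoid the duplication. A sketch, not part of the change:

import platform
import pytest

# One definition instead of many inline copies; the ticket number varies per test.
darwin_arm64_xfail = pytest.mark.xfail(
    condition=platform.system() == 'Darwin' and platform.machine() == 'arm64',
    reason='Ticket - 122846')

@darwin_arm64_xfail
def test_placeholder():  # illustrative test, not from the suite
    pass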
reason='Ticket - 122846') def test_reduce_lp_precommit(self, params, keep_dims, reduce_p, ie_device, precision, ir_version, temp_dir, use_old_api): self._test(*self.create_reduce_lp(**params, keep_dims=keep_dims, reduce_p=reduce_p, diff --git a/tests/layer_tests/onnx_tests/test_roi_align.py b/tests/layer_tests/onnx_tests/test_roi_align.py index 13663808a3acd3..a29ddc4c1d1213 100644 --- a/tests/layer_tests/onnx_tests/test_roi_align.py +++ b/tests/layer_tests/onnx_tests/test_roi_align.py @@ -136,6 +136,8 @@ def create_net(self, input_shape, rois_shape, indices_shape, output_shape, @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Windows', reason="Ticket - 122731") + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122846') def test_roi_alignv10(self, params, ie_device, precision, ir_version, temp_dir, use_old_api): # TODO: ticket for investigating GPU failures: CVS-86300 if ie_device != "GPU": diff --git a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py index 5ae2dcac31c83e..90db75d01c0e78 100644 --- a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py +++ b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py @@ -281,8 +281,8 @@ def scripted_fn(x: torch.Tensor, y: torch.Tensor): return torch.sigmoid(torch.relu(x * y)) inp_shape = PartialShape([Dimension(1, -1), Dimension(-1, 5), 10]) - ref_model = make_ref_pt_model_two_inputs(inp_shape, dtype=Type.dynamic) - return scripted_fn, ref_model, {'input': [(inp_shape), (inp_shape)]} + ref_model = make_ref_pt_model_two_inputs(inp_shape) + return scripted_fn, ref_model, {'input': [(inp_shape, Type.f32), (inp_shape, Type.f32)]} def create_pytorch_nn_module_layout_list(tmp_dir): @@ -469,9 +469,9 @@ def create_pytorch_nn_module_scale_list_compression_enabled(tmp_dir): def create_pytorch_nn_module_shapes_list_static(tmp_dir): pt_model = make_pt_model_two_inputs() - ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20], dtype=Type.dynamic) + ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20]) - return pt_model, ref_model, {'input': [[1, 3, 20, 20], [1, 3, 20, 20]]} + return pt_model, ref_model, {'input': [([1, 3, 20, 20], Type.f32), ([1, 3, 20, 20], Type.f32)]} def create_pytorch_nn_module_shapes_list_static_via_input(tmp_dir): @@ -487,17 +487,16 @@ def create_pytorch_nn_module_shapes_list_dynamic(tmp_dir): [-1, 3, 20, Dimension(-1, 20)]] param1 = ov.opset8.parameter(PartialShape( - inp_shapes[0]), name="x", dtype=Type.dynamic) + inp_shapes[0]), name="x", dtype=Type.f32) param2 = ov.opset8.parameter(PartialShape( - inp_shapes[1]), name="y", dtype=Type.dynamic) - cl = ov.opset8.convert_like(param2, param1) - mul = ov.opset8.multiply(param1, cl) + inp_shapes[1]), name="y", dtype=Type.f32) + mul = ov.opset8.multiply(param1, param2) relu = ov.opset8.relu(mul) sigm = ov.opset8.sigmoid(relu) parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input': inp_shapes} + return pt_model, ref_model, {'input': [(inp_shapes[0], Type.f32), (inp_shapes[1], Type.f32)]} def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): @@ -520,8 +519,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): def create_pytorch_nn_module_shapes_list_dynamic_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)]] - ref_model = 
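Since both parameters are now declared f32, the reference graphs in these cases no longer need the convert_like bridge between the inputs. Standalone, the simplified reference looks roughly like this (shapes illustrative; assumes the ov/opset8 import style these tests already use):

import openvino.runtime as ov
from openvino.runtime import Model, PartialShape, Type

param1 = ov.opset8.parameter(PartialShape([-1, 3, 20, 20]), name="x", dtype=Type.f32)
param2 = ov.opset8.parameter(PartialShape([-1, 3, 20, 20]), name="y", dtype=Type.f32)
# With matching element types, multiply takes both parameters directly.
sigm = ov.opset8.sigmoid(ov.opset8.relu(ov.opset8.multiply(param1, param2)))
ref_model = Model([sigm], [param1, param2], "test")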
make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -534,8 +533,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_single_input_via_input(tmp_dir) def create_pytorch_nn_module_shapes_list_static_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[1, 3, 20, 20]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[1, 3, 20, 20], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} diff --git a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py index 6adbf0d033e98f..7e1758bd84d4fe 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py @@ -641,7 +641,7 @@ def f(x, y): @pytest.mark.precommit def test_pytorch_decoder_can_convert_scripted_function(): - from openvino.tools.mo import convert_model + from openvino import convert_model, Type scripted = torch.jit.script(f) - model = convert_model(scripted) + model = convert_model(scripted, input=[Type.f32, Type.f32]) assert model is not None diff --git a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py index 6b4d748fc45632..f76c7b1fa97ba8 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py @@ -22,6 +22,9 @@ def forward(self, x): class aten_multi_input_output(torch.nn.Module): def forward(self, x, y, z): + x = x.to(torch.float32) + y = y.to(torch.float32) + z = z.to(torch.float32) return torch.nn.functional.relu(x), x * y, z / x diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 00cdc427056518..f8b726c4c5f66d 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -77,10 +77,13 @@ def use_torch_compile_backend(): if use_torch_compile_backend(): self.torch_compile_backend_test(model, torch_inputs, custom_eps) else: + trace_model = kwargs.get('trace_model', False) + freeze_model = kwargs.get('freeze_model', True) with torch.no_grad(): - trace_model = kwargs.get('trace_model', False) - freeze_model = kwargs.get('freeze_model', True) - smodel, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) + if kwargs.get('use_convert_model', False): + smodel, converted_model = self.convert_via_mo(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) + else: + smodel, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) if kind is not None and not isinstance(kind, (tuple, list)): kind = [kind] @@ -162,12 +165,13 @@ def _prepare_input(self): raise RuntimeError("Please provide inputs generation function") def convert_via_mo(self, model, example_input, trace_model, dynamic_shapes, ov_inputs, freeze_model): - from openvino.tools.ovc import convert_model - kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]} + from openvino import convert_model, PartialShape if trace_model: 
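In isolation, the branching this helper now performs: traced modules keep the example_input kwarg, while scripted modules get an explicit dtype plus a fully dynamic shape per input (the model and inputs below are illustrative):

import torch
from openvino import convert_model, PartialShape

model = torch.nn.ReLU()
example_input = [torch.zeros(2, 3)]
trace_model = False  # exercise the scripted branch
if trace_model:
    source = torch.jit.trace(model, example_input[0])
    kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]}
else:
    source = torch.jit.script(model)
    # dtype comes from the example tensor; every dimension is left dynamic
    kwargs = {"input": [(i.dtype, PartialShape([-1] * len(i.shape))) for i in example_input]}
ov_model = convert_model(source, **kwargs)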
decoder = TorchScriptPythonDecoder(model, example_input=example_input, skip_freeze=not freeze_model) + kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]} else: decoder = TorchScriptPythonDecoder(model, skip_freeze=not freeze_model) + kwargs = {"input": [(i.dtype, PartialShape([-1] * len(i.shape))) for i in example_input]} smodel = decoder.pt_module print(smodel.inlined_graph) if not dynamic_shapes: diff --git a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py index c01e58c2107eec..09be641a0fb96e 100644 --- a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py +++ b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -49,6 +51,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool3d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) @@ -94,6 +98,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool2d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) @@ -139,6 +145,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool1d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_add.py b/tests/layer_tests/pytorch_tests/test_add.py index 7cf243b0577bcb..d89a05420eb7e1 100644 --- a/tests/layer_tests/pytorch_tests/test_add.py +++ b/tests/layer_tests/pytorch_tests/test_add.py @@ -43,7 +43,7 @@ def forward2(self, lhs, rhs): @pytest.mark.parametrize("op_type", ["add", "add_"]) def test_add(self, ie_device, precision, ir_version, alpha, input_rhs, op_type): self.input_rhs = input_rhs - self._test(*self.create_model(alpha, op_type), ie_device, precision, ir_version) + self._test(*self.create_model(alpha, op_type), ie_device, precision, ir_version, use_convert_model=True) class TestAddTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_all.py b/tests/layer_tests/pytorch_tests/test_all.py index b5255f197cfef0..ca9b734c1ad1dd 100644 --- a/tests/layer_tests/pytorch_tests/test_all.py +++ b/tests/layer_tests/pytorch_tests/test_all.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ 
-68,6 +70,8 @@ def test_all_noparams(self, input_tensor, ie_device, precision, ir_version): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_all(self, input_tensor, keepdim, ie_device, precision, ir_version): self.input_tensor = input_tensor for dim in range(len(input_tensor.shape)): diff --git a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py index 05abf128da400d..80ed6fcb872b5f 100644 --- a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py +++ b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -71,6 +73,8 @@ def forward(self, x): @pytest.mark.parametrize("dtype", ["float32", "int32", "int64"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_argmin_argmax(self, axes, keep_dims, op_type, dtype, ie_device, precision, ir_version): self._test(*self.create_model(op_type, axes, keep_dims), ie_device, precision, ir_version, trace_model=True, diff --git a/tests/layer_tests/pytorch_tests/test_as_strided.py b/tests/layer_tests/pytorch_tests/test_as_strided.py new file mode 100644 index 00000000000000..9bfaa66d3a7f6b --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_as_strided.py @@ -0,0 +1,125 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestAsStrided(PytorchLayerTest): + def _prepare_input(self): + return (np.random.randn(8, 8).astype(np.float32),) + + def create_model(self, size, stride, offset): + class aten_as_strided(torch.nn.Module): + def __init__(self, size, stride, offset): + super().__init__() + self.size = size + self.stride = stride + self.offset = offset + + def forward(self, x): + return torch.as_strided(x, self.size, self.stride, self.offset) + + ref_net = None + + return aten_as_strided(size, stride, offset), ref_net, "aten::as_strided" + + @pytest.mark.parametrize( + "size,stride", + [ + ([1], [1]), + ([2, 2], [1, 1]), + ([5, 4, 3], [1, 3, 7]), + ([5, 5, 5], [5, 0, 5]), + ([1, 2, 3, 4], [4, 3, 2, 1]), + ], + ) + @pytest.mark.parametrize("offset", [None, 1, 3, 7]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_as_strided(self, size, stride, offset, ie_device, precision, ir_version): + self._test(*self.create_model(size, stride, offset), ie_device, precision, ir_version, trace_model=True) + + +class TestAsStridedListConstruct(PytorchLayerTest): + def _prepare_input(self, size_shape_tensor=[1], stride_shape_tensor=[1]): + return ( + np.random.randn(8, 8).astype(np.float32), + np.ones(size_shape_tensor), + np.ones(stride_shape_tensor), + ) + + def create_model(self, size, stride, offset, mode): + class aten_as_strided(torch.nn.Module): + def __init__(self, size, stride, offset, mode): + super().__init__() + self.size = size + self.stride = stride + self.size_shape_tensor = torch.empty(size) + self.stride_shape_tensor = torch.empty(stride) + self.offset = offset + modes = { + "no_const": self.forward_no_const, + "stride_const": self.forward_stride_const, + "size_const": self.forward_size_const, + } + 
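For reference, the aten::as_strided semantics these new tests exercise: a view that re-reads the flat storage with the given size, stride, and offset (values below are illustrative):

import torch

x = torch.arange(16.0).reshape(4, 4)
view = torch.as_strided(x, size=[2, 2], stride=[4, 1], storage_offset=1)
# Reads storage offsets 1, 2, 5, 6: tensor([[1., 2.], [5., 6.]])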
self.forward = modes.get(mode) + + def forward_no_const(self, x, size_shape_tensor, stride_shape_tensor): + sz1, sz2, sz3 = size_shape_tensor.shape + st1, st2, st3 = stride_shape_tensor.shape + return torch.as_strided(x, [sz1, sz2, sz3], [st1, st2, st3], self.offset) + + def forward_stride_const(self, x, size_shape_tensor, stride_shape_tensor): + sz1, sz2, sz3 = size_shape_tensor.shape + return torch.as_strided(x, [sz1, sz2, sz3], self.stride, self.offset) + + def forward_size_const(self, x, size_shape_tensor, stride_shape_tensor): + st1, st2, st3 = stride_shape_tensor.shape + return torch.as_strided(x, self.size, [st1, st2, st3], self.offset) + + ref_net = None + + return aten_as_strided(size, stride, offset, mode), ref_net, ["aten::as_strided", "prim::ListConstruct"] + + @pytest.mark.parametrize("size,stride", [([5, 4, 3], [1, 3, 7]), ([5, 5, 5], [5, 0, 5])]) + @pytest.mark.parametrize("offset", [None, 7]) + @pytest.mark.parametrize("mode", ["no_const", "stride_const", "size_const"]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_as_strided_list_construct(self, size, stride, offset, mode, ie_device, precision, ir_version): + inp_kwargs = {"size_shape_tensor": size, "stride_shape_tensor": stride} + self._test( + *self.create_model(size, stride, offset, mode), + ie_device, + precision, + ir_version, + kwargs_to_prepare_input=inp_kwargs, + trace_model=True + ) + + +class TestAsStridedLongformer(PytorchLayerTest): + def _prepare_input(self): + return (np.random.randn(1, 10, 20, 40).astype(np.float32).transpose([0, 2, 3, 1]),) + + def create_model(self): + class aten_as_strided_lf(torch.nn.Module): + def forward(self, x): + chunk_size = list(x.size()) + chunk_size[1] = chunk_size[1] * 2 - 1 + chunk_stride = list(x.stride()) + chunk_stride[1] = chunk_stride[1] // 2 + return x.as_strided(size=chunk_size, stride=chunk_stride) + + ref_net = None + + return aten_as_strided_lf(), ref_net, "aten::as_strided" + + @pytest.mark.nightly + @pytest.mark.precommit + def test_as_strided_lf(self, ie_device, precision, ir_version): + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, freeze_model=False) diff --git a/tests/layer_tests/pytorch_tests/test_comparision.py b/tests/layer_tests/pytorch_tests/test_comparision.py index 98134a274f7bdb..a114afb1f712c8 100644 --- a/tests/layer_tests/pytorch_tests/test_comparision.py +++ b/tests/layer_tests/pytorch_tests/test_comparision.py @@ -55,7 +55,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit def test_comp(self, op, ie_device, precision, ir_version): - self._test(*self.create_model(op), ie_device, precision, ir_version) + self._test(*self.create_model(op), ie_device, precision, ir_version, use_convert_model=True) class TestCompMixedTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_cumsum.py b/tests/layer_tests/pytorch_tests/test_cumsum.py index 926cfe9e95c30a..771eb02768bdf0 100644 --- a/tests/layer_tests/pytorch_tests/test_cumsum.py +++ b/tests/layer_tests/pytorch_tests/test_cumsum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -69,5 +71,7 @@ def forward_out_prim_dtype(self, x, y): @pytest.mark.parametrize("out,dtype_from_input", [(False, False), (True, False), (True, True)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + 
reason='Ticket - 122715') def test_cumsum(self, axis, dtype, out, dtype_from_input, ie_device, precision, ir_version): self._test(*self.create_model(axis, dtype, out, dtype_from_input), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out, "out_dtype": dtype}) diff --git a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py index 8fa207efe85b41..fa4293b275e6c7 100644 --- a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py +++ b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py @@ -10,15 +10,6 @@ from torchvision.ops import deform_conv2d -def xfail_106712(test_param): - return pytest.param( - test_param, - marks=pytest.mark.xfail( - reason="Depending on number of groups and number of output channels, deformable convolution may return incorrect reasults. Ticket 106712" - ), - ) - - params = [ { "weights_shape": [64, 64, 3, 3], @@ -62,15 +53,13 @@ def xfail_106712(test_param): "padding": (2, 2), "dilation": (1, 1), }, - xfail_106712( - { - "weights_shape": [64, 16, 3, 3], - "offset_shape": [1, 18, 64, 64], - "stride": (1, 1), - "padding": (1, 1), - "dilation": (1, 1), - } - ), + { + "weights_shape": [64, 16, 3, 3], + "offset_shape": [1, 18, 64, 64], + "stride": (1, 1), + "padding": (1, 1), + "dilation": (1, 1), + }, { "weights_shape": [60, 16, 3, 3], "offset_shape": [1, 18, 64, 64], @@ -92,15 +81,13 @@ def xfail_106712(test_param): "padding": (1, 1), "dilation": (1, 1), }, - xfail_106712( - { - "weights_shape": [64, 32, 3, 3], - "offset_shape": [1, 36, 68, 68], - "stride": (1, 1), - "padding": (3, 3), - "dilation": (1, 1), - } - ), + { + "weights_shape": [64, 32, 3, 3], + "offset_shape": [1, 36, 68, 68], + "stride": (1, 1), + "padding": (3, 3), + "dilation": (1, 1), + }, { "weights_shape": [62, 32, 3, 3], "offset_shape": [1, 36, 68, 68], diff --git a/tests/layer_tests/pytorch_tests/test_device.py b/tests/layer_tests/pytorch_tests/test_device.py index a36bd731a6dae7..2efdf5c85c50fa 100644 --- a/tests/layer_tests/pytorch_tests/test_device.py +++ b/tests/layer_tests/pytorch_tests/test_device.py @@ -56,7 +56,8 @@ def test_device(self, device_string, ie_device, precision, ir_version): ie_device, precision, ir_version, - trace_model=False + trace_model=False, + use_convert_model=True, ) @pytest.mark.parametrize("device_string", ["cpu", "cuda"]) @@ -68,5 +69,6 @@ def test_device_type(self, device_string, ie_device, precision, ir_version): ie_device, precision, ir_version, - trace_model=False + trace_model=False, + use_convert_model=True, ) diff --git a/tests/layer_tests/pytorch_tests/test_dict.py b/tests/layer_tests/pytorch_tests/test_dict.py index a3e3c29b6baf1f..6e4db9dea825bd 100644 --- a/tests/layer_tests/pytorch_tests/test_dict.py +++ b/tests/layer_tests/pytorch_tests/test_dict.py @@ -23,4 +23,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_dict(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_distance.py b/tests/layer_tests/pytorch_tests/test_distance.py index 1c76a7243b47e3..fb9fade8206996 100644 --- a/tests/layer_tests/pytorch_tests/test_distance.py +++ b/tests/layer_tests/pytorch_tests/test_distance.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from 
pytorch_layer_test_class import PytorchLayerTest @@ -29,8 +31,10 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.parametrize("p", [2., 4., 6., 8.,]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_cdist(self, p, ie_device, precision, ir_version): - self._test(*self.create_model(p), ie_device, precision, ir_version) + self._test(*self.create_model(p), ie_device, precision, ir_version, use_convert_model=True) class TestPairwiseDistance(PytorchLayerTest): @@ -61,5 +65,7 @@ def forward(self, x, y): @pytest.mark.parametrize("p", [2., 4., 6., 8.,]) @pytest.mark.parametrize("eps", [1e-06, 0.00001, 1e-07]) @pytest.mark.parametrize("keepdim", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_cdist(self, p, eps, keepdim, ie_device, precision, ir_version): - self._test(*self.create_model(p, eps, keepdim), ie_device, precision, ir_version) \ No newline at end of file + self._test(*self.create_model(p, eps, keepdim), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_div.py b/tests/layer_tests/pytorch_tests/test_div.py index d6e696b62882d5..564cb2915c8686 100644 --- a/tests/layer_tests/pytorch_tests/test_div.py +++ b/tests/layer_tests/pytorch_tests/test_div.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -47,7 +49,7 @@ def test_div_pt_spec(self, input_array, other_array, rounding_mode, ie_device, p self.other_array = other_array self.other_type = np.float32 self._test(*self.create_model(rounding_mode), - ie_device, precision, ir_version) + ie_device, precision, ir_version, use_convert_model=True) class TestDivTypes(PytorchLayerTest): @@ -116,6 +118,8 @@ def forward3(self, lhs, rhs): ])) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_div_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape, rounding_mode): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_embedding_bag.py b/tests/layer_tests/pytorch_tests/test_embedding_bag.py index 2595b2269316fd..d0c6d0c532856f 100644 --- a/tests/layer_tests/pytorch_tests/test_embedding_bag.py +++ b/tests/layer_tests/pytorch_tests/test_embedding_bag.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -42,6 +44,8 @@ def forward_offsets_per_sample_weights(self, indicies, weight, offsets, per_samp @pytest.mark.precommit @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_embedding_bag(self, ie_device, precision, ir_version, indicies_dtype, per_sample_weights): self._test(*self.create_model(per_sample_weights), ie_device, precision, ir_version, kwargs_to_prepare_input={"indicies_dtype": indicies_dtype, "per_sample_weights": per_sample_weights}, @@ -85,6 +89,8 @@ def forward_per_sample_weights(self, indicies, weight, 
per_sample_wights): @pytest.mark.parametrize("indicies_size", [[1, 1], [2, 5], [3, 10], [4, 7]]) @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_embedding_bag(self, ie_device, precision, ir_version, indicies_dtype, indicies_size, per_sample_weights): self._test(*self.create_model(per_sample_weights), ie_device, precision, ir_version, kwargs_to_prepare_input={"indicies_size": indicies_size, "indicies_dtype": indicies_dtype, "per_sample_weights": per_sample_weights}, diff --git a/tests/layer_tests/pytorch_tests/test_empty.py b/tests/layer_tests/pytorch_tests/test_empty.py index 7b5a4b92563dc5..c504e262038c87 100644 --- a/tests/layer_tests/pytorch_tests/test_empty.py +++ b/tests/layer_tests/pytorch_tests/test_empty.py @@ -134,7 +134,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_empty(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -142,4 +142,4 @@ def test_new_empty(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_empty_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_eq.py b/tests/layer_tests/pytorch_tests/test_eq.py index 91c30df79fabb8..6fa5f4ecfba5eb 100644 --- a/tests/layer_tests/pytorch_tests/test_eq.py +++ b/tests/layer_tests/pytorch_tests/test_eq.py @@ -45,4 +45,4 @@ def test_eq_pt_spec(self, input_array, other_array, types, ie_device, precision, self.input_type = types[0] self.other_array = other_array self.other_type = types[1] - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_fake_quantize.py b/tests/layer_tests/pytorch_tests/test_fake_quantize.py index 6bb1d6601cb43b..3146ac87b90087 100644 --- a/tests/layer_tests/pytorch_tests/test_fake_quantize.py +++ b/tests/layer_tests/pytorch_tests/test_fake_quantize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -46,6 +48,8 @@ def forward(self, x): (1.0, 0, 0, 127), ], ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fake_quantize_per_tensor_affine( self, ie_device, precision, ir_version, scale, zero_point, quant_min, quant_max ): @@ -96,6 +100,8 @@ def forward(self, x): (torch.tensor([-0.005, -0.7, 0.1]), torch.tensor([1, 0, 1], dtype=torch.int32), 0, 0, 255), ], ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', 
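The eager-mode ops behind these newly xfail'ed cases, runnable standalone (scale/zero-point values are illustrative, in the spirit of the parametrization above):

import torch

x = torch.randn(2, 3)
per_tensor = torch.fake_quantize_per_tensor_affine(
    x, scale=1.0, zero_point=0, quant_min=0, quant_max=127)
per_channel = torch.fake_quantize_per_channel_affine(
    x, scale=torch.tensor([0.005, 0.7]),
    zero_point=torch.tensor([0, 1], dtype=torch.int32),
    axis=0, quant_min=0, quant_max=255)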
+ reason='Ticket - 122715') def test_fake_quantize_per_channel_affine( self, ie_device, precision, ir_version, scale, zero_point, axis, quant_min, quant_max ): diff --git a/tests/layer_tests/pytorch_tests/test_floor_divide.py b/tests/layer_tests/pytorch_tests/test_floor_divide.py index cd427acb3dba56..0cdc46333b2651 100644 --- a/tests/layer_tests/pytorch_tests/test_floor_divide.py +++ b/tests/layer_tests/pytorch_tests/test_floor_divide.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest @@ -52,10 +54,12 @@ def forward(self, input_tensor, other_tensor): ])) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_floor_divide(self, input_tensor, other_tensor, ie_device, precision, ir_version): self.input_tensor = input_tensor self.other_tensor = other_tensor - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) @pytest.mark.parametrize('input_tensor', ([ np.random.randint(low=0, high=10, size=5).astype(np.float32), diff --git a/tests/layer_tests/pytorch_tests/test_fp16.py b/tests/layer_tests/pytorch_tests/test_fp16.py index b754306727935c..5952f40f167c54 100644 --- a/tests/layer_tests/pytorch_tests/test_fp16.py +++ b/tests/layer_tests/pytorch_tests/test_fp16.py @@ -29,7 +29,7 @@ def forward(self, x): @pytest.mark.parametrize("to_trace", [True, False]) def test_bf16(self, ie_device, precision, ir_version, to_trace): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=to_trace, freeze_model=False) + ir_version, trace_model=to_trace, freeze_model=False, use_convert_model=True) class TestFP16(PytorchLayerTest): @@ -53,4 +53,4 @@ def forward(self, x): @pytest.mark.parametrize("to_trace", [True, False]) def test_fp16(self, ie_device, precision, ir_version, to_trace): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=to_trace, freeze_model=False) + ir_version, trace_model=to_trace, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index c564b1bb3731b9..cf3794be11e891 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -1,5 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +import platform + import numpy as np import pytest @@ -144,6 +147,8 @@ def forward(self, input_t: torch.Tensor, x:float): @pytest.mark.parametrize("mode", ["", "inplace", "out"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fill(self, shape, value, input_dtype, value_dtype, mode, ie_device, precision, ir_version): self._test(*self.create_model(mode), ie_device, precision, ir_version, kwargs_to_prepare_input={ @@ -183,6 +188,8 @@ def forward(self, x:torch.Tensor, y:float): @pytest.mark.parametrize("wrap", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fill_diagonal(self, shape, value, input_dtype, value_dtype, wrap, ie_device, precision, 
ir_version): self._test(*self.create_model(shape, wrap), ie_device, precision, ir_version, kwargs_to_prepare_input={ @@ -340,7 +347,7 @@ def forward(self, input_tensor: torch.Tensor, x: float): @pytest.mark.precommit def test_new_full(self, shape, value, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}) + kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value,input_dtype", [(0, np.uint8), (1, np.int32), (-1, np.float32), (0.5, np.float64)]) @@ -348,7 +355,7 @@ def test_new_full(self, shape, value, input_dtype, ie_device, precision, ir_vers @pytest.mark.nightly def test_new_full_with_dtype(self, value, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}) + kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}, use_convert_model=True) class TestZerosAndOnes(PytorchLayerTest): @@ -555,7 +562,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_zeros(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -563,7 +570,7 @@ def test_new_zeros(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_zeros_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) class TestNewOnes(PytorchLayerTest): @@ -614,7 +621,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -622,4 +629,4 @@ def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_ones_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_getitem.py b/tests/layer_tests/pytorch_tests/test_getitem.py index b827f626914125..62d7e12ada6e61 100644 --- 
a/tests/layer_tests/pytorch_tests/test_getitem.py +++ b/tests/layer_tests/pytorch_tests/test_getitem.py @@ -102,4 +102,4 @@ def _prepare_input(self): @pytest.mark.parametrize("idx", [-4, -3, -2, -1, 0, 1, 2, 3]) def test_add_cat(self, ie_device, precision, ir_version, idx): self._test(aten_add_getitem(idx), None, ["aten::__getitem__", "aten::add", "prim::ListConstruct"], - ie_device, precision, ir_version, freeze_model=False) + ie_device, precision, ir_version, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_grid_sampler.py b/tests/layer_tests/pytorch_tests/test_grid_sampler.py index b142544c3b6e62..7b55862e2f0c2d 100644 --- a/tests/layer_tests/pytorch_tests/test_grid_sampler.py +++ b/tests/layer_tests/pytorch_tests/test_grid_sampler.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -35,6 +37,8 @@ def forward(self, input, grid): @pytest.mark.parametrize("align_corners", [True, False, None]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_grid_sampler(self, h_in, w_in, h_out, w_out, mode, padding_mode, align_corners, ie_device, precision, ir_version): self._test(*self.create_model(mode, padding_mode, align_corners), ie_device, precision, ir_version, kwargs_to_prepare_input={ "h_in": h_in, "w_in": w_in, "h_out": h_out, "w_out": w_out diff --git a/tests/layer_tests/pytorch_tests/test_if.py b/tests/layer_tests/pytorch_tests/test_if.py index b4fd9470cc8be3..fa4b1e9967dd02 100644 --- a/tests/layer_tests/pytorch_tests/test_if.py +++ b/tests/layer_tests/pytorch_tests/test_if.py @@ -39,4 +39,4 @@ def forward(self, x, y): @pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 114818") def test_if(self, y, ie_device, precision, ir_version): self.y = y - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_index.py b/tests/layer_tests/pytorch_tests/test_index.py index 6f7cea86990d2c..4b4e53690f927e 100644 --- a/tests/layer_tests/pytorch_tests/test_index.py +++ b/tests/layer_tests/pytorch_tests/test_index.py @@ -150,4 +150,4 @@ def forward(self, x): [2, 2, 3, 4])) def test_index_mask(self, input_shape, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, kwargs_to_prepare_input={ - "input_shape": input_shape}, trace_model=True) + "input_shape": input_shape}, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_index_put_.py b/tests/layer_tests/pytorch_tests/test_index_put_.py index 55cbe39bd92d58..6f94a0912d4df2 100644 --- a/tests/layer_tests/pytorch_tests/test_index_put_.py +++ b/tests/layer_tests/pytorch_tests/test_index_put_.py @@ -162,7 +162,7 @@ def test_nonzero_index_put_(self, ie_device, precision, ir_version, input_data, self.values = input_data["values"] self.indices_0 = indices[0] self.indices_1 = indices[1] - self._test(*self.create_model(accumulate), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(accumulate), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) class TestMask_IndexPut(PytorchLayerTest): def _prepare_input(self): @@ -181,4 +181,4 @@ def 
forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit def test_nonzero_index_put_(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_instance_norm.py b/tests/layer_tests/pytorch_tests/test_instance_norm.py index 2fe3f5e13e066a..3ec2dd0144573d 100644 --- a/tests/layer_tests/pytorch_tests/test_instance_norm.py +++ b/tests/layer_tests/pytorch_tests/test_instance_norm.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -60,6 +62,8 @@ def forward(self, x): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_group_norm(self, params, ie_device, precision, ir_version, kwargs_to_prepare_input): self._test(*self.create_model(**params), ie_device, precision, ir_version, kwargs_to_prepare_input=kwargs_to_prepare_input, diff --git a/tests/layer_tests/pytorch_tests/test_len.py b/tests/layer_tests/pytorch_tests/test_len.py index d6d3a7dc211564..7aa5f020b9c7fc 100644 --- a/tests/layer_tests/pytorch_tests/test_len.py +++ b/tests/layer_tests/pytorch_tests/test_len.py @@ -48,7 +48,7 @@ def test_len(self, ie_device, precision, ir_version, input_tensor): def test_len_int_list(self, ie_device, precision, ir_version, input_tensor): self.input_tensor = input_tensor self._test(*self.create_model_int_list(), - ie_device, precision, ir_version) + ie_device, precision, ir_version, use_convert_model=True) class TestLenEmpty(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_linspace.py b/tests/layer_tests/pytorch_tests/test_linspace.py index aa6f70d3d71c89..4cf623e55fafad 100644 --- a/tests/layer_tests/pytorch_tests/test_linspace.py +++ b/tests/layer_tests/pytorch_tests/test_linspace.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -63,6 +65,8 @@ def forward(self, start, end, steps, d): @pytest.mark.parametrize( "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linspace_with_prim_dtype(self, dtype, end, start, steps, ie_device, precision, ir_version): self._test( *self.create_model(dtype, ref_dtype=True), @@ -79,6 +83,8 @@ def test_linspace_with_prim_dtype(self, dtype, end, start, steps, ie_device, pre "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] ) @pytest.mark.parametrize("use_out", [False, True]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linspace_with_out(self, dtype, use_out, end, start, steps, ie_device, precision, ir_version): self._test( *self.create_model(dtype=dtype, use_out=use_out), diff --git a/tests/layer_tests/pytorch_tests/test_listunpack.py b/tests/layer_tests/pytorch_tests/test_listunpack.py index b001bccd2a97d1..39d72bfe54c6e9 100644 --- a/tests/layer_tests/pytorch_tests/test_listunpack.py +++ 
b/tests/layer_tests/pytorch_tests/test_listunpack.py @@ -123,6 +123,7 @@ def test_listconstruct_getitem_listunpack( ie_device, precision, ir_version, + use_convert_model=True, ) class TestMeshgridListUnpack(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_mul.py b/tests/layer_tests/pytorch_tests/test_mul.py index f22fd7b03bc0cb..c7aea00ee8ad89 100644 --- a/tests/layer_tests/pytorch_tests/test_mul.py +++ b/tests/layer_tests/pytorch_tests/test_mul.py @@ -37,7 +37,7 @@ def test_mul_pt_spec(self, input_array, other_array, ie_device, precision, ir_ve self.input_type = np.float32 self.other_array = other_array self.other_type = np.float32 - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestMulTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py index 41e737dba6221d..26b7cdbd14812b 100644 --- a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py +++ b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -74,6 +76,8 @@ def _prepare_input(self): ["need_weights", "average_attn_weights"], [[False, False], [True, False], [True, True]] ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_native_multi_head_attention(self, ie_device, precision, ir_version, mask, need_weights, average_attn_weights): self._test(aten_native_multi_head_attention(mask, need_weights, average_attn_weights), None, "aten::_native_multi_head_attention", ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_nms.py b/tests/layer_tests/pytorch_tests/test_nms.py index ae09726a23b8f9..b703e98ccaffe9 100644 --- a/tests/layer_tests/pytorch_tests/test_nms.py +++ b/tests/layer_tests/pytorch_tests/test_nms.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest import numpy as np @@ -35,6 +37,8 @@ def forward(self, boxes, scores): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_nms(self, ie_device, precision, ir_version, boxes_num): self.boxes_num = boxes_num self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_norm.py b/tests/layer_tests/pytorch_tests/test_norm.py index aef0a074059950..9422c170401702 100644 --- a/tests/layer_tests/pytorch_tests/test_norm.py +++ b/tests/layer_tests/pytorch_tests/test_norm.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -245,6 +247,8 @@ def forward_out(self, x, y): @pytest.mark.parametrize("dtype", ["float32", "float64", None]) @pytest.mark.parametrize("out", [True, False]) @pytest.mark.parametrize("prim_dtype", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linalg_matrix_norm(self, p, dim, keepdim, dtype, out, 
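Standalone, the op under test in test_linalg_matrix_norm (the ord/dim/keepdim values here are illustrative):

import torch

x = torch.randn(3, 4, 5)
fro = torch.linalg.matrix_norm(x, ord='fro', dim=(-2, -1), keepdim=False)
f64 = torch.linalg.matrix_norm(x, ord=1, dim=(-2, -1), dtype=torch.float64)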
prim_dtype, ie_device, precision, ir_version): self._test(*self.create_model(p, dim, keepdim, dtype, out, prim_dtype), ie_device, precision, ir_version, diff --git a/tests/layer_tests/pytorch_tests/test_or.py b/tests/layer_tests/pytorch_tests/test_or.py index c6592a11af083f..bde1e61ecce74d 100644 --- a/tests/layer_tests/pytorch_tests/test_or.py +++ b/tests/layer_tests/pytorch_tests/test_or.py @@ -25,4 +25,5 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_or(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, dynamic_shapes=False, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, + dynamic_shapes=False, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_pooling.py b/tests/layer_tests/pytorch_tests/test_pooling.py index 3f4c94db6d45d0..f54902282ece1b 100644 --- a/tests/layer_tests/pytorch_tests/test_pooling.py +++ b/tests/layer_tests/pytorch_tests/test_pooling.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -133,6 +135,8 @@ def forward(self, x): @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool1d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, trace_model=True, @@ -151,6 +155,8 @@ def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool2d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, trace_model=True, dynamic_shapes=False) @@ -160,6 +166,8 @@ def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool3d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, trace_model=True, @@ -170,6 +178,8 @@ def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): self._test(*self.create_model("max_pool1d", **params, ceil_mode=ceil_mode, dilation=dilation), ie_device, 
precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, dynamic_shapes=False) @@ -179,6 +189,8 @@ def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_ @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool2d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): to_trace = False if params["stride"] == []: @@ -191,6 +203,8 @@ def test_max_pool2d(self, params, ceil_mode, dilation, ie_device, precision, ir @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool3d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): self._test(*self.create_model("max_pool3d", **params, ceil_mode=ceil_mode, dilation=dilation), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, dynamic_shapes=False) @@ -200,6 +214,8 @@ def test_max_pool3d(self, params, ceil_mode, dilation, ie_device, precision, ir_ @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") @@ -211,6 +227,8 @@ def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precis @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") @@ -225,6 +243,8 @@ def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool3d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") diff --git a/tests/layer_tests/pytorch_tests/test_pow.py b/tests/layer_tests/pytorch_tests/test_pow.py index 9cf6468404e5d3..92e65898e353eb 100644 --- a/tests/layer_tests/pytorch_tests/test_pow.py +++ b/tests/layer_tests/pytorch_tests/test_pow.py @@ -41,7 +41,7 @@ def forward(self, input_data, exponent): @pytest.mark.precommit def test_pow(self, ie_device, precision, ir_version, test_input): self.test_input = test_input - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestPowMixedTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_quantize.py b/tests/layer_tests/pytorch_tests/test_quantize.py index f1a7522159090e..600821fa16204c 100644 --- a/tests/layer_tests/pytorch_tests/test_quantize.py +++ b/tests/layer_tests/pytorch_tests/test_quantize.py @@ -1,6 +1,8 @@ # Copyright (C) 
2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -49,6 +51,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantize_per_tensor_dequantize(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(aten_quantize_per_tensor_aten_dequantize(scale, zero_point, dtype), None, ["aten::quantize_per_tensor", "aten::dequantize"], @@ -88,6 +92,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantize_per_channel_dequantize(self, scale, zero_point, dtype, axis, ie_device, precision, ir_version): np.random.shuffle(scale), np.random.shuffle(zero_point) if dtype == torch.quint8: zero_point = abs(zero_point) diff --git a/tests/layer_tests/pytorch_tests/test_quantized_add.py b/tests/layer_tests/pytorch_tests/test_quantized_add.py index 960d3b4cca7aef..59a992fc088d5a 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_add.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_add.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_add(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_add(scale, zero_point, dtype), None, ["quantized::add"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py b/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py index 4a0dd797e3525c..6cb64dfab053d6 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_add_relu(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_add_relu(scale, zero_point, dtype), None, ["quantized::add_relu"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_cat.py b/tests/layer_tests/pytorch_tests/test_quantized_cat.py index db6e5278bb5c50..ce0bc880e78f66 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_cat.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_cat.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -73,6 +75,8 @@ def _prepare_input(self): @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 
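The eager-mode pair these tests convert through aten::quantize_per_tensor / aten::dequantize, standalone (values illustrative; quint8 requires a non-negative zero point, which is why the tests above take abs(zero_point)):

import torch

x = torch.randn(2, 3)
q = torch.quantize_per_tensor(x, scale=0.1, zero_point=2, dtype=torch.quint8)
y = torch.dequantize(q)  # back to float32, within one quantization step of x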
'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_quantized_cat(scale, zero_point, dtype), @@ -91,6 +95,8 @@ def test_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_ @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_append_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_append_quantized_cat(scale, zero_point, dtype), @@ -130,6 +136,8 @@ def test_loop_append_quantized_cat(self, scale, zero_point, dtype, ie_device, pr @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_add_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_add_quantized_cat(scale, zero_point, dtype), diff --git a/tests/layer_tests/pytorch_tests/test_quantized_convnd.py b/tests/layer_tests/pytorch_tests/test_quantized_convnd.py index cf3ec0142cf46b..bc4ac9e1788b34 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_convnd.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_convnd.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import numpy as np import torch @@ -78,6 +80,8 @@ def forward(self, x): @pytest.mark.parametrize("zero_point", [0, 1]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_conv2d(self, params, bias, relu, scale, zero_point, ie_device, precision, ir_version): self._test( *self.create_model(**params, bias=bias, relu=relu, diff --git a/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py b/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py index a0b40783c4e98d..4508bbcb266ab6 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -36,6 +38,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_hardswish(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_hardswish(scale, zero_point, dtype), None, ["quantized::hardswish"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_linear.py b/tests/layer_tests/pytorch_tests/test_quantized_linear.py index 1ded932f234055..bd89ea48303f25 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_linear.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_linear.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import torch import numpy as np @@ -73,6 +75,8 @@ def forward(self, inp): @pytest.mark.parametrize("trace", 
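The Darwin/arm64 xfail marker added throughout these quantized-op tests is byte-for-byte identical. A minimal sketch (not part of this diff) of how the repeated condition could be collected once per module; the helper name is hypothetical:

import platform

import pytest

# Hypothetical shared marker; condition and ticket number taken from the hunks above.
darwin_arm64_xfail = pytest.mark.xfail(
    condition=platform.system() == 'Darwin' and platform.machine() == 'arm64',
    reason='Ticket - 122715')


@darwin_arm64_xfail
def test_example():
    # Runs normally on other platforms; reported as xfail/xpass on macOS arm64.
    assert True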
[True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_linear(self, params, scale, zero_point, trace, ie_device, precision, ir_version): input_shape = params.get("input_shape") weight_shape = params.get("weight_shape") @@ -84,6 +88,8 @@ def test_quantized_linear(self, params, scale, zero_point, trace, ie_device, pre @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_hardtanh_linear(self, trace, inplace, ie_device, precision, ir_version): self._test(*self.create_hardtanh_model([10, 9], True, 1, 0.3, inplace), ie_device, precision, ir_version, kwargs_to_prepare_input={"input_shape": [2, 3, 9]}, trace_model=trace, freeze_model=False, quantized_ops=True, quant_size=0.3) diff --git a/tests/layer_tests/pytorch_tests/test_quantized_mul.py b/tests/layer_tests/pytorch_tests/test_quantized_mul.py index cc877daa919b5b..d170d70308b6a5 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_mul.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_mul.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_mul(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_mul(scale, zero_point, dtype), None, ["quantized::mul"], diff --git a/tests/layer_tests/pytorch_tests/test_remainder.py b/tests/layer_tests/pytorch_tests/test_remainder.py index 4a499e85a37870..05ad2d3b1387fe 100644 --- a/tests/layer_tests/pytorch_tests/test_remainder.py +++ b/tests/layer_tests/pytorch_tests/test_remainder.py @@ -32,7 +32,7 @@ def forward(self, lhs, rhs): @pytest.mark.precommit def test_remainder(self, ie_device, precision, ir_version, input_rhs): self.input_rhs = input_rhs - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestRemainderTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_repeat.py b/tests/layer_tests/pytorch_tests/test_repeat.py index 45263366c76c54..884a51e2a24f6f 100644 --- a/tests/layer_tests/pytorch_tests/test_repeat.py +++ b/tests/layer_tests/pytorch_tests/test_repeat.py @@ -77,4 +77,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_repeat_t5(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_rsub.py b/tests/layer_tests/pytorch_tests/test_rsub.py index 64c4b9619d7b73..9c144ad4da247b 100644 --- a/tests/layer_tests/pytorch_tests/test_rsub.py +++ b/tests/layer_tests/pytorch_tests/test_rsub.py @@ -40,9 +40,9 @@ def forward(self, x, y:int, alpha: float): @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, 
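Many of the PyTorch layer tests above now pass use_convert_model=True. The flag's name suggests the harness additionally exercises the public openvino.convert_model API rather than only the legacy conversion path; standalone, that conversion looks roughly like the sketch below (assuming openvino and torch are installed, and using a trivial module for illustration):

import torch

import openvino as ov

model = torch.nn.ReLU()
# Trace and convert directly from a torch.nn.Module; example_input drives tracing.
ov_model = ov.convert_model(model, example_input=torch.randn(1, 3))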
input_data): + def test_rsub_f(self, ie_device, precision, ir_version, input_data): self.input_data = input_data - self._test(*self.create_model(second_type="float"), ie_device, precision, ir_version) + self._test(*self.create_model(second_type="float"), ie_device, precision, ir_version, use_convert_model=True) @pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 4).astype(np.float32), np.array(5).astype(int), @@ -50,9 +50,9 @@ def test_rsub(self, ie_device, precision, ir_version, input_data): @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, input_data): + def test_rsub_i(self, ie_device, precision, ir_version, input_data): self.input_data = input_data - self._test(*self.create_model(second_type="int"), ie_device, precision, ir_version) + self._test(*self.create_model(second_type="int"), ie_device, precision, ir_version, use_convert_model=True) class TestRsubTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py index 22ed325471823b..69c600a0b7562d 100644 --- a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py +++ b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py @@ -36,6 +36,7 @@ def forward(self, query, key, value): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize(['mask', "is_causal"], [(False, False), (False, True), (True, True), (True, False)]) def test_scaled_dot_product_atten(self, ie_device, precision, ir_version, mask, is_causal): self._test(*self.create_model(mask, is_causal),ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_strided_const.py b/tests/layer_tests/pytorch_tests/test_strided_const.py index 438edbc88e24fe..ab33e92f88b4b8 100644 --- a/tests/layer_tests/pytorch_tests/test_strided_const.py +++ b/tests/layer_tests/pytorch_tests/test_strided_const.py @@ -31,4 +31,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_strides(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_sub.py b/tests/layer_tests/pytorch_tests/test_sub.py index aa97b0f23653fb..381d1672454cbe 100644 --- a/tests/layer_tests/pytorch_tests/test_sub.py +++ b/tests/layer_tests/pytorch_tests/test_sub.py @@ -50,7 +50,7 @@ def _forward_inplace(self, x, y, alpha: float): @pytest.mark.precommit def test_sub(self, ie_device, precision, ir_version, input_data, inplace): self.input_data = input_data - self._test(*self.create_model(inplace), ie_device, precision, ir_version) + self._test(*self.create_model(inplace), ie_device, precision, ir_version, use_convert_model=True) class TestSubTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_transpose.py b/tests/layer_tests/pytorch_tests/test_transpose.py index b3378761da8c74..5dec55ec59c597 100644 --- a/tests/layer_tests/pytorch_tests/test_transpose.py +++ b/tests/layer_tests/pytorch_tests/test_transpose.py @@ -91,4 +91,5 @@ def test_t_small(self, num_dims, input_dtype, mode, ie_device, precision, ir_ver precision, ir_version, kwargs_to_prepare_input={"num_dims": num_dims, "input_dtype": input_dtype}, + use_convert_model=True, ) diff --git a/tests/layer_tests/pytorch_tests/test_tuple_construct.py 
b/tests/layer_tests/pytorch_tests/test_tuple_construct.py index 1582df48c4b370..45413a940f132b 100644 --- a/tests/layer_tests/pytorch_tests/test_tuple_construct.py +++ b/tests/layer_tests/pytorch_tests/test_tuple_construct.py @@ -60,7 +60,7 @@ def forward(self, x): @pytest.mark.parametrize("case", ["single", "multiple", "none", "list", "tensor_tail", "list_and_tuple"]) @pytest.mark.nightly def test_tuple_construct(self, case, ie_device, precision, ir_version): - self._test(*self.create_model(case), ie_device, precision, ir_version) + self._test(*self.create_model(case), ie_device, precision, ir_version, use_convert_model=True) class TestTupleConstructTupleUnpack(PytorchLayerTest): @@ -86,7 +86,7 @@ def prepare_input(self, x): @pytest.mark.nightly def test_tuple_construct_unpack(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, - precision, ir_version, freeze_model=False) + precision, ir_version, freeze_model=False, use_convert_model=True) class TestTupleUnpackParameterSingle(PytorchLayerTest): @@ -208,7 +208,7 @@ def some_func(self, x: Tuple[torch.Tensor, torch.Tensor]): @pytest.mark.nightly def test(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=False, freeze_model=False) + ir_version, trace_model=False, freeze_model=False, use_convert_model=True) class TestTcOutsideTuInsideIfBody(PytorchLayerTest): @@ -236,4 +236,4 @@ def some_func(self, x: Tuple[torch.Tensor, torch.Tensor]): @pytest.mark.nightly def test(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=False, freeze_model=False) + ir_version, trace_model=False, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_var_mean.py b/tests/layer_tests/pytorch_tests/test_var_mean.py index 6ce85988e9edfb..bd8a5a10617eb4 100644 --- a/tests/layer_tests/pytorch_tests/test_var_mean.py +++ b/tests/layer_tests/pytorch_tests/test_var_mean.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -52,6 +54,8 @@ def forward(self, x): @pytest.mark.precommit @pytest.mark.parametrize("unbiased", [True, False]) @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, op_type=op_type), ie_device, precision, ir_version) @@ -61,5 +65,7 @@ def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): @pytest.mark.parametrize("dim", [None, 0, 1, 2, 3, -1, -2, (0, 1), (-1, -2), (0, 1, -1), (0, 1, 2, 3)]) @pytest.mark.parametrize("keepdim", [True, False]) @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_var(self, unbiased, dim, keepdim, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, dim, keepdim, two_args_case=False, op_type=op_type), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py index 
bae3f51ce97ff0..6f3eb1b70ed2f2 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -29,5 +31,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_broadcast_to(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py index b534878970ac59..1ae3464c207b34 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -30,5 +32,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_rfft2d(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py index a5ce2d314aee0b..c7339efaf7f55e 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -40,5 +42,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_segment_sum(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py index 88944c50a38091..896e1789111eaa 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -38,6 +40,8 @@ def create_adjust_contrast_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_adjust_contrast_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_adjust_contrast_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py index 62689f5609cc12..7e3964e68c9c35 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest @@ -126,6 +128,8 @@ def 
create_add_placeholder_const_net(self, x_shape, y_shape, ir_version, op_type 'Xdivy']) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_binary_op(self, params, ie_device, precision, ir_version, temp_dir, op_type, use_new_frontend, use_old_api): if precision == "FP16": diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py index d981b2997542b5..8ab60f9ac65beb 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -40,6 +42,8 @@ def create_bucketize_net(self, input_shape, input_type, boundaries_size): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_bucketize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_bucketize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py b/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py index 92ef18ff5aba98..30cefc07c942d2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -53,6 +55,8 @@ def create_crop_and_resize_net(self, image_shape, num_boxes, crop_size_value, me @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_crop_and_resize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_crop_and_resize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py index 58db73ece154e1..5a6f3883185f23 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -41,6 +43,8 @@ def create_div_no_nan_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_div_no_nan_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_div_no_nan_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py index 43d8da8e38019d..191b46e035a376 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py +++ 
b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_fake_quant_with_min_max_vars_net(self, inputs_shape, min_value, max_v ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_fake_quant_with_min_max_vars_basic(self, params, fake_quant_op, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_If.py b/tests/layer_tests/tensorflow_tests/test_tf_If.py index 0e4e7a6fb249e5..20085e6ac86672 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_If.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_If.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -67,6 +69,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -137,6 +141,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -215,6 +221,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -305,6 +313,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py b/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py index 1504ae706a9b19..ea672ac144d987 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest @@ -31,6 +33,8 @@ def create_leaky_relu_net(self, x_shape, alpha_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_leaky_relu_basic(self, params, ie_device, 
precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_leaky_relu_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py b/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py index c696eaaa0355e5..216fe7b7816de4 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest @@ -28,6 +30,8 @@ def create_lin_space_net(self, num_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_lin_space_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_lin_space_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py index bef52905aa3159..063e310dd8174a 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -39,6 +41,8 @@ def create_log_softmax_net(self, logits_shape): @pytest.mark.precommit @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_log_softmax_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_log_softmax_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py index 4d1fed5747ba11..f08995f3c09d11 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -59,6 +61,8 @@ def create_max_pool_with_argmax_net(self, input_shape, ksize, strides, input_typ ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_max_pool_with_argmax_basic(self, params, input_type, padding, targmax, include_batch_in_index, with_second_output, ie_device, precision, ir_version, temp_dir, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py index 51a1b322af6541..5de76778d1d837 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -30,6 +32,8 @@ def create_normalize_l2_net(shape, axes): @pytest.mark.precommit @pytest.mark.precommit_tf_fe @pytest.mark.nightly + 
@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_normalize_l2_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_normalize_l2_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py index eb3ac133b3687d..7c523740d79f96 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.layer_test_class import check_ir_version from common.tf_layer_test_class import CommonTFLayerTest @@ -145,6 +147,8 @@ def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me @pytest.mark.parametrize("params", test_data_4D) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_pooling_net(**params, ir_version=ir_version, @@ -227,6 +231,8 @@ def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_n @pytest.mark.parametrize("params", test_data_5D) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_pool_5D(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py b/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py index 0006afd9ab9eca..1f5f778db3ac2f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.layer_test_class import check_ir_version @@ -88,6 +90,8 @@ def create_tf_random_uniform_net(self, global_seed, op_seed, x_shape, min_val, m @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_tf_fe + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_random_uniform_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Resize.py b/tests/layer_tests/tensorflow_tests/test_tf_Resize.py index 184a8115772128..c62492c7a76196 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Resize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Resize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -60,6 +62,8 @@ def create_resize_net(self, images_shape, images_type, size_value, align_corners @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_resize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, 
use_old_api): self._test(*self.create_resize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py b/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py index dac986b96c281e..26ddcfdd53bcc2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -69,6 +71,8 @@ def create_tf_scatternd_placeholder_const_net(self, x_shape, indices, updates, i @pytest.mark.parametrize("params", test_data) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_tf_scatter_nd(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_tf_scatternd_placeholder_const_net(**params, ir_version=ir_version, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py b/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py index 5d74c361f51c20..f0f99d4b9cf95f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -41,6 +43,8 @@ def create_segment_sum_net(self, data_shape, segment_ids_shape, data_type, segme @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_segment_sum_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if not use_new_frontend: diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py b/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py index fc9391feaae3e8..574fe3d32949f7 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -34,6 +36,8 @@ def create_softmax_net(self, input_shape): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_softmax_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_softmax_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py index b0f24322b01041..03e83dc39e9c8d 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -33,6 +35,8 @@ def create_space_to_batch_net(self, in_shape, pads_value, block_shape_value): @pytest.mark.parametrize("params", test_data_basic) 
@pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_space_to_batch_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_space_to_batch_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py b/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py new file mode 100644 index 00000000000000..098f099f74d24d --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py @@ -0,0 +1,200 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +def create_tensor_array(data_shape, data_type): + size = data_shape[0] + data = tf.compat.v1.placeholder(data_type, data_shape, 'data') + indices = tf.compat.v1.placeholder(tf.int32, [size], 'indices') + size_const = tf.constant(size, dtype=tf.int32, shape=[]) + handle, flow = tf.raw_ops.TensorArrayV3(size=size_const, dtype=tf.as_dtype(data_type)) + flow = tf.raw_ops.TensorArrayScatterV3(handle=handle, indices=indices, value=data, flow_in=flow) + return handle, flow + + +class TestTensorArraySizeV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_size_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = create_tensor_array(data_shape, data_type) + tf.raw_ops.TensorArraySizeV3(handle=handle, flow_in=flow) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[5], data_type=np.float32), + dict(data_shape=[10, 20, 30], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_size_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_size_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayReadV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['index_to_read'] = rng.integers(0, data_shape[0], []).astype(np.int32) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_read_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = 
create_tensor_array(data_shape, data_type) + index_to_read = tf.compat.v1.placeholder(tf.int32, [], 'index_to_read') + tf.raw_ops.TensorArrayReadV3(handle=handle, index=index_to_read, flow_in=flow, + dtype=tf.dtypes.as_dtype(data_type)) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[6], data_type=np.float32), + dict(data_shape=[8, 5, 6, 10], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_read_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_read_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayWriteGatherV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + assert 'value_to_write' in inputs_info + data_shape = inputs_info['data'] + value_shape = inputs_info['value_to_write'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['value_to_write'] = rng.integers(-10, 10, value_shape).astype(self.data_type) + indices_data = rng.permutation(self.size).astype(np.int32) + inputs_data['indices'] = np.delete(indices_data, np.where(indices_data == self.index_to_write)) + return inputs_data + + def create_tensor_array_write_v3(self, size, data_shape, data_type, index_to_write, indices_to_gather): + self.data_type = data_type + self.size = size + self.index_to_write = index_to_write + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + value_to_write = tf.compat.v1.placeholder(data_type, data_shape[1:], 'value_to_write') + index_to_write_const = tf.constant(index_to_write, dtype=tf.int32, shape=[]) + indices_to_gather_const = tf.constant(indices_to_gather, dtype=tf.int32, shape=[len(indices_to_gather)]) + data = tf.compat.v1.placeholder(data_type, data_shape, 'data') + indices = tf.compat.v1.placeholder(tf.int32, [size - 1], 'indices') + size_const = tf.constant(size, dtype=tf.int32, shape=[]) + handle, flow = tf.raw_ops.TensorArrayV3(size=size_const, dtype=tf.as_dtype(data_type)) + flow = tf.raw_ops.TensorArrayScatterV3(handle=handle, indices=indices, value=data, flow_in=flow) + flow = tf.raw_ops.TensorArrayWriteV3(handle=handle, index=index_to_write_const, + value=value_to_write, flow_in=flow) + tf.raw_ops.TensorArrayGatherV3(handle=handle, indices=indices_to_gather_const, flow_in=flow, + dtype=tf.dtypes.as_dtype(data_type)) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(size=7, data_shape=[6], data_type=np.float32, index_to_write=3, indices_to_gather=[0, 3, 1]), + dict(size=10, data_shape=[9, 2, 4], data_type=np.int32, index_to_write=2, indices_to_gather=[2, 1, 4, 3]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_write_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_write_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + 
use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayConcatV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_concat_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = create_tensor_array(data_shape, data_type) + tensor_array_concat_v3 = tf.raw_ops.TensorArrayConcatV3(handle=handle, flow_in=flow, + dtype=tf.as_dtype(data_type)) + tf.identity(tensor_array_concat_v3[0], name='values') + tf.identity(tensor_array_concat_v3[1], name='length') + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[5, 3, 11, 2], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_concat_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_concat_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py b/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py new file mode 100644 index 00000000000000..74da79c36d52a1 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py @@ -0,0 +1,43 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestToBool(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + x_shape = inputs_info['x'] + inputs_data = {} + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(np.float32) + + return inputs_data + + def create_tobool_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + tf.raw_ops.ToBool(input=x) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[2, 3, 4], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_to_bool_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tobool_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py b/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py index ece6f08471a643..73efaf490b23dd 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py +++ 
b/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -46,6 +48,8 @@ def create_topk_v2_net(self, input_shape, input_type, k, sorted, is_first_output @pytest.mark.parametrize("params", test_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_topk_v2_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_topk_v2_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py new file mode 100644 index 00000000000000..18440dbcd7f44a --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py @@ -0,0 +1,53 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import platform + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestTruncateDiv(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + assert 'y' in inputs_info + x_shape = inputs_info['x'] + y_shape = inputs_info['y'] + inputs_data = {} + # generate x and y to ensure truncation + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(self.input_type) + inputs_data['y'] = np.random.randint(1, 10, y_shape).astype(self.input_type) + return inputs_data + + def create_truncate_div_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.TruncateDiv(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[8, 5], input_type=np.float32), + dict(input_shape=[5, 3], input_type=np.int32), + dict(input_shape=[6, 4], input_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') + def test_truncate_div_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_truncate_div_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TruncateMod.py b/tests/layer_tests/tensorflow_tests/test_tf_TruncateMod.py new file mode 100644 index 00000000000000..48b738095c8bb0 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TruncateMod.py @@ -0,0 +1,49 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestTruncateMod(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + assert 'y' in inputs_info + x_shape = inputs_info['x'] + y_shape = 
inputs_info['y'] + inputs_data = {} + # generate x and y to ensure truncation + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(self.input_type) + inputs_data['y'] = np.random.randint(1, 10, y_shape).astype(self.input_type) + return inputs_data + + def create_truncate_mod_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.TruncateMod(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[8, 5], input_type=np.float32), + dict(input_shape=[5, 3], input_type=np.int32), + dict(input_shape=[6, 4], input_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_truncate_mod_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_truncate_mod_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py b/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py index 09afd6f26330ca..f7dcf2eeb324f2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -55,6 +57,8 @@ def create_unsorted_segment_sum_net(self, data_shape, segment_ids_shape, num_seg ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_unsorted_segment_sum_basic(self, params, data_type, segment_ids_type, num_segments_type, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py index 7c80fbdad88b09..4da47e7b5356c4 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_xlog1py_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_xlog1py_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_xlog1py_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py index 6ecddeb439aed3..911c3b0eea2154 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel 
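The "ensure truncation" comment in the TruncateDiv/TruncateMod input generators refers to rounding toward zero, which differs from Python's and numpy's floor semantics whenever the operands have opposite signs. A worked check of the identity x == y * truncdiv(x, y) + truncmod(x, y):

import numpy as np

x, y = np.int32(-7), np.int32(2)
tdiv = np.trunc(x / y).astype(np.int32)  # -3, whereas floor division x // y gives -4
tmod = x - y * tdiv                      # -1, whereas Python's x % y gives 1
assert x == y * tdiv + tmod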
Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_xlogy_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_xlogy_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_xlogy_net(**params), diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models index 0618d98a4d9f31..31a24b681eb4c5 100644 --- a/tests/model_hub_tests/torch_tests/hf_transformers_models +++ b/tests/model_hub_tests/torch_tests/hf_transformers_models @@ -10,7 +10,6 @@ albert-base-v2,albert AlekseyKorshuk/test_reward_model,reward_model,skip,Load problem alibaba-damo/mgp-str-base,mgp-str,xfail,Compile error: unsupported Einsum allenai/hvila-block-layoutlm-finetuned-docbank,hierarchical_model,skip,Load problem -allenai/longformer-base-4096,longformer,xfail,Unsupported op aten::as_strided ameya772/sentence-t5-base-atis-fine-tuned,T5,skip,Load problem andreasmadsen/efficient_mlm_m0.40,roberta-prelayernorm anton-l/emformer-base-librispeech,emformer,skip,Load problem @@ -68,7 +67,7 @@ facebook/detr-resnet-50,detr facebook/dinov2-base,dinov2,skip,Load problem facebook/dpr-question_encoder-single-nq-base,dpr facebook/encodec_24khz,encodec,xfail,Unsupported op aten::lstm -facebook/esm2_t6_8M_UR50D,esm,xfail,Tracing error: The values for attribute 'shape' do not match +facebook/esm2_t6_8M_UR50D,esm facebook/flava-full,flava,xfail,Tracing problem facebook/flava-image-codebook,flava_image_codebook,skip,Load problem facebook/m2m100_418M,m2m_100 @@ -123,10 +122,10 @@ hf-internal-testing/tiny-random-Data2VecAudioModel,data2vec-audio,skip,Load prob hf-internal-testing/tiny-random-Data2VecTextModel,data2vec-text hf-internal-testing/tiny-random-Data2VecVisionModel,data2vec-vision hf-internal-testing/tiny-random-DeiTModel,deit -hf-internal-testing/tiny-random-DonutSwinModel,donut-swin,xfail,Unsupported op aten::adaptive_avg_pool1d +hf-internal-testing/tiny-random-DonutSwinModel,donut-swin hf-internal-testing/tiny-random-EfficientFormerForImageClassification,efficientformer hf-internal-testing/tiny-random-flaubert,flaubert -hf-internal-testing/tiny-random-FocalNetModel,focalnet,xfail,Unsupported op aten::adaptive_avg_pool1d +hf-internal-testing/tiny-random-FocalNetModel,focalnet hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,gpt_bigcode,xfail,Conversion is failed for: aten::mul hf-internal-testing/tiny-random-GPTJModel,gptj hf-internal-testing/tiny-random-groupvit,groupvit @@ -155,7 +154,7 @@ hf-internal-testing/tiny-random-Speech2TextModel,speech_to_text,skip,Load proble hf-internal-testing/tiny-random-speech-encoder-decoder,speech-encoder-decoder,skip,Load problem hf-internal-testing/tiny-random-SplinterModel,splinter hf-internal-testing/tiny-random-SqueezeBertModel,squeezebert -hf-internal-testing/tiny-random-SwinModel,swin,xfail,Unsupported op aten::adaptive_avg_pool1d +hf-internal-testing/tiny-random-SwinModel,swin hf-internal-testing/tiny-random-unispeech,unispeech,skip,Load problem hf-internal-testing/tiny-random-UniSpeechSatModel,unispeech-sat,skip,Load problem hf-internal-testing/tiny-random-vision_perceiver_conv,perceiver @@ -247,7 +246,7 @@ microsoft/markuplm-base,markuplm 
microsoft/resnet-50,resnet microsoft/speecht5_hifigan,hifigan,skip,Load problem microsoft/speecht5_tts,speecht5,skip,Load problem -microsoft/swinv2-tiny-patch4-window8-256,swinv2,xfail,Unsupported op aten::adaptive_avg_pool1d +microsoft/swinv2-tiny-patch4-window8-256,swinv2 microsoft/table-transformer-detection,table-transformer microsoft/wavlm-large,wavlm,skip,Load problem microsoft/xclip-base-patch32,xclip,skip,Load problem @@ -301,7 +300,6 @@ pie/example-re-textclf-tacred,TransformerTextClassificationModel,skip,Load probl pleisto/yuren-baichuan-7b,multimodal_llama,skip,Load problem predictia/europe_reanalysis_downscaler_convbaseline,convbilinear,skip,Load problem predictia/europe_reanalysis_downscaler_convswin2sr,conv_swin2sr,skip,Load problem -pszemraj/led-large-book-summary,led,xfail,Unsupported op aten::as_strided qmeeus/whisper-small-ner-combined,whisper_for_slu,skip,Load problem raman-ai/pcqv2-tokengt-lap16,tokengt,skip,Load problem range3/pegasus-gpt2-medium,pegasusgpt2,skip,Load problem @@ -330,8 +328,8 @@ sheonhan/ict-imagenet-256,ict,skip,Load problem shibing624/text2vec-base-chinese-paraphrase,ernie shikhartuli/flexibert-mini,flexibert,skip,Load problem shikras/shikra-7b-delta-v1-0708,shikra,skip,Load problem -shi-labs/dinat-mini-in1k-224,dinat,xfail,Unsupported op aten::adaptive_avg_pool1d -shi-labs/nat-mini-in1k-224,nat,xfail,Unsupported op aten::adaptive_avg_pool1d +shi-labs/dinat-mini-in1k-224,dinat,xfail,Accuracy validation failed +shi-labs/nat-mini-in1k-224,nat,xfail,Accuracy validation failed shi-labs/oneformer_ade20k_swin_large,oneformer,skip,Load problem shuqi/seed-encoder,seed_encoder,skip,Load problem sijunhe/nezha-cn-base,nezha diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index 184e725a04f9b9..caeb2e0ff2a01d 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py @@ -292,7 +292,8 @@ def teardown_method(self): cleanup_dir(hf_hub_cache_dir) super().teardown_method() - @pytest.mark.parametrize("name,type", [("bert-base-uncased", "bert"), + @pytest.mark.parametrize("name,type", [("allenai/led-base-16384", "led"), + ("bert-base-uncased", "bert"), ("facebook/bart-large-mnli", "bart"), ("google/flan-t5-base", "t5"), ("google/tapas-large-finetuned-wtq", "tapas"), diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index fac4752c318250..4eed13c9a79af6 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -414,14 +414,14 @@ if(ENABLE_OV_PADDLE_FRONTEND OR ENABLE_OV_ONNX_FRONTEND OR ENABLE_OV_TF_FRONTEND if(CMAKE_VERBOSE_MAKEFILE) set(Protobuf_DEBUG ON) endif() - if(OV_VCPKG_BUILD) - set(protobuf_config CONFIG) - endif() # try to find newer version first (major is changed) # see https://protobuf.dev/support/version-support/ and # https://github.com/protocolbuffers/protobuf/commit/d61f75ff6db36b4f9c0765f131f8edc2f86310fa - find_package(Protobuf 4.22.0 QUIET ${protobuf_config}) + find_package(Protobuf 4.22.0 QUIET CONFIG) if(NOT Protobuf_FOUND) + if(OV_VCPKG_BUILD) + set(protobuf_config CONFIG) + endif() # otherwise, fallback to existing default find_package(Protobuf 3.20.3 REQUIRED ${protobuf_config}) endif() diff --git a/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp b/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp index 5c4c6f7031e6d8..5a6f237b27cda1 100644 --- a/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp +++ 
@@ -1214,7 +1214,7 @@ or column if there are N channels, or have N columns if there is a single channe
 @param src Input set of 2D points stored in one of possible containers: Mat,
 std::vector<cv::Point2i>, std::vector<cv::Point2f>, std::vector<cv::Point2d>.
 @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER
-and @ref DIST_C are not suppored.
+and @ref DIST_C are not supported.
 @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
 is chosen.
 @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the
@@ -1286,7 +1286,7 @@ or column if there are N channels, or have N columns if there is a single channe
 @param src Input set of 3D points stored in one of possible containers: Mat,
 std::vector<cv::Point3i>, std::vector<cv::Point3f>, std::vector<cv::Point3d>.
 @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER
-and @ref DIST_C are not suppored.
+and @ref DIST_C are not supported.
 @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
 is chosen.
 @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the
diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo
index e0e434f64a4da0..bb98fe444c84d6 160000
--- a/thirdparty/open_model_zoo
+++ b/thirdparty/open_model_zoo
@@ -1 +1 @@
-Subproject commit e0e434f64a4da07274c31c1aae48fbdcfa087fb0
+Subproject commit bb98fe444c84d67fd67ee7ec15a340722c652053
diff --git a/tools/constraints.txt b/tools/constraints.txt
index 18a3080d3a1e78..2e1588a005e03f 100644
--- a/tools/constraints.txt
+++ b/tools/constraints.txt
@@ -18,4 +18,4 @@ pyenchant>=3.0.0
 test-generator==0.1.1
 py>=1.9.0
 urllib3>=1.26.4
-openvino-telemetry>=2023.1.0
+openvino-telemetry>=2023.2.1
diff --git a/tools/mo/openvino/tools/mo/convert_impl.py b/tools/mo/openvino/tools/mo/convert_impl.py
index ae6c39a144b0a3..9d683f4b6ac977 100644
--- a/tools/mo/openvino/tools/mo/convert_impl.py
+++ b/tools/mo/openvino/tools/mo/convert_impl.py
@@ -312,8 +312,8 @@ def update_fallback_with_conversion_error(use_new_frontend: bool, is_tf: bool, e
     conversion_error_re = r"^(\[TensorFlow\ Frontend\]\ Internal\ error\,\ no\ translator\ found\ for\ operation\(s\)\:\ )((\w+)(\,\ \w+)*)$"
     conversion_error_match = re.findall(conversion_error_re, ex_msg, re.MULTILINE)
     all_fallback_operations = [
-        # corresponds to TF1 TensorList operation
-        "TensorArrayScatterV3", "TensorArrayV3", "TensorArraySizeV3", "TensorArrayGatherV3",
+        # corresponds to TF1 While operation
+        "LoopCond", "Enter", "NextIteration", "Exit", "Switch", "Merge",
         # corresponds to operations with complex tensors
         "FFT", "FFT2D", "FFT3D", "IFFT", "IFFT2D", "IFFT3D",
         "RFFT", "RFFT2D", "RFFT3D", "IRFFT", "IRFFT2D", "IRFFT3D",
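For context, the parsing that this hunk feeds can be reproduced outside convert_impl.py. A minimal, self-contained sketch of how an error message of the expected shape is split into operation names and checked against the fallback set; the message text below is illustrative, not a captured log:

import re

# Same pattern as conversion_error_re above.
conversion_error_re = (r"^(\[TensorFlow\ Frontend\]\ Internal\ error\,\ no\ translator\ found\ "
                       r"for\ operation\(s\)\:\ )((\w+)(\,\ \w+)*)$")

# Hypothetical frontend error message matching the expected shape.
ex_msg = ("[TensorFlow Frontend] Internal error, no translator found for operation(s): "
          "LoopCond, Enter, NextIteration")

match = re.findall(conversion_error_re, ex_msg, re.MULTILINE)
if match:
    # Group 2 holds the comma-separated operation list.
    unsupported_ops = match[0][1].split(', ')
    all_fallback_operations = {"LoopCond", "Enter", "NextIteration", "Exit", "Switch", "Merge"}
    # Fallback to the legacy frontend only makes sense when every missing op is a known fallback op.
    print(all(op in all_fallback_operations for op in unsupported_ops))  # True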
diff --git a/tools/mo/openvino/tools/mo/ops/Cast.py b/tools/mo/openvino/tools/mo/ops/Cast.py
index 77beb07c74122e..24409912429f07 100644
--- a/tools/mo/openvino/tools/mo/ops/Cast.py
+++ b/tools/mo/openvino/tools/mo/ops/Cast.py
@@ -36,12 +36,14 @@ def backend_attrs(self):
 
     @staticmethod
     def type_infer(node: Node):
-        assert node.has_valid('dst_type'), 'Destination type of "Cast" operation should be extracted earlier'
+        assert node.has_valid(
+            'dst_type'), 'Destination type of "Cast" operation should be extracted earlier'
         node.out_port(0).set_data_type(node.dst_type)
 
     @staticmethod
     def helper_value_propagation(node_name, value, dst_type):
-        new_blob, finite_match_count, zero_match_count = convert_blob(value, dst_type)
+        new_blob, finite_match_count, zero_match_count = convert_blob(
+            value, dst_type)
         if finite_match_count:
             log.error("{} elements of {} were clipped to infinity while converting an input blob for node '{}' to {}."
@@ -63,6 +65,10 @@ def custom_type_casting_and_packing(node: Node, value, dst_type):
     we would pad them to 6 element with the last element as zero and we would pack them into 3 uint8 values
     """
     assert dst_type in [packed_U4, packed_I4]
+    # TODO: Remove this comment when it's clear that we can fix it easily
+    # raise Exception("Packing of u4/i4 data is no longer supported in mo because it is now incompatible with the new "
+    #                 "order of the halfs of a byte that was introduced in OpenVINO runtime recently. Use ovc "
+    #                 "command line tool or openvino.convert_model python function instead.")
     minimum_regular_dtype = np.uint8 if dst_type == packed_U4 else np.int8
     # initial casing from the source type to the numpy-friendly type which could absorb all the values of dst_type
@@ -83,10 +89,12 @@ def custom_type_casting_and_packing(node: Node, value, dst_type):
     padded = np.concatenate((flattened, np.zeros([pad], dtype=minimum_regular_dtype)))
     assert np.prod(padded.shape) % num_values_fitting_into_uint8 == 0
 
-    bit_order_little = (padded[:, None] & (1 << np.arange(num_bits)) > 0).astype(np.uint8)
-    bit_order_big = np.flip(bit_order_little, axis=1)
-    bit_order_big_flattened = bit_order_big.flatten()
-    packed = np.packbits(bit_order_big_flattened)
+    bit_order_little = (padded[:, None] & (
+        1 << np.arange(num_bits)) > 0).astype(np.uint8)
+    bit_order_big_flattened = bit_order_little.flatten()
+    # u1 still has reversed bit order:
+    packed = np.packbits(bit_order_big_flattened,
+                         bitorder='little' if num_bits > 1 else 'big')
 
     node.out_node(0)['force_shape'] = data_shape.copy()
     node.out_node(0)['force_type'] = np_data_type_to_precision(dst_type)
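The nibble-order change above is easiest to see on concrete values. A standalone numpy sketch (input values chosen for illustration) contrasting the removed big-endian packing with the new little-endian one:

import numpy as np

values = np.array([7, 8], dtype=np.uint8)  # two u4 values packed into one byte
num_bits = 4

# Per-value little-endian bit planes, as in bit_order_little above.
bits = (values[:, None] & (1 << np.arange(num_bits)) > 0).astype(np.uint8)

# Old order: flip each value to big-endian bits, pack MSB-first -> 7 lands in the high nibble.
old = np.packbits(np.flip(bits, axis=1).flatten())
# New order: keep little-endian bits, pack LSB-first -> 7 lands in the low nibble.
new = np.packbits(bits.flatten(), bitorder='little')

print(old, new)  # [120] [135], i.e. 0b01111000 vs 0b10000111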
diff --git a/tools/mo/openvino/tools/mo/ops/multinomial.py b/tools/mo/openvino/tools/mo/ops/multinomial.py
index 42f4b0d3eedbb9..233a9d4565f4c1 100644
--- a/tools/mo/openvino/tools/mo/ops/multinomial.py
+++ b/tools/mo/openvino/tools/mo/ops/multinomial.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from openvino.tools.mo.front.common.partial_infer.utils import dynamic_dimension
+from openvino.tools.mo.front.common.partial_infer.utils import dynamic_dimension_value, shape_array
 from openvino.tools.mo.front.extractor import bool_to_str
 from openvino.tools.mo.graph.graph import Graph, Node
 
@@ -63,7 +63,7 @@ def infer(node: Node):
 
         num_samples = node.in_port(1).data.get_value()
         if num_samples is not None:
-            output_shape.append(num_samples)
+            output_shape.append(np.array(num_samples).item())
         else:
-            output_shape.append(dynamic_dimension)
-        node.out_port(0).data.set_shape(output_shape)
+            output_shape.append(dynamic_dimension_value)
+        node.out_port(0).data.set_shape(shape_array(output_shape))
diff --git a/tools/mo/openvino/tools/mo/utils/telemetry_utils.py b/tools/mo/openvino/tools/mo/utils/telemetry_utils.py
index 802986edf4c4c0..e2cdd0b53f61d0 100644
--- a/tools/mo/openvino/tools/mo/utils/telemetry_utils.py
+++ b/tools/mo/openvino/tools/mo/utils/telemetry_utils.py
@@ -22,8 +22,13 @@
 
 
 def init_mo_telemetry(app_name='Model Optimizer'):
-    return tm.Telemetry(tid=get_tid(), app_name=app_name, app_version=get_rt_version(), backend='ga4')
-
+    return tm.Telemetry(tid=get_tid(),
+                        app_name=app_name,
+                        app_version=get_rt_version(),
+                        backend='ga4',
+                        enable_opt_in_dialog=False,
+                        disable_in_ci=True
+                        )
 
 def send_framework_info(framework: str):
     """
diff --git a/tools/mo/unit_tests/mo/ops/cast_test.py b/tools/mo/unit_tests/mo/ops/cast_test.py
index 985a7276514235..73a468e9fb80fa 100644
--- a/tools/mo/unit_tests/mo/ops/cast_test.py
+++ b/tools/mo/unit_tests/mo/ops/cast_test.py
@@ -21,81 +21,20 @@ class TestCastTest():
     """
     Example of checking:
-        7 == 0111,           padded to 0111 0000, results in 112
-        7 == 0111, 8 == 1000 packed to 0111 1000, results in 120
+        7 == 0111,           padded to 00000111, results in 7
+        7 == 0111, 8 == 1000 packed to 10000111, results in 7+16*8
 
-        -8 == 1000,          padded to 1000 0000, results in 128
+        -8 == 1000,          padded to 00001000, results in 8
     """
 
-    @pytest.mark.parametrize("value, expected, custom_dtype",[
-        ([0], [0], packed_U4),
-        ([1], [16], packed_U4),
-        ([2], [32], packed_U4),
-        ([3], [48], packed_U4),
-        ([4], [64], packed_U4),
-        ([5], [80], packed_U4),
-        ([6], [96], packed_U4),
-        ([7], [112], packed_U4),
-        ([8], [128], packed_U4),
-        ([9], [144], packed_U4),
-        ([10], [160], packed_U4),
-        ([11], [176], packed_U4),
-        ([12], [192], packed_U4),
-        ([13], [208], packed_U4),
-        ([14], [224], packed_U4),
-        ([15], [240], packed_U4),
-
-        ([0, 15], [15], packed_U4),
-        ([1, 14], [30], packed_U4),
-        ([2, 13], [45], packed_U4),
-        ([3, 12], [60], packed_U4),
-        ([4, 11], [75], packed_U4),
-        ([5, 10], [90], packed_U4),
-        ([6, 9], [105], packed_U4),
-        ([7, 8], [120], packed_U4),
-        ([8, 7], [135], packed_U4),
-        ([9, 6], [150], packed_U4),
-        ([10, 5], [165], packed_U4),
-        ([11, 4], [180], packed_U4),
-        ([12, 3], [195], packed_U4),
-        ([13, 2], [210], packed_U4),
-        ([14, 1], [225], packed_U4),
-        ([15, 0], [240], packed_U4),
-
-        ([-8], [128], packed_I4),
-        ([-7], [144], packed_I4),
-        ([-6], [160], packed_I4),
-        ([-5], [176], packed_I4),
-        ([-4], [192], packed_I4),
-        ([-3], [208], packed_I4),
-        ([-2], [224], packed_I4),
-        ([-1], [240], packed_I4),
-        ([0], [0], packed_I4),
-        ([1], [16], packed_I4),
-        ([2], [32], packed_I4),
-        ([3], [48], packed_I4),
-        ([4], [64], packed_I4),
-        ([5], [80], packed_I4),
-        ([6], [96], packed_I4),
-        ([7], [112], packed_I4),
-
-        ([-8, 7], [135], packed_I4),
-        ([-7, 6], [150], packed_I4),
-        ([-6, 5], [165], packed_I4),
-        ([-5, 4], [180], packed_I4),
-        ([-4, 3], [195], packed_I4),
-        ([-3, 2], [210], packed_I4),
-        ([-2, 1], [225], packed_I4),
-        ([-1, 0], [240], packed_I4),
-        ([0, -1], [15], packed_I4),
-        ([1, -2], [30], packed_I4),
-        ([2, -3], [45], packed_I4),
-        ([3, -4], [60], packed_I4),
-        ([4, -5], [75], packed_I4),
-        ([5, -6], [90], packed_I4),
-        ([6, -7], [105], packed_I4),
-        ([7, -8], [120], packed_I4),
-        ])
+    @pytest.mark.parametrize("value, expected, custom_dtype",
+                             [([i], [i], packed_U4) for i in range(16)] +
+                             [([i, 15-i], [i + (15-i)*16], packed_U4) for i in range(16)] +
+                             [([-i], [16-i], packed_I4) for i in range(1, 8+1)] +
+                             [([i], [i], packed_I4) for i in range(8)] +
+                             [([-i-1, i], [16-i-1 + 16*i], packed_I4) for i in range(8)] +
+                             [([i, -i-1], [i + 16*(16-i-1)], packed_I4) for i in range(8)]
+                             )
     def test_custom_value_propagation(self, value, expected, custom_dtype):
         graph = build_graph(nodes(value, custom_dtype), [
             *connect('value', 'convert'), *connect('convert', 'output'),
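As a sanity check, the comprehension-based cases can be evaluated on their own and tied back to the updated docstring; packed_U4 below is a stand-in placeholder, since only the expected-value arithmetic is exercised:

packed_U4 = 'packed_U4'  # placeholder for the mo dtype constant

cases = [([i, 15-i], [i + (15-i)*16], packed_U4) for i in range(16)]

# i == 7 reproduces the docstring example: [7, 8] packs to 0b10000111 == 135 == 7 + 16*8.
value, expected, dtype = cases[7]
assert value == [7, 8] and expected == [135]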
diff --git a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py
index 8d905d8f13129d..26ea01b77d6722 100644
--- a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py
+++ b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py
@@ -235,17 +235,13 @@ def test_freeze_placeholder_with_unknown_rank(self, inputs, inputs_data, expecte
                                               freeze_placeholder_with_value,
                                               input_shape, only_conversion, True)
 
-    def test_conversion_failure_fallback_default(self):
+    def test_conversion_tf1_while_default(self):
         self.basic("ctc_model_based.pbtxt", None, None, None, None,
                    None, None, True, True, False, False)
 
-    @unittest.skipIf(platform == 'darwin', reason="Ticket - 122182")
-    def test_conversion_failure_fallback_use_new_frontend(self):
-        with self.assertRaisesRegex(Exception,
-                                    "\[TensorFlow Frontend\] Internal error, no translator found for operation\(s\)\: "
-                                    "TensorArrayGatherV3\, TensorArrayReadV3\, TensorArraySizeV3\, TensorArrayV3\, TensorArrayWriteV3"):
-            self.basic("ctc_model_based.pbtxt", None, None, None, None,
-                       None, None, True, True, True, False)
+    def test_conversion_tf1_while_use_new_frontend(self):
+        self.basic("ctc_model_based.pbtxt", None, None, None, None,
+                   None, None, True, True, True, False)
 
     @unittest.skip("88349: Fix auto-pruning in legacy FE")
     def test_conversion_model_oneshot_iterator_use_legacy_frontend(self):
diff --git a/tools/ovc/openvino/tools/ovc/telemetry_utils.py b/tools/ovc/openvino/tools/ovc/telemetry_utils.py
index 87e0132ccd17a6..42232b0839a6be 100644
--- a/tools/ovc/openvino/tools/ovc/telemetry_utils.py
+++ b/tools/ovc/openvino/tools/ovc/telemetry_utils.py
@@ -17,7 +17,13 @@
 
 
 def init_mo_telemetry(app_name='Model Conversion API'):
-    return tm.Telemetry(tid=get_tid(), app_name=app_name, app_version=get_rt_version(), backend='ga4')
+    return tm.Telemetry(tid=get_tid(),
+                        app_name=app_name,
+                        app_version=get_rt_version(),
+                        backend='ga4',
+                        enable_opt_in_dialog=False,
+                        disable_in_ci=True
+                        )
 
 def send_framework_info(framework: str):
     """