diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml
deleted file mode 100644
index 8626f9d609ed0e..00000000000000
--- a/.ci/azure/linux.yml
+++ /dev/null
@@ -1,590 +0,0 @@
-trigger:
-  branches:
-    include:
-    - 'master'
-    - 'releases/*'
-  paths:
-    exclude:
-    - '*/docs/*'
-    - 'docs/*'
-    - '*/*.md'
-    - '*.md'
-    - '*/layer_tests_summary/*'
-    - '*/conformance/*'
-
-pr:
-  branches:
-    include:
-    - 'master'
-    - 'releases/*'
-  paths:
-    exclude:
-    - '*/docs/*'
-    - 'docs/*'
-    - '*/*.md'
-    - '*.md'
-    - '*/layer_tests_summary/*'
-    - '*/conformance/*'
-
-resources:
-  repositories:
-  - repository: openvino_contrib
-    type: github
-    endpoint: openvinotoolkit
-    name: openvinotoolkit/openvino_contrib
-    ref: master
-
-variables:
-  - group: github
-
-jobs:
-- job: Lin
-  strategy:
-    matrix:
-      # Dynamic:
-      #   CMAKE_BUILD_SHARED_LIBS: 'ON'
-      #   PYTHON_STATIC_ARGS:
-      #   CMAKE_CPACK_GENERATOR:
-      #   SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/samples
-      #   PYTHON_SAMPLES_INSTALL_DIR: $(SAMPLES_INSTALL_DIR)/python
-      #   RUN_PREFIX: . $(SETUPVARS) -pyver 3.8 &&
-      # Debian:
-      #   CMAKE_BUILD_SHARED_LIBS: 'ON'
-      #   PYTHON_STATIC_ARGS:
-      #   CMAKE_CPACK_GENERATOR: 'DEB'
-      #   SAMPLES_INSTALL_DIR: /usr/share/openvino/samples
-      #   PYTHON_SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/share/openvino/samples/python
-      #   RUN_PREFIX: LD_LIBRARY_PATH=$(INSTALL_TEST_DIR):$(INSTALL_DIR)/opencv/lib:$LD_LIBRARY_PATH
-      Static:
-        CMAKE_BUILD_SHARED_LIBS: 'OFF'
-        PYTHON_STATIC_ARGS: -m "not dynamic_library"
-        CMAKE_CPACK_GENERATOR: "TGZ"
-        SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/samples
-        PYTHON_SAMPLES_INSTALL_DIR: $(SAMPLES_INSTALL_DIR)/python
-        RUN_PREFIX: . $(SETUPVARS) &&
-    maxParallel: '2'
-
-  # About 150% of total time
-  timeoutInMinutes: '180'
-
-  pool:
-    name: LIN_VMSS_VENV_F16S_U20_WU2
-
-  variables:
-    system.debug: true
-    VSTS_HTTP_RETRY: 5
-    VSTS_HTTP_TIMEOUT: 200
-    BUILD_TYPE: Release
-    REPO_DIR: $(Build.Repository.LocalPath)
-    OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib
-    WORK_DIR: $(Pipeline.Workspace)/_w
-    BUILD_DIR: $(WORK_DIR)/build
-    BUILD_SAMPLES_DIR: $(WORK_DIR)/build_samples
-    BUILD_LAYER_TESTS_DIR: $(WORK_DIR)/build_layer_tests
-    BUILD_SAMPLES_TESTS_DIR: $(WORK_DIR)/build_samples_tests
-    INSTALL_DIR: $(WORK_DIR)/install_pkg
-    INSTALL_TEST_DIR: $(INSTALL_DIR)/tests
-    LAYER_TESTS_DIR: $(INSTALL_TEST_DIR)/layer_tests
-    SETUPVARS: $(INSTALL_DIR)/setupvars.sh
-    TMP_DIR: /mnt/tmp
-    SHARE_DIR: /mount/cinfsshare/onnxtestdata
-    CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux
-    CMAKE_VERSION: 3.24.0
-    BUILD_PYTHON: $(WORK_DIR)/build_python
-    INSTALL_PYTHON: $(INSTALL_OPENVINO)/extras/python
-    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
-    OV_PYTHON_VERSION: 3.11.2 # Full version of Python its required for LD_LIBRARY_PATH. More details https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache
-
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only major & minor version will download latest release from GH repo example 3.10 will be 3.10.10.
-      addToPath: true
-      disableDownloadFromRegistry: false
-      architecture: 'x64'
-      githubToken: $(auth_token)
-    displayName: Setup Python 3.11
-    name: setupPython
-  - bash: |
-      #!/bin/bash
-      python -V
-
-  - script: |
-      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
-      whoami
-      uname -a
-      echo Python3 info ; which python3 ; python3 --version
-      echo Java info ; which java ; java -version
-      echo gcc info ; which gcc ; gcc --version
-      echo cmake info ; which cmake ; cmake --version
-      lsb_release
-      env
-      cat /proc/cpuinfo
-      cat /proc/meminfo
-      cat /etc/fstab
-      vmstat -s
-      df
-      lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd"
-      free -h
-      echo TargetBranch: $(System.PullRequest.TargetBranch)
-      echo SourceBranch: $(Build.SourceBranch)
-    displayName: 'System info'
-
-  - script: |
-      set -e
-      rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
-      rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
-      rm -rf $(BUILD_SAMPLES_DIR) ; mkdir $(BUILD_SAMPLES_DIR)
-      sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
-      sudo mkdir -p $(SHARE_DIR)
-      sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
-      sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(SHARE_DIR) -o vers=4,minorversion=1,sec=sys
-      mkdir -p $(CCACHE_DIR)
-    displayName: 'Make dir'
-
-  - checkout: self
-    clean: 'true'
-    submodules: 'true'
-    path: openvino
-
-  - checkout: openvino_contrib
-    clean: 'true'
-    submodules: 'true'
-    path: openvino_contrib
-
-  - script: |
-      set -e
-      sudo -E $(REPO_DIR)/install_build_dependencies.sh
-      # Move jdk into contrib
-      # 'clang' compiler is used as a default compiler
-      sudo apt --assume-yes install openjdk-11-jdk libbz2-dev clang
-      # For Python API
-      python3 -m pip install --upgrade pip
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/requirements.txt
-      # For running Python API tests
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/src/compatibility/openvino/requirements-dev.txt
-      # For running Paddle frontend unit tests
-      # TODO Reenable PDPD after paddlepaddle==2.5.0 with compliant protobuf is released (ticket 95904)
-      #python3 -m pip install -r $(REPO_DIR)/src/frontends/paddle/tests/requirements.txt
-      # For running ONNX frontend unit tests
-      python3 -m pip install -r $(REPO_DIR)/src/frontends/onnx/tests/requirements.txt
-      # For running TensorFlow frontend unit tests
-      python3 -m pip install -r $(REPO_DIR)/src/frontends/tensorflow/tests/requirements.txt
-      # For running torchvision -> OpenVINO preprocess converter
-      python3 -m pip install -r $(REPO_DIR)/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt
-      # For MO unit tests
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_mxnet.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_caffe.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_kaldi.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_onnx.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_tf2.txt
-      python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_dev.txt
-      # Speed up build
-      sudo apt -y --no-install-recommends install unzip
-      wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip
-      unzip ninja-linux.zip
-      sudo cp -v ninja /usr/local/bin/
-    displayName: 'Install dependencies'
-
-  - script: |
-      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
-      whoami
-      uname -a
-      echo Python3 info ; which python3 ; python3 --version
-      echo Python info ; which python ; python --version
-      echo Java info ; which java ; java -version
-      echo gcc info ; which gcc ; gcc --version
-      echo cmake info ; which cmake ; cmake --version
-      lsb_release
-      env
-      cat /proc/cpuinfo
-      cat /proc/meminfo
-      cat /etc/fstab
-      vmstat -s
-      df
-      lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd"
-      free -h
-      echo TargetBranch: $(System.PullRequest.TargetBranch)
-      echo SourceBranch: $(Build.SourceBranch)
-    displayName: 'System info'
-
-  - task: CMake@1
-    inputs:
-      # CMake must get Python 3.x version by default
-      cmakeArgs: >
-        -GNinja
-        -DCMAKE_VERBOSE_MAKEFILE=ON
-        -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
-        -DCMAKE_COMPILE_WARNING_AS_ERROR=ON
-        -DENABLE_PYTHON=ON
-        -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS)
-        -DENABLE_ONEDNN_FOR_GPU=$(CMAKE_BUILD_SHARED_LIBS)
-        -DENABLE_TESTS=ON
-        -DENABLE_OV_ONNX_FRONTEND=ON
-        -DENABLE_FASTER_BUILD=ON
-        -DENABLE_STRICT_DEPENDENCIES=OFF
-        -DOPENVINO_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
-        -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose"
-        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-        -DCMAKE_C_COMPILER_LAUNCHER=ccache
-        -DCMAKE_CXX_LINKER_LAUNCHER=ccache
-        -DCMAKE_C_LINKER_LAUNCHER=ccache
-        -DCMAKE_CXX_COMPILER=clang++
-        -DCMAKE_C_COMPILER=clang
-        -DENABLE_SYSTEM_SNAPPY=ON
-        -DENABLE_SYSTEM_TBB=ON
-        -DCPACK_GENERATOR=$(CMAKE_CPACK_GENERATOR)
-        -DBUILD_nvidia_plugin=OFF
-        -S $(REPO_DIR)
-        -B $(BUILD_DIR)
-    displayName: 'Cmake OpenVINO'
-
-  - script: ls -alR $(REPO_DIR)/temp/
-    displayName: 'List temp SDKs'
-
-  - script: ccache --zero-stats --max-size=50G --show-config
-    displayName: 'Clean ccache stats'
-
-  - script: cmake --build $(BUILD_DIR) --parallel --config $(BUILD_TYPE)
-    env:
-      CCACHE_DIR: $(CCACHE_DIR)
-      CCACHE_TEMPDIR: $(TMP_DIR)/ccache
-      CCACHE_BASEDIR: $(Pipeline.Workspace)
-      CCACHE_MAXSIZE: 50G
-    displayName: 'Build Lin'
-
-  - script: ccache --show-stats
-    displayName: 'Show ccache stats'
-
-  - script: ls -alR $(REPO_DIR)/bin/
-    displayName: 'List bin files'
-
-  - task: CMake@1
-    inputs:
-      cmakeArgs: >
-        -GNinja
-        -S $(REPO_DIR)/tests/layer_tests
-        -B $(BUILD_LAYER_TESTS_DIR)
-    displayName: 'Cmake Layer Tests'
-
-  - script: cmake --build $(BUILD_LAYER_TESTS_DIR) --parallel --config $(BUILD_TYPE)
-    displayName: 'Build Layer Tests'
-
-  - script: sudo apt-get remove libtbb2 -y
-    displayName: 'Remove debian dependencies'
-    condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-
-  - script: cmake -DCOMPONENT=python_wheels -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_DIR)/cmake_install.cmake
-    displayName: 'Install wheel packages'
-
-  - script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_LAYER_TESTS_DIR)/cmake_install.cmake
-    displayName: 'Install Layer Tests'
-
-  - script: python3 -m pip install openvino-dev --find-links=$(INSTALL_DIR)/tools
-    displayName: 'Install python wheels'
-
-  - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P $(BUILD_DIR)/cmake_install.cmake
-    displayName: 'Install tests'
-
-  - script: ls -alR $(INSTALL_DIR)
-    displayName: 'List install test files'
-
-  - script: |
-      set -e
-      sudo apt-get install libtbb-dev libpugixml-dev -y
-      cmake --build $(BUILD_DIR) --target package --parallel
-    condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'Build Debian packages'
-
-  - script: |
-      set -e
-      # install debian packages from previous release
-      sudo apt-get install --no-install-recommends gnupg wget -y
-      wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-      sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-      echo "deb https://apt.repos.intel.com/openvino/2022 focal main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2022.list
-      sudo apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/intel-openvino-2022.list
-      sudo apt-get install openvino -y
-      # install our local one and make sure the conflicts are resolved
-      sudo apt-get install --no-install-recommends dpkg-dev -y
-      rm -r _CPack_Packages
-      dpkg-scanpackages . /dev/null | gzip -9c > Packages.gz
-      echo "deb [trusted=yes] file:$(BUILD_DIR) ./" | sudo tee /etc/apt/sources.list.d/openvino-local.list
-      sudo apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/openvino-local.list
-      sudo apt-get install openvino -y
-    workingDirectory: $(BUILD_DIR)
-    condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'Install Debian packages'
-
-  - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_DIR)/cmake_install.cmake
-    condition: ne(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'Install openvino'
-
-  - script: ls -alR $(INSTALL_DIR)
-    condition: ne(variables['CMAKE_CPACK_GENERATOR'], 'DEB')
-    displayName: 'List install files'
-
-  - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -i $(INSTALL_DIR) -b $(BUILD_DIR)/cpp_samples
-    displayName: 'Build cpp samples - gcc'
-
-  - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -b $(BUILD_DIR)/cpp_samples_clang
-    env:
-      CC: clang
-      CXX: clang++
-    displayName: 'Build cpp samples - clang'
-
-  - script: $(SAMPLES_INSTALL_DIR)/c/build_samples.sh -i $(INSTALL_DIR) -b $(BUILD_DIR)/c_samples
-    env:
-      VERBOSE: 1
-    displayName: 'Build c samples'
-
-  - script: rm -fr $(BUILD_DIR)
-    displayName: 'Clean build dir'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVCoreUT.xml
-    displayName: 'OV Core UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_inference_functional_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceFunc.xml
-    displayName: 'Inference Func Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_inference_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceUnit.xml
-    displayName: 'Inference Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_proxy_plugin_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVProxyTests.xml
-    displayName: 'OV Proxy Plugin Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVHeteroUnitTests.xml
-    displayName: 'OV Hetero Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVHeteroFuncTests.xml
-    displayName: 'OV Hetero Func Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_conditional_compilation_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ConditionalCompilation.xml
-    displayName: 'Conditional Compilation Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_ir_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-IRFrontend.xml
-    displayName: 'IR Frontend Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ONNXFrontend.xml
-    displayName: 'ONNX Frontend Tests'
-
-  # TODO Reenable PDPD after paddlepaddle==2.5.1 with compliant protobuf is released (ticket 95904)
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/paddle_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Paddle.xml
-    displayName: 'Paddle Frontend UT'
-    enabled: 'false'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Tensorflow.xml
-    displayName: 'TensorFlow Frontend Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_common_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-TensorflowCommon.xml
-    displayName: 'TensorFlow Common Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_lite_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-TensorflowLite.xml
-    displayName: 'TensorFlow Lite Frontend Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_lp_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-LpTransformations.xml
-    displayName: 'Low Precision Transformations Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Transformations.xml
-    displayName: 'Transformations Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_legacy_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-LegacyTransformations.xml
-    displayName: 'Legacy Transformations Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_util_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-CommonUtilTests.xml
-    displayName: 'Common Utils Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceEngineUnitTests.xml
-    displayName: 'IE UT old'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_snippets_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_snippets_func_tests.xml
-    displayName: 'Snippets Func Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_unit_tests.xml
-    displayName: 'Intel CPU Unit Tests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_gna_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_gna_unit_tests.xml
-    displayName: 'GNA UT'
-    enabled: 'false' # TODO: fix
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
-    displayName: 'AUTO UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_func_tests.xml
-    displayName: 'AUTO FuncTests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_unit_tests.xml
-    displayName: 'AutoBatch UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_template_func_tests --gtest_filter=*smoke* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-templateFuncTests.xml
-    displayName: 'TEMPLATE FuncTests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/InferenceEngineCAPITests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceEngineCAPITests.xml
-    displayName: 'IE CAPITests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_capi_test --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_capi_test.xml
-    displayName: 'OV CAPITests'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_func_tests.xml
-    displayName: 'AutoBatch FuncTests'
-
-  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
-  - script: |
-      $(RUN_PREFIX) python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \
-        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
-        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
-        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py
-    displayName: 'nGraph and IE Python Bindings Tests'
-
-  - script: |
-      set -e
-      export LD_LIBRARY_PATH=$INSTALL_TEST_DIR:$LD_LIBRARY_PATH
-      $(RUN_PREFIX) python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \
-        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
-        --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py
-    displayName: 'Python API 2.0 Tests'
-
-  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
-  - script: |
-      python3 -m pytest -sv $(REPO_DIR)/src/frontends/onnx/tests $(PYTHON_STATIC_ARGS) \
-        --ignore=$(REPO_DIR)/src/frontends/onnx/tests/test_python/test_zoo_models.py \
-        --ignore=$(REPO_DIR)/src/frontends/onnx/tests/test_python/test_backend.py -v
-    displayName: 'ONNX Frontend Python Tests'
-
-  - script: python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
-    displayName: 'Model Optimizer UT'
-
-  - script: python3 -m pytest -s $(REPO_DIR)/tools/ovc/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-OpenVinoConversion.xml
-    displayName: 'OpenVino Conversion UT'
-
-  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_func_tests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_func_tests.xml
-    displayName: 'CPU FuncTests'
-    condition: and(succeeded(), eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'OFF'))
-
-  - task: CMake@1
-    inputs:
-      cmakeArgs: >
-        -GNinja
-        -S $(REPO_DIR)/tests/samples_tests
-        -B $(BUILD_SAMPLES_TESTS_DIR)
-    displayName: 'CMake Samples Tests'
-
-  - script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_SAMPLES_TESTS_DIR)/cmake_install.cmake
-    displayName: 'Install Samples Tests'
-
-  - script: python3 -m pip install -r $(INSTALL_TEST_DIR)/smoke_tests/requirements.txt
-    displayName: 'Install dependencies for samples smoke tests'
-
-  - script: |
-      set -e
-      export PATH=$HOME/.local/bin:$PATH
-      export LD_LIBRARY_PATH=$IE_APP_PATH:$LD_LIBRARY_PATH
-      $(RUN_PREFIX) python3 -m pytest $(INSTALL_TEST_DIR)/smoke_tests/ \
-        --env_conf $(INSTALL_TEST_DIR)/smoke_tests/env_config.yml \
-        -s --junitxml=$(INSTALL_TEST_DIR)/TEST-SamplesSmokeTests.xml
-    env:
-      IE_APP_PATH: $(INSTALL_DIR)/samples_bin
-      IE_APP_PYTHON_PATH: $(PYTHON_SAMPLES_INSTALL_DIR)/
-      SHARE: $(INSTALL_TEST_DIR)/smoke_tests/samples_smoke_tests_data/
-      WORKSPACE: $(INSTALL_DIR)
-    displayName: 'Samples Smoke Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/pytorch_tests/ -m precommit --junitxml=$(INSTALL_TEST_DIR)/TEST-pytorch.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'PyTorch Layer Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=$(INSTALL_TEST_DIR)/TEST-tf_fe.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow 1 Layer Tests - TF FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow2_keras_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=$(INSTALL_TEST_DIR)/TEST-tf2_fe.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow 2 Layer Tests - TF FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/jax_tests/ -m precommit --junitxml=$(INSTALL_TEST_DIR)/TEST-jax.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'JAX Layer Tests - TF FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=$(INSTALL_TEST_DIR)/TEST-tf_Roll.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-    displayName: 'TensorFlow 1 Layer Tests - Legacy FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow2_keras_tests/test_tf2_keras_activation.py --ir_version=11 --junitxml=./TEST-tf2_Activation.xmlTEST -k "sigmoid"
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow 2 Layer Tests - Legacy FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_lite_tests/ --junitxml=$(INSTALL_TEST_DIR)/TEST-tfl_fe.xmlTEST
-    env:
-      PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'TensorFlow Lite Layer Tests - TFL FE'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/ovc_python_api_tests/ --junitxml=./TEST-test_ovc_convert.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'OVC Python API Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/mo_python_api_tests/ --junitxml=./TEST-test_mo_convert.xmlTEST
-    env:
-      PYTHONPATH: $(LAYER_TESTS_DIR)
-      TEST_DEVICE: CPU
-    displayName: 'MO Python API Tests'
-
-  - script: |
-      set -e
-      python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
-      $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/py_frontend_tests --junitxml=./TEST-test_py_fontend.xml
-    displayName: 'Python Frontend tests'
-
-  - task: PublishTestResults@2
-    condition: always()
-    inputs:
-      testResultsFormat: 'JUnit' # Options: JUnit, NUnit, VSTest, xUnit, cTest
-      testResultsFiles: '**/TEST-*.xml'
-      #searchFolder: '$(BUILD_DIR)'
-      mergeTestResults: false # Optional
-      #failTaskOnFailedTests: false # Optional
-      #testRunTitle: 'Pre/Post-Commit' # Optional
-      buildPlatform: 'x64' # Optional
-      buildConfiguration: 'Linux' # Optional
-      #publishRunAttachments: true # Optional
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 3f2178a1c681e5..3727c4d88f6e8b 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -94,6 +94,7 @@
 /tests/layer_tests/tensorflow_tests  @openvinotoolkit/openvino-tf-frontend-maintainers
 /tests/layer_tests/jax_tests  @openvinotoolkit/openvino-tf-frontend-maintainers
 /tests/model_hub_tests  @openvinotoolkit/openvino-tf-frontend-maintainers
+/tests/model_hub_tests/torch_tests  @openvinotoolkit/openvino-pytorch-frontend-maintainers
 
 # Tools:
 /tools/  @openvinotoolkit/openvino-tools-maintainers
diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml
index e4360eb08d3850..fb4b36c69f5a55 100644
--- a/.github/workflows/android_arm64.yml
+++ b/.github/workflows/android_arm64.yml
@@ -35,11 +35,12 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_GENERATOR: 'Ninja'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       OPENVINO_REPO: '/__w/openvino/openvino/openvino'
       VCPKG_ROOT: '/__w/openvino/openvino/vcpkg'
       BUILD_DIR: '/__w/openvino/openvino/build'
@@ -49,10 +50,7 @@ jobs:
       ANDROID_ABI_CONFIG: arm64-v8a
       VCPKG_DEFAULT_BINARY_CACHE: '/mount/caches/ccache/android_arm64/vcpkg_cache'
       VCPKG_FORCE_SYSTEM_BINARIES: '1'
-      CCACHE_DIR: '/mount/caches/ccache/android_arm64'
-      CCACHE_TEMPDIR: '/__w/openvino/openvino/ccache_temp'
-      CCACHE_COMPILERCHECK: 'content'
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: android_arm64
     steps:
       - name: Install git
         run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates
@@ -102,6 +100,11 @@ jobs:
           unzip commandlinetools-linux-7583922_latest.zip
           echo "yes" | ./cmdline-tools/bin/sdkmanager --sdk_root=${ANDROID_TOOLS} --install "ndk-bundle" "platform-tools" "platforms;android-${{ env.ANDROID_SDK_VERSION }}"
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       #
       # Build
       #
@@ -138,10 +141,10 @@ jobs:
           -B ${BUILD_DIR}
 
       - name: Clean ccache stats
-        run: ccache --zero-stats --show-config
+        run: sccache --zero-stats
 
       - name: Cmake - build
         run: cmake --build ${BUILD_DIR} --parallel
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
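The ccache-to-sccache change above repeats in every Linux-container workflow below; schematically, the wiring converges on the following shape (a sketch, not part of the patch; the key prefix value is a placeholder):

    # Sketch: the two SCCACHE_AZURE_* variables are sccache's documented Azure
    # Blob Storage settings. They are assumed to be defined on the self-hosted
    # runner host and are only forwarded into the Docker container via `options`;
    # SCCACHE_AZURE_KEY_PREFIX namespaces each build flavour's cache entries.
    container:
      image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
    env:
      CMAKE_CXX_COMPILER_LAUNCHER: sccache
      CMAKE_C_COMPILER_LAUNCHER: sccache
      SCCACHE_AZURE_KEY_PREFIX: some_build_flavour   # placeholder; unique per workflow
    steps:
      - name: Install sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        with:
          version: "v0.5.4"
      # sccache-action exports SCCACHE_PATH pointing at the installed binary,
      # which is why the stats steps below run `${SCCACHE_PATH} --show-stats`.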
diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml
index a554dfa98b462b..f398b1a3623fc1 100644
--- a/.github/workflows/fedora.yml
+++ b/.github/workflows/fedora.yml
@@ -36,19 +36,18 @@ jobs:
       image: fedora:33
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       INSTALL_DIR: /__w/openvino/openvino/openvino_install
       INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install
       BUILD_DIR: /__w/openvino/openvino/openvino_build
-      CCACHE_DIR: /mount/caches/ccache/fedora33_x86_64_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: fedora33_x86_64_Release
     steps:
       - name: Install git
         run: yum update -y && yum install -y git
@@ -66,6 +65,11 @@ jobs:
       - name: Install build dependencies
         run: bash ${OPENVINO_REPO}/install_build_dependencies.sh
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - name: Install python dependencies
         run: |
           python3 -m pip install -U pip
@@ -112,8 +116,8 @@ jobs:
       - name: Cmake build - OpenVINO
         run: cmake --build ${BUILD_DIR} --parallel --verbose
 
-      - name: Show ccache stats
-        run: ccache --show-stats
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Cmake install - OpenVINO
         run: |
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 733dfed4c09d14..9050ab3d161509 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -43,12 +43,13 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib
@@ -56,9 +57,7 @@ jobs:
       INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install
       DEVELOPER_PACKAGE_DIR: /__w/openvino/openvino/developer_package_install
       BUILD_DIR: /__w/openvino/openvino/openvino_build
-      CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release
       ONNX_RUNTIME_UTILS: /__w/openvino/openvino/openvino/.ci/azure/ci_utils/onnxruntime
     steps:
@@ -92,6 +91,11 @@ jobs:
           # libssl1.1 - 'python3 -m pip' in self-hosted runner
           apt install --assume-yes --no-install-recommends default-jdk libssl1.1
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -143,14 +147,14 @@ jobs:
             -S ${OPENVINO_REPO} \
             -B ${BUILD_DIR}
 
-      - name: Clean ccache stats
-        run: ccache --zero-stats --show-config
+      - name: Clean sccache stats
+        run: sccache --zero-stats
 
       - name: Cmake build - OpenVINO
         run: cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }}
 
-      - name: Show ccache stats
-        run: ccache --show-stats
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Cmake install - OpenVINO
         run: |
@@ -497,16 +501,15 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       INSTALL_DIR: /__w/openvino/openvino/install
-      CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_onnxruntime
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_onnxruntime
       ONNX_RUNTIME_REPO: /__w/openvino/openvino/onnxruntime
       ONNX_RUNTIME_UTILS: /__w/openvino/openvino/install/onnxruntime
       ONNX_RUNTIME_BUILD_DIR: /__w/openvino/openvino/onnxruntime/build
@@ -561,6 +564,11 @@ jobs:
       - name: Install Build Dependencies
         run: bash ${OPENVINO_REPO}/install_build_dependencies.sh
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - name: Build Lin ONNX Runtime
         run: |
           source ${INSTALL_DIR}/setupvars.sh
@@ -576,6 +584,9 @@ jobs:
         env:
           CXXFLAGS: "-Wno-error=deprecated-declarations"
 
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
+
       - name: Run onnxruntime_test_all
         run: |
           source ${INSTALL_DIR}/setupvars.sh
@@ -760,6 +771,18 @@ jobs:
           ${INSTALL_TEST_DIR}/ov_cpu_unit_tests --gtest_print_time=1 \
             --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CPUUnitTests.xml
 
+      - name: SubgraphsDumper tests
+        run: |
+          source ${INSTALL_DIR}/setupvars.sh
+          ${INSTALL_TEST_DIR}/subgraphsDumperTests --gtest_print_time=1 \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml
+
+      - name: Template OpImpl tests
+        run: |
+          source ${INSTALL_DIR}/setupvars.sh
+          ${INSTALL_TEST_DIR}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter=*OpImpl* \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml
+
       - name: AUTO unit tests
         run: |
           source ${INSTALL_DIR}/setupvars.sh
@@ -1334,21 +1357,20 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/nvidia/cuda:11.8.0-runtime-ubuntu20.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CUDA_COMPILER_LAUNCHER: ccache
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CUDA_COMPILER_LAUNCHER: sccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       INSTALL_DIR: /__w/openvino/openvino/install
       OPENVINO_DEVELOPER_PACKAGE: /__w/openvino/openvino/install/developer_package
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib
       NVIDIA_BUILD_DIR: /__w/openvino/openvino/nvidia_plugin_build
       DEBIAN_FRONTEND: 'noninteractive'
-      CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release
     steps:
       - name: Install Prerequisites
@@ -1405,7 +1427,12 @@ jobs:
       - name: Install build dependencies
         run: |
           ${OPENVINO_REPO}/install_build_dependencies.sh
-          apt -y --no-install-recommends install software-properties-common
+          apt -y --no-install-recommends install software-properties-common curl
+
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
 
       - name: Install CUDA
         run: |
@@ -1440,4 +1467,4 @@ jobs:
           cmake --build ${NVIDIA_BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --verbose -- ov_nvidia_func_tests ov_nvidia_unit_tests
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml
index c8fb34cca85244..15acba9d441696 100644
--- a/.github/workflows/linux_conditional_compilation.yml
+++ b/.github/workflows/linux_conditional_compilation.yml
@@ -39,21 +39,20 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       INSTALL_DIR: /__w/openvino/openvino/openvino_install
       BUILD_DIR: /__w/openvino/openvino/openvino_build
       SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat
       MODELS_PATH: /__w/openvino/openvino/testdata
-      CCACHE_DIR: /mount/caches/ccache/ubuntu22_x86_64_itt_clang_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 20G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release
     steps:
       - name: Install git
@@ -88,6 +87,11 @@ jobs:
           update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100
           update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 100
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
+
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -141,8 +145,8 @@ jobs:
           cmake --build ${BUILD_DIR} --parallel 8 --config ${{ env.CMAKE_BUILD_TYPE }}
           cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib
 
-      - name: Show ccache stats
-        run: ccache --show-stats
+      - name: Show sccache stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Cmake install - OpenVINO
         run: cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake
@@ -205,18 +209,17 @@ jobs:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       GITHUB_WORKSPACE: '/__w/openvino/openvino'
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       BUILD_DIR: /__w/openvino/openvino/openvino_build
       SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat
       MODELS_PATH: /__w/openvino/openvino/testdata
-      CCACHE_DIR: /mount/caches/ccache/ubuntu22_x86_64_cc_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 20G
+      SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_cc_Release
     steps:
       - name: Install git
@@ -252,6 +255,10 @@ jobs:
       - name: Install build dependencies
         run: bash ${OPENVINO_REPO}/install_build_dependencies.sh
 
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
       #
       # Build
       #
@@ -281,7 +288,7 @@ jobs:
         run: cmake --build ${BUILD_DIR} --parallel 8 --target benchmark_app
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
 
       - name: Run with CC-ed runtime
         run: ${OPENVINO_REPO}/bin/intel64/Release/benchmark_app -niter 1 -nireq 1 -m ${MODELS_PATH}/models/test_model/test_model_fp32.xml -d CPU
@@ -308,7 +315,7 @@ jobs:
           name: openvino_tests
           path: ${{ env.INSTALL_TEST_DIR }}
 
-      - name: Extract OpenVINO packages
+      - name: Extract OpenVINO tests package
         run: tar -xvzf ${INSTALL_TEST_DIR}/openvino_tests.tar.gz -C ${INSTALL_TEST_DIR}
 
       - name: Install OpenVINO dependencies
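For orientation (not part of the patch): the conditional-compilation jobs above, and their Windows counterparts further below, implement a two-phase flow that the hunks only show piecemeal. Roughly, with illustrative paths, and wrapped as a single workflow step for compactness:

    # Sketch of the two-phase selective build (one job per phase in the real workflows).
    - name: Selective build - both phases (sketch)
      run: |
        # Phase 1: instrumented build, then trace a real workload so the ITT
        # collector records which code regions are executed (written as *.csv stats).
        cmake -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -S openvino -B build
        cmake --build build --parallel
        cmake --build build --parallel --target sea_itt_lib
        python3 openvino/thirdparty/itt_collector/runtool/sea_runtool.py \
            --bindir openvino/bin/intel64/Release -o stat/itt_stat ! \
            openvino/bin/intel64/Release/benchmark_app -niter 1 -nireq 1 \
            -m testdata/models/test_model/test_model_fp32.xml -d CPU
        # Phase 2: rebuild with only the recorded code paths compiled in, then
        # verify the slimmed-down runtime can still execute the same model.
        cmake -DSELECTIVE_BUILD=ON "-DSELECTIVE_BUILD_STAT=$PWD/stat/*.csv" -S openvino -B build_cc
        cmake --build build_cc --parallel --target benchmark_app
        ./openvino/bin/intel64/Release/benchmark_app -niter 1 -nireq 1 \
            -m testdata/models/test_model/test_model_fp32.xml -d CPU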
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index d5084d7a5d19c6..487536f615a8a6 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -1,6 +1,9 @@
-name: macOS (macOS 12, Python 3.11)
+name: macOS (Python 3.11)
 on:
   workflow_dispatch:
+  schedule:
+    # at 00:00 on workdays
+    - cron: '0 0 * * 1,2,3,4,5'
 #  pull_request:
 #    paths-ignore:
 #      - '**/docs/**'
@@ -8,18 +11,18 @@
 #      - '**/**.md'
 #      - '**.md'
 #      - '**/layer_tests_summary/**'
 #      - '**/conformance/**'
-  push:
-    paths-ignore:
-      - '**/docs/**'
-      - 'docs/**'
-      - '**/**.md'
-      - '**.md'
-      - '**/layer_tests_summary/**'
-      - '**/conformance/**'
-    branches:
-      - master
-      - 'releases/**'
+#  push:
+#    paths-ignore:
+#      - '**/docs/**'
+#      - 'docs/**'
+#      - '**/**.md'
+#      - '**.md'
+#      - '**/layer_tests_summary/**'
+#      - '**/conformance/**'
+#    branches:
+#      - master
+#      - 'releases/**'
 
 concurrency:
   # github.ref is not unique in post-commit
@@ -34,11 +37,22 @@ jobs:
     defaults:
       run:
        shell: bash
-    runs-on: macos-12-large
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13-large'
+            macos_deployment_target: '10.12'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+            macos_deployment_target: '11.0'
+    runs-on: ${{ matrix.machine }}
     env:
       CMAKE_BUILD_TYPE: 'Release'
       CMAKE_GENERATOR: 'Ninja Multi-Config'
-      MACOSX_DEPLOYMENT_TARGET: '10.12'
+      MACOSX_DEPLOYMENT_TARGET: ${{ matrix.macos_deployment_target }}
       CMAKE_CXX_COMPILER_LAUNCHER: ccache
       CMAKE_C_COMPILER_LAUNCHER: ccache
       OPENVINO_REPO: ${{ github.workspace }}/openvino
@@ -100,9 +114,9 @@ jobs:
           # github.ref_name is 'ref/PR_#' in case of the PR, and 'branch_name' when executed on push
           save: ${{ github.ref_name == 'master' && 'true' || 'false' }}
           verbose: 2
-          key: ${{ runner.os }}-main
+          key: ${{ runner.os }}-${{ matrix.architecture }}-main
           restore-keys: |
-            ${{ runner.os }}-main
+            ${{ runner.os }}-${{ matrix.architecture }}-main
 
       - name: CMake configure
         run: |
@@ -144,6 +158,7 @@ jobs:
         run: |
           cmake \
             -DBUILD_nvidia_plugin=OFF \
+            -DBUILD_java_api=OFF \
             -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" \
             -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_CONTRIB_REPO }}/modules \
             -S ${{ env.OPENVINO_REPO }} \
@@ -158,7 +173,7 @@ jobs:
         if: ${{ always() }}
         uses: actions/upload-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz
           if-no-files-found: 'error'
@@ -166,7 +181,7 @@ jobs:
         if: ${{ always() }}
         uses: actions/upload-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz
           if-no-files-found: 'error'
@@ -175,7 +190,16 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
@@ -189,13 +213,13 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
@@ -248,7 +272,7 @@ jobs:
         uses: actions/upload-artifact@v3
         if: ${{ !cancelled() }}
         with:
-          name: test-results-samples
+          name: test-results-samples-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
@@ -258,7 +282,16 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
@@ -271,13 +304,13 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
@@ -314,7 +347,11 @@ jobs:
       - name: Low Precision Transformations Tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_lp_transformations_tests --gtest_print_time=1 \
+
+          # Skips under Ticket: 122660
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*smoke_LPT/FoldFakeQuantizeInTransformations.CompareFunctions*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_lp_transformations_tests --gtest_print_time=1 "$skip_filter" \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-LpTransformations.xml
 
       - name: OpenVINO Conditional compilation tests
@@ -337,8 +374,10 @@ jobs:
           --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-PaddleTests.xml
 
       - name: ONNX frontend tests
+        if: ${{ matrix.architecture == 'x86_64' }} # Ticket for ARM64: 122663
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
+
           ${{ env.INSTALL_TEST_DIR }}/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ONNXFrontend.xml
 
@@ -351,7 +390,11 @@ jobs:
       - name: TensorFlow frontend tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_frontend_tests --gtest_print_time=1 \
+
+          # Skips under Ticket: 122666
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*CompileModelsTests.ModelWithSplitConvConcat*:*NgramCompilation*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_frontend_tests --gtest_print_time=1 "$skip_filter" \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowFrontend.xml
 
       - name: TensorFlow Lite frontend tests
@@ -363,7 +406,11 @@ jobs:
       - name: Transformations func tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 \
+
+          # Skips under Ticket: 122668
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*TransformationTestsF.CompressQuantizeWeights*:*TransformationTests/CompressQuantizeWeightsTests.FusionTest*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 "$skip_filter" \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-Transformations.xml
 
       - name: Common test utils tests
@@ -384,6 +431,18 @@ jobs:
           ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml
 
+      - name: SubgraphsDumper tests
+        run: |
+          source ${{ env.INSTALL_DIR }}/setupvars.sh
+          ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml
+
+      - name: Template OpImpl tests
+        run: |
+          source ${{ env.INSTALL_DIR }}/setupvars.sh
+          ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" \
+            --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateOpImplTests.xml
+
       - name: AUTO unit tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
@@ -444,7 +503,7 @@ jobs:
         uses: actions/upload-artifact@v3
         if: ${{ always() }}
         with:
-          name: test-results-cpp
+          name: test-results-cpp-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
@@ -454,7 +513,16 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          - architecture: 'x86_64'
+            machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       OPENVINO_CONTRIB_REPO: ${{ github.workspace }}/openvino_contrib
@@ -479,13 +547,13 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
@@ -511,10 +579,16 @@ jobs:
           # Install the core OV wheel
           python3 -m pip install ${{ env.INSTALL_DIR }}/tools/openvino-*.whl
 
+          # mxnet is only available on x86_64
+          extras_to_install="caffe,kaldi,onnx,tensorflow2,pytorch"
+          if [[ "${{ matrix.architecture }}" == "x86_64" ]]; then
+            extras_to_install="mxnet,$extras_to_install"
+          fi
+
           # Find and install OV dev wheel
           pushd ${{ env.INSTALL_DIR }}/tools
             ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl')
-            python3 -m pip install $ov_dev_wheel_name[mxnet,caffe,kaldi,onnx,tensorflow2,pytorch]
+            python3 -m pip install $ov_dev_wheel_name[$extras_to_install]
           popd
 
       - name: Python API 1.0 Tests
@@ -597,6 +671,7 @@ jobs:
           TEST_DEVICE: CPU
 
       - name: TensorFlow 2 Layer Tests - TF FE
+        if: ${{ 'false' }} # Ticket: 123322
         run: |
           python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
           export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH
@@ -634,6 +709,7 @@ jobs:
           TEST_PRECISION: FP16
 
       - name: Python ONNX operators tests
+        if: ${{ 'false' }} # Ticket: 123325
         run: |
           # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately
           python3 -m pytest -sv ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests -k 'not cuda' \
@@ -657,18 +733,27 @@ jobs:
         uses: actions/upload-artifact@v3
         if: ${{ always() }}
         with:
-          name: test-results-python
+          name: test-results-python-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
 
   CPU_Functional_Tests:
     name: CPU functional tests
-    if: ${{ 'false' }} # Ticket: 122001
     needs: Build
     defaults:
       run:
         shell: bash
-    runs-on: macos-12
+    strategy:
+      max-parallel: 2
+      fail-fast: false
+      matrix:
+        include:
+          # ticket: 122001
+          # - architecture: 'x86_64'
+          #   machine: 'macos-13'
+          - architecture: 'arm64'
+            machine: 'macos-13-xlarge'
+    runs-on: ${{ matrix.machine }}
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
@@ -680,33 +765,37 @@ jobs:
       - name: Download OpenVINO package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_package
+          name: openvino_package_${{ matrix.architecture }}
           path: ${{ env.INSTALL_DIR }}
 
       - name: Download OpenVINO tests package
         uses: actions/download-artifact@v3
         with:
-          name: openvino_tests
+          name: openvino_tests_${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}
 
       - name: Extract OpenVINO packages
         run: |
           pushd ${{ env.INSTALL_DIR }}
-            tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz || exit 1
+            tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz
          popd
           pushd ${{ env.INSTALL_TEST_DIR }}
-            tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz || exit 1
+            tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz
           popd
 
-      - name: Intel CPU plugin func tests
+      - name: CPU plugin func tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
-          ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=*smoke* --gtest_output=xml:"${{ env.INSTALL_TEST_DIR }}/TEST-CPUFuncTests.xml"
+
+          # Skips under Ticket: 122769
+          skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*smoke_nonzero/NonZeroLayerTest.Inference/IS*:*smoke_NormalizeL2_*:*Extension.XmlModelWithExtensionFromDSO*:*Extension.OnnxModelWithExtensionFromDSO*:*ONNXQuantizedModels/QuantizedModelsTests.MaxPool*:*ONNXQuantizedModels/QuantizedModelsTests.Convolution*' || '' }}
+
+          ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=*smoke* "$skip_filter" --gtest_output=xml:"${{ env.INSTALL_TEST_DIR }}/TEST-CPUFuncTests.xml"
 
       - name: Upload Test Results
         uses: actions/upload-artifact@v3
         if: ${{ always() }}
         with:
-          name: test-results-functional-cpu
+          name: test-results-functional-cpu-${{ matrix.architecture }}
           path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml
           if-no-files-found: 'error'
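A note on the `skip_filter` idiom introduced throughout the macOS jobs above (illustrative, not part of the patch): GitHub Actions expressions of the form `${{ condition && 'value' || '' }}` emulate a ternary, so arm64 runs receive a `--gtest_filter` exclusion list while x86_64 runs pass an empty string through. A minimal sketch, with a hypothetical step name, binary, and filter:

    - name: Some gtest suite   # hypothetical step
      run: |
        # evaluates to '' on x86_64 and to the exclusion list only on arm64
        skip_filter=${{ matrix.architecture == 'arm64' && '--gtest_filter=-*KnownArm64Failure*' || '' }}
        ./some_test_binary --gtest_print_time=1 "$skip_filter" \
          --gtest_output=xml:results.xml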
diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml
index 1cedaa107bf63d..0dc685275d2f84 100644
--- a/.github/workflows/webassembly.yml
+++ b/.github/workflows/webassembly.yml
@@ -36,15 +36,14 @@ jobs:
       image: emscripten/emsdk
       volumes:
         - /mount/caches:/mount/caches
+      options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
     env:
       CMAKE_BUILD_TYPE: 'Release'
-      CMAKE_CXX_COMPILER_LAUNCHER: ccache
-      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       OPENVINO_BUILD_DIR: /__w/openvino/openvino/openvino_build
-      CCACHE_DIR: /mount/caches/ccache/webassembly_Release
-      CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
-      CCACHE_MAXSIZE: 50G
+      SCCACHE_AZURE_KEY_PREFIX: webassembly_Release
     steps:
       - name: Install git
         run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates
@@ -55,8 +54,10 @@ jobs:
           path: 'openvino'
           submodules: 'true'
 
-      - name: Install ccache
-        run: apt-get install --assume-yes --no-install-recommends ccache
+      - name: Install sccache
+        uses: mozilla-actions/sccache-action@v0.0.3
+        with:
+          version: "v0.5.4"
 
       - name: emcmake cmake - configure
         run: |
@@ -64,8 +65,8 @@ jobs:
             -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations" \
             -DCMAKE_C_FLAGS="-Wno-deprecated-declarations" \
             -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-            -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+            -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \
+            -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \
             -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
             -S ${OPENVINO_REPO} \
             -B ${OPENVINO_BUILD_DIR}
@@ -74,4 +75,4 @@ jobs:
         run: emmake make -j$(nproc) hello_query_device -C ${OPENVINO_BUILD_DIR}
 
       - name: Show ccache stats
-        run: ccache --show-stats
+        run: ${SCCACHE_PATH} --show-stats
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 6aed320376c21e..e6763d2a696377 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -1,6 +1,9 @@
 name: Windows (VS 2022, Python 3.11)
 on:
-  workflow_dispatch:
+  schedule:
+    # at 00:00 on workdays
+    - cron: '0 0 * * 1,2,3,4,5'
+#  workflow_dispatch:
 #  pull_request:
 #    paths-ignore:
 #      - '**/docs/**'
@@ -9,16 +12,16 @@
 #      - '**.md'
 #      - '**/layer_tests_summary/**'
 #      - '**/conformance/**'
-  push:
-    paths-ignore:
-      - '**/docs/**'
-      - 'docs/**'
-      - '**/**.md'
-      - '**.md'
-      - '**/layer_tests_summary/**'
-      - '**/conformance/**'
-    branches:
-      - master
+#  push:
+#    paths-ignore:
+#      - '**/docs/**'
+#      - 'docs/**'
+#      - '**/**.md'
+#      - '**.md'
+#      - '**/layer_tests_summary/**'
+#      - '**/conformance/**'
+#    branches:
+#      - master
 
 concurrency:
   # github.ref is not unique in post-commit
@@ -336,7 +339,7 @@ jobs:
       shell: cmd
       run: |
         python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
-
+        :: requires 'unit_tests' from 'tools/mo'
         set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH%
         python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml
@@ -404,7 +407,7 @@ jobs:
       shell: cmd
       run: |
         python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
-
+        :: TODO: remove setupvars.bat from here; currently, it's used for 'test_utils' installed in '/python/openvino'
         call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml
       env:
@@ -415,7 +418,7 @@ jobs:
       shell: cmd
       run: |
         python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt
-
+        :: TODO: remove setupvars.bat from here; currently, it's used for 'test_utils' installed in '/python/openvino'
        call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml
       env:
@@ -561,6 +564,16 @@ jobs:
       run: |
         call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml
 
+    - name: SubgraphsDumper tests
+      shell: cmd
+      run: |
+        call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SubgraphsDumperTests.xml
+
+    - name: Template OpImpl tests
+      shell: cmd
+      run: |
+        call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateOpImplTests.xml
+
     - name: GNA plugin unit tests
       shell: cmd
       run: |
diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml
index 93f947ee071df1..f0a9741aee9537 100644
--- a/.github/workflows/windows_conditional_compilation.yml
+++ b/.github/workflows/windows_conditional_compilation.yml
@@ -1,4 +1,4 @@
-name: Tests on Windows Conditional Compilation (VS 2022, Python 3.11)
+name: Windows Conditional Compilation (VS 2022, Python 3.11)
 on:
   workflow_dispatch:
   schedule:
@@ -24,36 +24,30 @@
 #      - master
 
 concurrency:
-  group: ${{ github.head_ref || github.run_id }}-windows-cc
+  # github.ref is not unique in post-commit
+  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-windows-cc
   cancel-in-progress: true
 
 env:
-  CMAKE_BUILD_TYPE: 'Release'
-  CMAKE_GENERATOR: 'Ninja'
-  CMAKE_CXX_COMPILER_LAUNCHER: sccache
-  CMAKE_C_COMPILER_LAUNCHER: sccache
-  OPENVINO_REPO: "${{ github.workspace }}\\openvino"
-  OPENVINO_CONTRIB_REPO: "${{ github.workspace }}\\openvino_contrib"
-  INSTALL_DIR: "${{ github.workspace }}\\install_pkg"
-  INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests"
-  SAMPLES_INSTALL_DIR: "${{ github.workspace }}\\install\\samples"
-  LAYER_TESTS_INSTALL_DIR: "${{ github.workspace }}\\install\\tests\\layer_tests"
-  BUILD_DIR: "${{ github.workspace }}\\build"
-  BUILD_DIR_2: "${{ github.workspace }}\\build_s"
-  MODELS_PATH: "${{ github.workspace }}\\testdata"
-  OV_TEMP: "${{ github.workspace }}\\openvino_temp"
-  BUILD_TYPE: "Release"
-  PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin"
-  VCVARSPATH: "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Auxiliary\\Build\\vcvarsall.bat"
+  PYTHON_VERSION: '3.11'
 
 jobs:
   Build:
-    # TODO: remove. Temporary measure to prevent the workflow from scheduling on forks.
-    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     defaults:
       run:
         shell: pwsh
     runs-on: windows-latest-8-cores
+    env:
+      CMAKE_BUILD_TYPE: 'Release'
+      CMAKE_GENERATOR: 'Ninja Multi-Config'
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
+      OPENVINO_REPO: "${{ github.workspace }}\\openvino"
+      INSTALL_DIR: "${{ github.workspace }}\\openvino_install"
+      INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install"
+      BUILD_DIR: "${{ github.workspace }}\\openvino_build"
+      MODELS_PATH: "${{ github.workspace }}\\testdata"
+      SELECTIVE_BUILD_STAT_DIR: "${{ github.workspace }}\\selective_build_stat"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@v4
         with:
           path: 'openvino'
           submodules: 'true'
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.11'
+          python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Install build dependencies
-        run: |
-          choco install --no-progress ninja
+        run: choco install --no-progress ninja
 
       #
       # Build
       #
 
-      - name: Get number of CPU cores
-        uses: SimenB/github-actions-cpu-cores@v2
-        id: cpu-cores
-
-      - uses: ilammy/msvc-dev-cmd@v1
+      - name: Configure Developer Command Prompt for Microsoft Visual C++
+        uses: ilammy/msvc-dev-cmd@v1
 
       - name: Setup sccache
         uses: hendrikmuhs/ccache-action@v1.2
@@ -99,71 +89,227 @@ jobs:
           # Should save cache only if run in the master branch of the base repo
           # github.ref_name is 'ref/PR_#' in case of the PR, and 'branch_name' when executed on push
           save: ${{ github.ref_name == 'master' && 'true' || 'false' }}
-          key: ${{ github.job }}-windows-cc
+          key: ${{ github.job }}-${{ runner.os }}-itt
           restore-keys: |
-            ${{ github.job }}-windows-cc
+            ${{ github.job }}-${{ runner.os }}-itt
 
-      - name: CMake CC COLLECT
+      - name: CMake configure - CC COLLECT
         run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake -G Ninja `
+          cmake -G "${{ env.CMAKE_GENERATOR }}" `
+            -DBUILD_SHARED_LIBS=OFF `
+            -DENABLE_TESTS=ON `
             -DENABLE_CPPLINT=OFF `
-            -DENABLE_GAPI_PREPROCESSING=OFF `
-            -DENABLE_PLUGINS_XML=ON `
-            -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF `
-            -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} `
+            -DENABLE_NCC_STYLE=OFF `
+            -DENABLE_INTEL_GNA=OFF `
+            -DCMAKE_COMPILE_WARNING_AS_ERROR=ON `
             -DENABLE_PROFILING_ITT=ON `
             -DSELECTIVE_BUILD=COLLECT `
+            -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON `
            -S ${{ env.OPENVINO_REPO }} `
            -B ${{ env.BUILD_DIR }}
 
-      - name: Build CC COLLECT
+      - name: Cmake build - CC COLLECT
         run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake --build ${{ env.BUILD_DIR }} --parallel ${{ steps.cpu-cores.outputs.count }} --config ${{ env.BUILD_TYPE }} `
-            --target openvino_intel_cpu_plugin openvino_ir_frontend benchmark_app sea_itt_lib
+          cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }}
+          cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib
 
-      - name: List bin files
+      - name: Cmake install - OpenVINO
+        run: cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake
+
+      - name: Build C++ samples - OpenVINO build tree
+        run: |
+          cmake -G "${{ env.CMAKE_GENERATOR }}" -DOpenVINO_DIR=${{ env.BUILD_DIR }} -S ${{ env.INSTALL_DIR }}/samples/cpp -B ${{ env.BUILD_DIR }}/cpp_samples
+          cmake --build ${{ env.BUILD_DIR }}/cpp_samples --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target hello_query_device
+
+      - name: Build C samples - OpenVINO install tree
+        run: |
+          & ${{ env.INSTALL_DIR }}/samples/c/build_samples_msvc.bat -i ${{ env.INSTALL_DIR }} -b ${{ env.BUILD_DIR }}/c_samples
+
+      - name: Ctest - OpenVINO unit tests
         shell: cmd
-        run: dir ${{ env.OPENVINO_REPO }}\bin\ /s
+        run: |
+          set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin
+          ctest -C ${{ env.CMAKE_BUILD_TYPE }} --test-dir ${{ env.BUILD_DIR }} -V -L UNIT
 
-      - name: Code usage analysis
+      - name: Perform code tracing via ITT collector
         shell: cmd
-        working-directory: ${{ env.OPENVINO_REPO }}
         run: |
           set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin
-          call "${{ env.VCVARSPATH }}" && python thirdparty\itt_collector\runtool\sea_runtool.py --bindir ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.BUILD_TYPE }} -o ${{ env.BUILD_DIR }}\itt_stat ! ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.BUILD_TYPE }}\benchmark_app.exe -niter 1 -nireq 1 -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml -d CPU
+
+          python3 ${{ env.OPENVINO_REPO }}\thirdparty\itt_collector\runtool\sea_runtool.py ^
+            --bindir ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }} ^
+            -o ${{ env.SELECTIVE_BUILD_STAT_DIR }}\itt_stat ! ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }}\benchmark_app.exe ^
+            -niter 1 ^
+            -nireq 1 ^
+            -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml ^
+            -d CPU
+
+      - name: List bin files
         shell: cmd
         run: dir ${{ env.OPENVINO_REPO }}\bin\ /s
 
-      - name: List csv files
+      - name: List install files
         shell: cmd
-        run: dir ${{ env.BUILD_DIR }}\*.csv /s /p
+        run: dir ${{ env.INSTALL_DIR }} /s
+
+      - name: Pack Artifacts
+        run: |
+          $file=Get-ChildItem -Path "${{ env.SELECTIVE_BUILD_STAT_DIR }}"
+          $compress = @{
+            Path = $file
+            CompressionLevel = "Optimal"
+            DestinationPath = "${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip"
+          }
+          Compress-Archive @compress
+
+          $compress = @{
+            Path = "${{ env.OPENVINO_REPO }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }}/ov_cpu_func_tests.exe", "${{ env.OPENVINO_REPO }}/src/tests/test_utils/functional_test_utils/layer_tests_summary", "${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb"
+            CompressionLevel = "Optimal"
+            DestinationPath = "${{ env.BUILD_DIR }}/openvino_tests.zip"
+          }
+          Compress-Archive @compress
+
+      - name: Upload selective build statistics package
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: openvino_selective_build_stat
+          path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip
+          if-no-files-found: 'error'
+
+      - name: Upload OpenVINO tests package
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: openvino_tests
+          path: ${{ env.BUILD_DIR }}/openvino_tests.zip
+          if-no-files-found: 'error'
+
+  CC_Build:
+    name: Conditional Compilation
+    needs: Build
+    defaults:
+      run:
+        shell: pwsh
+    runs-on: windows-latest-8-cores
+    env:
+      CMAKE_BUILD_TYPE: 'Release'
+      CMAKE_CXX_COMPILER_LAUNCHER: sccache
+      CMAKE_C_COMPILER_LAUNCHER: sccache
+      OPENVINO_REPO: "${{ github.workspace }}\\openvino"
+      BUILD_DIR: "${{ github.workspace }}\\openvino_build"
+      MODELS_PATH: "${{ github.workspace }}\\testdata"
+      SELECTIVE_BUILD_STAT_DIR: "${{ github.workspace }}\\selective_build_stat"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@v4
         with:
           path: 'openvino'
           submodules: 'true'
 
       - name: Clone test models
         uses: actions/checkout@v4
         with:
           repository: 'openvinotoolkit/testdata'
           path: 'testdata'
           lfs: 'true'
           ref: 'master'
 
+      - name: Download selective build statistics package
+        uses: actions/download-artifact@v3
+        with:
+          name: openvino_selective_build_stat
+          path: ${{ env.SELECTIVE_BUILD_STAT_DIR }}
+
+      - name: Extract selective build statistics package
+        run: Expand-Archive ${{ env.SELECTIVE_BUILD_STAT_DIR }}/openvino_selective_build_stat.zip -DestinationPath "${{ env.SELECTIVE_BUILD_STAT_DIR }}"
+
-      - name: CMake CC ON
+      - name: CMake configure - CC ON
         run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake -G "Visual Studio 17 2022" `
-            -DCMAKE_VERBOSE_MAKEFILE=ON `
+          cmake `
+            -DBUILD_SHARED_LIBS=OFF `
             -DENABLE_CPPLINT=OFF `
-            -DENABLE_GAPI_PREPROCESSING=OFF `
-            -DENABLE_PROFILING_ITT=OFF `
             -DSELECTIVE_BUILD=ON `
-            -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF `
-            -DSELECTIVE_BUILD_STAT=${{ env.BUILD_DIR }}\*.csv `
+            -DENABLE_TEMPLATE=OFF `
+            -DENABLE_INTEL_GPU=OFF `
+            -DENABLE_INTEL_GNA=OFF `
+            -DENABLE_OV_TF_FRONTEND=OFF `
+            -DENABLE_OV_TF_LITE_FRONTEND=OFF `
+            -DENABLE_OV_PADDLE_FRONTEND=OFF `
+            -DENABLE_OV_PYTORCH_FRONTEND=OFF `
+            -DENABLE_OV_ONNX_FRONTEND=OFF `
+            -DSELECTIVE_BUILD_STAT=${{ env.SELECTIVE_BUILD_STAT_DIR }}\*.csv `
             -S ${{ env.OPENVINO_REPO }} `
-            -B ${{ env.BUILD_DIR_2 }}
+            -B ${{ env.BUILD_DIR }}
 
-      - name: Build CC ON
-        run: |
-          & "${{ env.VCVARSPATH }}" x64 && cmake
--build ${{ env.BUILD_DIR_2 }} --parallel ${{ steps.cpu-cores.outputs.count }} --config ${{ env.BUILD_TYPE }} ` - --target openvino_intel_cpu_plugin openvino_ir_frontend benchmark_app + - name: Cmake build - CC ON + run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target benchmark_app - - name: List bin files ON + - name: List bin files shell: cmd run: dir ${{ env.OPENVINO_REPO }}\bin\ /s - - name: Check conditional_compilation_gen.h header + - name: Run with CC-ed runtime shell: cmd - run: type ${{ env.BUILD_DIR_2 }}\src\common\conditional_compilation\conditional_compilation_gen.h + run: | + set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin + ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }}\benchmark_app.exe -niter 1 -nireq 1 -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml -d CPU - - name: Use OpenVINO after CC + CPU_Functional_Tests: + name: CPU functional tests + needs: Build + defaults: + run: + shell: pwsh + runs-on: windows-latest-8-cores + env: + INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" + PARALLEL_TEST_SCRIPT: "${{ github.workspace }}\\tests_install\\layer_tests_summary\\run_parallel.py" + PARALLEL_TEST_CACHE: "${{ github.workspace }}\\tests_install\\test_cache.lst" + + steps: + - name: Download OpenVINO tests package + uses: actions/download-artifact@v3 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + - name: Extract OpenVINO tests package + run: Expand-Archive ${{ env.INSTALL_TEST_DIR }}/openvino_tests.zip -DestinationPath "${{ env.INSTALL_TEST_DIR }}" + + - uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install python dependencies for run_parallel.py + run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/layer_tests_summary/requirements.txt + + # Windows pipeline is in nightly mode, uncomment once cache creation is consistent + # - name: Restore tests execution time + # uses: actions/cache/restore@v3 + # with: + # path: ${{ env.PARALLEL_TEST_CACHE }} + # key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} + # restore-keys: | + # ${{ runner.os }}-tests-functional-cpu-stamp + + - name: Intel CPU plugin func tests (parallel) shell: cmd run: | + set path=%path%;${{ env.INSTALL_TEST_DIR }}\tbb\bin;${{ env.INSTALL_TEST_DIR }}\tbb + python3 ${{ env.PARALLEL_TEST_SCRIPT }} -e ${{ env.INSTALL_TEST_DIR }}\ov_cpu_func_tests.exe -w ${{ env.INSTALL_TEST_DIR }} -s suite -rf 0 -- --gtest_print_time=1 --gtest_filter=*smoke* + timeout-minutes: 45 + + - name: Upload Test Results + uses: actions/upload-artifact@v3 + if: ${{ !cancelled() }} + with: + name: test-results-functional-cpu + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + ${{ env.INSTALL_TEST_DIR }}/logs/failed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/crashed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/hanged/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/interapted/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/disabled_tests.log + if-no-files-found: 'error'
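For context on the two conditional-compilation jobs above: the `Build` job compiles with `-DSELECTIVE_BUILD=COLLECT` and `-DENABLE_PROFILING_ITT=ON`, traces `benchmark_app` through `sea_runtool.py` so that every instrumented code region that actually runs is recorded, and publishes the resulting CSV statistics as an artifact; `CC_Build` then reconfigures with `-DSELECTIVE_BUILD=ON` and `-DSELECTIVE_BUILD_STAT=<stats>/*.csv`, so everything the trace never touched is compiled out. A minimal Python sketch of that orchestration follows; the paths and the flag subset are placeholders reduced to the essentials, not the actual CI script:

```python
import subprocess
from pathlib import Path

REPO = Path("openvino")                   # placeholder checkout location
STAT_DIR = Path("selective_build_stat")   # CSVs produced by sea_runtool.py in stage 1

def configure_and_build(build_dir, flags):
    # Configure with the given selective-build flags, then build.
    subprocess.run(["cmake", "-S", str(REPO), "-B", str(build_dir), *flags], check=True)
    subprocess.run(["cmake", "--build", str(build_dir), "--parallel"], check=True)

# Stage 1: instrumented build; benchmark_app is then traced via sea_runtool.py
# (elided here) to fill STAT_DIR with per-region usage statistics.
configure_and_build("build_collect", ["-DENABLE_PROFILING_ITT=ON", "-DSELECTIVE_BUILD=COLLECT"])

# Stage 2: rebuild against the recorded statistics; untraced regions are compiled out.
configure_and_build("build_cc", ["-DSELECTIVE_BUILD=ON",
                                 f"-DSELECTIVE_BUILD_STAT={STAT_DIR / '*.csv'}"])
```

The final `benchmark_app` run in the `Run with CC-ed runtime` step is the smoke test that the slimmed-down runtime still executes the traced model.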
diff --git a/README.md b/README.md index bfc4a722c2680d..489ef7803ccd80 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,16 @@ OpenVINO™ Toolkit also contains several plugins which simplify loading models OpenVINO™ Toolkit is licensed under [Apache License Version 2.0](LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. +## Telemetry +OpenVINO™ collects software performance and usage data for the purpose of improving OpenVINO™ tools. This data is collected directly by OpenVINO™ or through the use of Google Analytics 4. +You can opt out at any time by running the command: + +``` bash +opt_in_out --opt_out +``` + +More information is available at https://docs.openvino.ai/latest/openvino_docs_telemetry_information.html. + ## Documentation ### User documentation diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 5c86bdea57620c..257263f663bec6 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -104,10 +104,10 @@ function(ov_download_tbb) elseif(LINUX AND X86_64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin-20231012.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "f3f2edd8e7875b02220f11ab5b201411d5af6822e525e8da5444b4a666514e8b" + SHA256 "6f39d18783b37fdcc15ca137fbf70bc78206848af1a510cada806279fae49718" USE_NEW_LOCATION TRUE) elseif(YOCTO_AARCH64) RESOLVE_DEPENDENCY(TBB @@ -135,10 +135,10 @@ function(ov_download_tbb) elseif(LINUX AND AARCH64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.1-lin-arm64-canary.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.1-lin-arm64-20231012.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "042fdac53be65841a970b05d892f4b20b556b06fd3b20d2d0068e49c4fd74f07" + SHA256 "cbb239cbda7ea2937cec7008c12fe628dd44488e1eafd9630f8814f9eb2c13e2" USE_NEW_LOCATION TRUE) elseif(APPLE AND AARCH64) # build oneTBB 2021.2.1 with export MACOSX_DEPLOYMENT_TARGET=11.0 @@ -204,10 +204,10 @@ function(ov_download_tbbbind_2_5) USE_NEW_LOCATION TRUE) elseif(LINUX AND X86_64) RESOLVE_DEPENDENCY(TBBBIND_2_5 - ARCHIVE_LIN "tbbbind_2_5_static_lin_v3.tgz" + ARCHIVE_LIN "tbbbind_2_5_static_lin_v4.tgz" TARGET_PATH "${TEMP}/tbbbind_2_5" ENVIRONMENT "TBBBIND_2_5_ROOT" - SHA256 "d39deb262c06981b5e2d2e3c593e9fc9be62ce4feb91dd4e648e92753659a6b3" + SHA256 "4ebf30246530795f066fb9616e6707c6b17be7a65d29d3518b578a769dd54eea" USE_NEW_LOCATION TRUE) else() # TMP: for Apple Silicon TBB does not provide TBBBind diff --git a/cmake/developer_package/frontends/frontends.cmake b/cmake/developer_package/frontends/frontends.cmake index a86c57c6c87845..a20b1665fb7d29 100644 --- a/cmake/developer_package/frontends/frontends.cmake +++ b/cmake/developer_package/frontends/frontends.cmake @@ -125,17 +125,24 @@ macro(ov_add_frontend) source_group("public include" FILES ${LIBRARY_PUBLIC_HEADERS}) # Generate protobuf file on build time for each '.proto' file in src/proto - file(GLOB proto_files ${frontend_root_dir}/src/proto/*.proto) + set(protofiles_root_dir "${frontend_root_dir}/src/proto") + file(GLOB_RECURSE proto_files ${protofiles_root_dir}/*.proto) foreach(proto_file IN LISTS proto_files) + # filter out standard google proto files + if(proto_file MATCHES ".*google.*") + continue() + endif() + file(RELATIVE_PATH proto_file_relative "${CMAKE_SOURCE_DIR}" "${proto_file}") - get_filename_component(FILE_DIR ${proto_file} DIRECTORY) get_filename_component(FILE_WE ${proto_file} NAME_WE) - set(OUTPUT_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.cc) - set(OUTPUT_PB_HEADER 
${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.h) + file(RELATIVE_PATH relative_path ${protofiles_root_dir} ${proto_file}) + get_filename_component(relative_path ${relative_path} DIRECTORY) + set(OUTPUT_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/${relative_path}/${FILE_WE}.pb.cc) + set(OUTPUT_PB_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${relative_path}/${FILE_WE}.pb.h) add_custom_command( OUTPUT "${OUTPUT_PB_SRC}" "${OUTPUT_PB_HEADER}" - COMMAND ${PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FILE_DIR} ${FILE_WE}.proto + COMMAND ${PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${protofiles_root_dir} ${proto_file} DEPENDS ${PROTOC_DEPENDENCY} ${proto_file} COMMENT "Running C++ protocol buffer compiler (${PROTOC_EXECUTABLE}) on ${proto_file_relative}" VERBATIM diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 4fbce5b4a58ca7..9671d827521d20 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -4,14 +4,6 @@ include(GNUInstallDirs) -if(CPACK_GENERATOR STREQUAL "BREW") - # brew relies on RPATHs - # set(CMAKE_SKIP_INSTALL_RPATH OFF) -else() - # we don't need RPATHs, because libraries are searched by standard paths - set(CMAKE_SKIP_INSTALL_RPATH ON) -endif() - # # ov_common_libraries_cpack_set_dirs() # @@ -115,3 +107,12 @@ macro(ov_define_component_include_rules) endmacro() ov_define_component_include_rules() + +if(CPACK_GENERATOR STREQUAL "BREW") + # brew relies on RPATHs + set(CMAKE_SKIP_INSTALL_RPATH OFF) + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${OV_CPACK_LIBRARYDIR}") +else() + # we don't need RPATHs, because libraries are searched by standard paths + set(CMAKE_SKIP_INSTALL_RPATH ON) +endif() diff --git a/docs/articles_en/about_openvino/additional_resources/telemetry_information.md b/docs/articles_en/about_openvino/additional_resources/telemetry_information.md index 4340a40923770b..b23a763ff97e70 100644 --- a/docs/articles_en/about_openvino/additional_resources/telemetry_information.md +++ b/docs/articles_en/about_openvino/additional_resources/telemetry_information.md @@ -3,13 +3,11 @@ @sphinxdirective .. meta:: - :description: Learn about OpenVINO™ telemetry, that with your explicit consent - collects only usage data to simplify debugging and further development. + :description: Learn about OpenVINO™ telemetry, which collects anonymous usage data for the purpose of improving OpenVINO™ tools. -To facilitate debugging and further development, OpenVINO™ asks its users for -a permission to collect telemetry data. It will not be collected -without an explicit consent on your part and will cover only OpenVINO™ usage information. +To facilitate debugging and further development, OpenVINO™ collects anonymous telemetry data. Anonymous telemetry data is collected by default, +but you can stop data collection anytime by running the command ``opt_in_out --opt_out``. It does not extend to any other Intel software, hardware, website usage, or other products. Google Analytics is used for telemetry purposes. Refer to 
Refer to Enable or disable Telemetry reporting ########################################################### -First-run consent -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -On the first run of an application that collects telemetry data, you will be prompted -to opt in or out of telemetry collection with the following telemetry message: - -.. code-block:: console - - Intel would like your permission to collect software performance and usage data - for the purpose of improving Intel products and services. This data will be collected - directly by Intel or through the use of Google Analytics. This data will be stored - in countries where Intel or Google operate. - - You can opt-out at any time in the future by running ``opt_in_out --opt_in``. - - More Information is available at docs.openvino.ai. - - Please type ``Y`` to give your consent or ``N`` to decline. - -Choose your preference by typing ``Y`` to enable or ``N`` to disable telemetry. Your choice will -be confirmed by a corresponding disclaimer. If you do not reply to the telemetry message, -your telemetry data will not be collected. - -For the Neural Network Compression Framework (NNCF), which is not a command line application, -the telemetry message will not display. Telemetry data will only be collected from NNCF -if you have explicitly provided consent in another OpenVINO tool. - - Changing consent decision +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -82,8 +52,8 @@ Telemetry Data Collection Details .. tab-item:: Telemetry Data Retention :sync: telemetry-data-retention - Telemetry data is retained in Google Analytics for a maximum of 26 months. - Any raw data that has reached the 26-month threshold is deleted from Google Analytics on a monthly basis. + Telemetry data is retained in Google Analytics for a maximum of 14 months. + Any raw data that has reached the 14-month threshold is deleted from Google Analytics on a monthly basis. @endsphinxdirective \ No newline at end of file diff --git a/docs/dev/build_windows.md b/docs/dev/build_windows.md index 28001328ea3013..b6321785c38970 100644 --- a/docs/dev/build_windows.md +++ b/docs/dev/build_windows.md @@ -78,7 +78,7 @@ Supported configurations: ```sh call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC\Auxiliary\Build\vcvars64.bat" cmake -G Ninja -Wno-dev -DCMAKE_BUILD_TYPE=Release .. -ninja . +cmake --build . 
--parallel ``` ## See also diff --git a/docs/requirements.txt b/docs/requirements.txt index 69433a40eb64ff..2e643842f24861 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -44,6 +44,6 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 toml==0.10.2 -urllib3==1.26.17 +urllib3==1.26.18 zipp==3.4.1 docs/openvino_custom_sphinx_sitemap diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index 72438eeb2ecd91..c4d3c3e35568aa 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ numpy>=1.16.6 singledispatchmethod; python_version<'3.8' -openvino-telemetry>=2023.1.0 +openvino-telemetry>=2023.2.1 diff --git a/src/bindings/python/setup.cfg b/src/bindings/python/setup.cfg index 083c8e1de85cb1..b9b15ef0ca1214 100644 --- a/src/bindings/python/setup.cfg +++ b/src/bindings/python/setup.cfg @@ -13,6 +13,7 @@ setenv = OV_BACKEND = {env:OV_BACKEND:"CPU"} PYTHONPATH = {env:PYTHONPATH} OpenVINO_DIR = {env:OpenVINO_DIR} + CI = True passenv = http_proxy https_proxy diff --git a/src/bindings/python/src/compatibility/ngraph/opset3/ops.py b/src/bindings/python/src/compatibility/ngraph/opset3/ops.py index 82846826111751..7d7c757d9cd5dc 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset3/ops.py +++ b/src/bindings/python/src/compatibility/ngraph/opset3/ops.py @@ -550,9 +550,9 @@ def shuffle_channels(data: Node, axis: int, group: int, name: Optional[str] = No `data_reshaped` = reshape(`data`, [N, group, C / group, H * W]) - `data_trnasposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) + `data_transposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) - `output` = reshape(`data_trnasposed`, [N, C, H, W]) + `output` = reshape(`data_transposed`, [N, C, H, W]) For example: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 45a662e4e45fd1..479e1a5cb1c622 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -7,10 +7,9 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape -from openvino.frontend.pytorch.utils import maybe_convert_max_int, make_constant, fetch_attr, pt_to_ov_type_map, ov_to_c_type_map +from openvino.frontend.pytorch.utils import maybe_convert_max_int, make_constant, fetch_attr, pt_to_ov_type_map import torch -import ctypes class TorchFXPythonDecoder (Decoder): @@ -224,11 +223,7 @@ def as_constant(self): if self.pt_module.op == 'get_attr': # Extract Constant from FX module field ret = fetch_attr(self.fx_gm, self.pt_module.target) - ovshape = PartialShape(ret.size()) - ovtype = pt_to_ov_type_map[str(ret.type())] - c_type = ctypes.POINTER(ov_to_c_type_map[ovtype]) - data_c_ptr = ctypes.cast(ret.data_ptr(), c_type) - ov_const = op.Constant(ovtype, ovshape.get_shape(), data_c_ptr[:ret.nelement()]) + ov_const = op.Constant(ret.numpy(), shared_memory=True) return ov_const.outputs() @@ -370,7 +365,7 @@ def inlined_inputs(self, index): return result def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", 
"aten::matmul"]: # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that return False try: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py new file mode 100644 index 00000000000000..b4bd06552b2a1e --- /dev/null +++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py @@ -0,0 +1,140 @@ + +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# flake8: noqa +# mypy: ignore-errors + +import torch +from functools import partial + +# Wraps a single tensor to a module to prevent it from jit.freezing +# It depends on a tensor dtype whether it will be preserved from freezing. Refer to the decoder code to learn which types will be preserved. +class KeepWeight(torch.nn.Module): + + def __init__(self, weight): + super().__init__() + self.weight = torch.nn.Parameter(weight, requires_grad=False) + + def forward(self): + return self.weight + + +# Produces a pattern that can be captured later and represented as a single u4 constant node +def decompression_pattern(weights): + mask = torch.tensor(15, dtype=torch.uint8).to(weights.device) + return torch.stack((torch.bitwise_and(weights, mask), torch.bitwise_right_shift(weights, 4)), dim=-1) + + +def patched_forward(self, *args, **kwargs): + if hasattr(self, '_hf_hook'): + args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs) + + x = args[0] + dtype = x.dtype + outshape = x.shape[:-1] + (self.width,) + x = x.view(-1, x.shape[-1]) + groups = self.qzeros.shape[0] + height = self.qweight.shape[0] + + unpacked_weights = decompression_pattern( + self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8) + unpacked_weights = torch.transpose( + unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width) + unpacked_zp = decompression_pattern( + self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1) + + unpacked_zp = unpacked_zp.to(dtype) + 1 + + unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales + unpacked_weights = unpacked_weights.view(-1, self.width) + + out = x @ unpacked_weights + + out = out.view(outshape) + if self.bias is not None: + out.add_(self.bias) + + if hasattr(self, '_hf_hook'): + out = self._hf_hook.post_forward(self, out) + return out + + +# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema +supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old'] + + +def patch_model(model): + for name, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + # already patched, skipping + continue + # TODO: Check module type + is_quantized = getattr(m, 'is_quantized', None) + if is_quantized is not None: + m.is_quantized = False + m.float() # enables tracing on CPU, applied for all modules + if hasattr(m, 'QUANT_TYPE'): + if m.QUANT_TYPE not in supported_quant_types: + raise ValueError( + f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for AutoGPTQ model, only the following types are supported: {supported_quant_types}') + if m.bits != 4: + raise ValueError( + f'Unsupported bits == {m.bits} is discovered in module {name} in AutoGPTQ model, only bits == 4 is supported.') + + int4_in_int32 = 8 + groups = m.qzeros.shape[0] + m.width = m.qweight.shape[1] + assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups + + m._openvino_patch_orig_forward = m.forward + m.forward = 
partial(patched_forward, m) + + # Keep original field properties to be used when the model is returned to its original state + m._openvino_patch_orig_qweights_type = m.qweight.dtype + m._openvino_patch_orig_qzeros_type = m.qzeros.dtype + m._openvino_patch_orig_scale_shape = m.scales.shape + + m.qweight = m.qweight.view(dtype=torch.uint8) + m.qzeros = m.qzeros.view(dtype=torch.uint8) + + # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules + m.add_module( + '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight)) + m.add_module('_openvino_u4_compression_submodule_qzeros', + KeepWeight(m.qzeros)) + + m.scales = m.scales.view(-1, 1, m.width) + + +def unpatch_model(model): + for _, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + try: + m.forward = m._openvino_patch_orig_forward + del m._openvino_patch_orig_forward + + m.qweight = m.qweight.view( + dtype=m._openvino_patch_orig_qweights_type) + del m._openvino_patch_orig_qweights_type + + m.qzeros = m.qzeros.view( + dtype=m._openvino_patch_orig_qzeros_type) + del m._openvino_patch_orig_qzeros_type + + m.scales = m.scales.view(m._openvino_patch_orig_scale_shape) + del m._openvino_patch_orig_scale_shape + + del m._openvino_u4_compression_submodule_qweights + del m._openvino_u4_compression_submodule_qzeros + except Exception as error: + print('[ WARNING ] Exception raised during GPTQ model unpatching. Depending on the exact issue it may lead to a broken original model') + print(error) + + +def detect_gptq_model_raw(model): + return model and getattr(model, 'config', None) and getattr(model.config, 'quantization_config', None) and model.config.quantization_config.quant_method == 'gptq' + + +def detect_gptq_model(model): + return detect_gptq_model_raw(model) or getattr(model, 'model', None) and detect_gptq_model_raw(model.model) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py index 726f3b598bc15e..4a76d90b160553 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py @@ -41,6 +41,7 @@ def __init__(self): "torch.ops.aten.arange.default": None, "torch.ops.aten.argmax.default": None, "torch.ops.aten.avg_pool2d.default": None, + "torch.ops.aten.baddbmm.default": None, "torch.ops.aten.bitwise_and.Tensor": None, "torch.ops.aten.bmm.default": None, "torch.ops.aten.cat.default": None, @@ -67,6 +68,7 @@ def __init__(self): "torch.ops.aten.hardswish_.default": None, "torch.ops.aten.hardtanh_.default": None, "torch.ops.aten.index.Tensor": None, + "torch.ops.aten.leaky_relu_.default": None, "torch.ops.aten.lift_fresh_copy.default": None, "torch.ops.aten.linalg_vector_norm.default": None, "torch.ops.aten.lt.Tensor": None, @@ -89,6 +91,7 @@ def __init__(self): "torch.ops.aten.relu.default": None, "torch.ops.aten.relu_.default": None, "torch.ops.aten.rsub.Scalar": None, + "torch.ops.aten._scaled_dot_product_flash_attention.default": None, "torch.ops.aten.select.int": None, "torch.ops.aten.sigmoid.default": None, "torch.ops.aten.silu.default": None,
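The `decompression_pattern` helper in the new `gptq.py` above is the core trick: AutoGPTQ packs two unsigned 4-bit values into every byte of `qweight`/`qzeros`, and stacking the masked low nibble with the shifted high nibble along a new trailing axis restores the original value order, producing a pattern the frontend can later collapse into a single u4 constant. A small standalone illustration of the same pattern:

```python
import torch

def decompression_pattern(weights: torch.Tensor) -> torch.Tensor:
    # Same pattern as gptq.py: split each byte into its low and high nibble.
    mask = torch.tensor(15, dtype=torch.uint8)
    return torch.stack((torch.bitwise_and(weights, mask),
                        torch.bitwise_right_shift(weights, 4)), dim=-1)

packed = torch.tensor([0x21, 0x43], dtype=torch.uint8)  # bytes packing values 1,2 and 3,4
print(decompression_pattern(packed))  # tensor([[1, 2], [3, 4]], dtype=torch.uint8)
```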
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index b6caf22cfc7b68..a57393e7638d67 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -9,6 +9,7 @@ from openvino.runtime import op, PartialShape, Type as OVType, OVAny from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, prepare_example_inputs_and_model, convert_quantized_tensor from openvino.runtime import opset11 as ops +from openvino.frontend.pytorch import gptq import typing import torch @@ -84,12 +85,32 @@ def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False) if example_inputs is None: scripted = torch.jit.script(pt_module) else: - input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model(example_inputs, input_params, pt_module) - scripted = torch.jit.trace(pt_module, **input_parameters, strict=False) + input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model( + example_inputs, input_params, pt_module) + gptq_patched = False + + if gptq.detect_gptq_model(pt_module): + try: + gptq.patch_model(pt_module) + gptq_patched = True + except Exception as error: + print('[ WARNING ] Failed patching of AutoGPTQ model. Error message:\n', error) + print('[ WARNING ] Tracing of the model will likely be unsuccessful or incorrect') + gptq.unpatch_model(pt_module) + gptq_patched = False + + try: + scripted = torch.jit.trace( + pt_module, **input_parameters, strict=False) + finally: + if gptq_patched: + gptq.unpatch_model(pt_module) + if not skip_freeze: + ops_kind_no_freeze = ["quantize", "aten::as_strided"] for n in scripted.inlined_graph.nodes(): # TODO: switch off freezing for all traced models - if "quantize" in n.kind(): + if any(kind in n.kind() for kind in ops_kind_no_freeze): # do not freeze quantized models skip_freeze = True break @@ -130,6 +151,16 @@ def get_input_shape(self, index: int): raw_input = self._raw_input(index) return self.get_shape_for_value(raw_input) + def get_input_strides(self, index: int) -> typing.List[int]: + raw_input = self._raw_input(index) + if isinstance(raw_input, torch.Value): + inp_type = raw_input.type() + if isinstance(inp_type, torch.TensorType): + strides = inp_type.strides() + if strides: + return strides + return [] + def get_input_type(self, index: int): raw_input = self._raw_input(index) return self.get_type_for_value(raw_input) @@ -341,8 +372,8 @@ def input_is_none(self, index: int) -> bool: return False def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: - # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::_convolution", "aten::matmul"]: + # AliasDB::may_contain_alias sometimes returns True for tensors produced by convolution or matmul, we have to work around that return False try: return self.alias_db.may_contain_alias(self._raw_input(in_index), self._raw_output(out_index)) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index 97d237fb0efda1..a3ac46e701119b 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -7,7 +7,6 @@ import torch import numpy as np -import ctypes from openvino.runtime import op, Type as OVType, Shape, Tensor from openvino.runtime import opset11 as ops @@ -132,13 +131,6 @@ def 
get_value_from_getattr(getattr_node, self_module): "torch.qint32": OVType.i32 } -ov_to_c_type_map = { - OVType.f32: ctypes.c_float, - OVType.f64: ctypes.c_double, - OVType.i32: ctypes.c_int, - OVType.i64: ctypes.c_int64, -} - wrapper_template = """ import torch diff --git a/src/bindings/python/src/openvino/runtime/opset3/ops.py b/src/bindings/python/src/openvino/runtime/opset3/ops.py index 979fda8a782a02..8a1d81d9703ffb 100644 --- a/src/bindings/python/src/openvino/runtime/opset3/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset3/ops.py @@ -575,9 +575,9 @@ def shuffle_channels(data: Node, axis: int, group: int, name: Optional[str] = No `data_reshaped` = reshape(`data`, [N, group, C / group, H * W]) - `data_trnasposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) + `data_transposed` = transpose(`data_reshaped`, [0, 2, 1, 3]) - `output` = reshape(`data_trnasposed`, [N, C, H, W]) + `output` = reshape(`data_transposed`, [N, C, H, W]) For example: diff --git a/src/bindings/python/src/openvino/runtime/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py index 5eeeb021a7c724..aa986d4f873c9c 100644 --- a/src/bindings/python/src/openvino/runtime/utils/types.py +++ b/src/bindings/python/src/openvino/runtime/utils/types.py @@ -23,6 +23,7 @@ openvino_to_numpy_types_map = [ (Type.boolean, bool), + (Type.boolean, np.bool_), (Type.f16, np.float16), (Type.f32, np.float32), (Type.f64, np.float64), @@ -39,6 +40,7 @@ openvino_to_numpy_types_str_map = [ ("boolean", bool), + ("boolean", np.bool_), ("f16", np.float16), ("f32", np.float32), ("f64", np.float64), diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index b78d3ea4c37bff..7b473929a63396 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -8,6 +8,7 @@ #include "Python.h" #include "openvino/core/except.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/common_util.hpp" #define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_ @@ -170,13 +171,12 @@ ov::op::v0::Constant create_copied(ov::Tensor& tensor) { return ov::op::v0::Constant(tensor.get_element_type(), tensor.get_shape(), const_cast(tensor.data())); } -OPENVINO_SUPPRESS_DEPRECATED_START template <> ov::op::v0::Constant create_shared(py::array& array) { // Check if passed array has C-style contiguous memory layout. // If memory is going to be shared it needs to be contiguous before passing to the constructor. if (array_helpers::is_contiguous(array)) { - auto memory = std::make_shared>( + auto memory = std::make_shared>( static_cast(array.ndim() == 0 ? array.mutable_data() : array.mutable_data(0)), array.ndim() == 0 ? array.itemsize() : array.nbytes(), array); @@ -185,7 +185,6 @@ ov::op::v0::Constant create_shared(py::array& array) { // If passed array is not C-style, throw an error. OPENVINO_THROW("SHARED MEMORY MODE FOR THIS CONSTANT IS NOT APPLICABLE! 
Passed numpy array must be C contiguous."); } -OPENVINO_SUPPRESS_DEPRECATED_END template <> ov::op::v0::Constant create_shared(ov::Tensor& tensor) { diff --git a/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp b/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp index a1136e4cda6f66..024b03b2ff4cd9 100644 --- a/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp +++ b/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp @@ -34,6 +34,10 @@ class PyDecoder : public ov::frontend::pytorch::TorchDecoder { PYBIND11_OVERRIDE_PURE(ov::PartialShape, TorchDecoder, get_input_shape, index); } + const std::vector& get_input_strides(size_t index) const override { + PYBIND11_OVERRIDE_PURE(const std::vector&, TorchDecoder, get_input_strides, index); + } + ov::Any get_input_type(size_t index) const override { PYBIND11_OVERRIDE_PURE(ov::Any, TorchDecoder, get_input_type, index); } diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py b/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py index ec8f6c49e7ffb6..025c438fedf5d2 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py @@ -1,15 +1,19 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform import os import numpy as np import ngraph as ng +import pytest from openvino.inference_engine import IECore from tests_compatibility.runtime import get_runtime +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_import_onnx_with_external_data(): model_path = os.path.join(os.path.dirname(__file__), "models/external_data.onnx") ie = IECore() diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py b/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py index 60ab593d097250..7b1ebc7295ce96 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import onnx import pytest @@ -45,6 +47,8 @@ def relu(x): assert_onnx_import_equals_callable("Relu", relu, [[-3, -2, -1], [1, 2, 3]]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_leaky_relu(): def leaky_relu(x, alpha=0.01): return np.maximum(alpha * x, x) @@ -79,6 +83,8 @@ def parametic_relu(x, slope): assert np.allclose(output, expected_output) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_selu(): # f(x) = gamma * (alpha * exp(x) - alpha) for x <= 0, y = gamma * x for x > 0 def selu(x, alpha=1.67326319217681884765625, gamma=1.05070102214813232421875): diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py b/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py index ddbd8dd53e4a4a..ad7b8e8ffbaf85 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np 
import onnx import onnx.mapping @@ -210,6 +212,8 @@ def hardmax_2d(data): assert np.allclose(ng_results, [expected]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_hardsigmoid(): def hardsigmoid(data, alpha=0.2, beta=0.5): return np.clip(alpha * data + beta, 0, 1) @@ -447,6 +451,8 @@ def test_cast_errors(): @pytest.mark.parametrize("value_type", [pytest.param(np.float64), pytest.param(np.float32)]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_constant(value_type): values = np.random.randn(5, 5).astype(value_type) node = onnx.helper.make_node( diff --git a/src/common/offline_transformations/src/compress_quantize_weigths.cpp b/src/common/offline_transformations/src/compress_quantize_weigths.cpp index 6c9e4554782a96..2b0687c86cde53 100644 --- a/src/common/offline_transformations/src/compress_quantize_weigths.cpp +++ b/src/common/offline_transformations/src/compress_quantize_weigths.cpp @@ -357,7 +357,7 @@ static void compute_scale_and_zero_point_internal(const std::shared_ptr::epsilon(); *zero_point++ = zero_point_value; diff --git a/src/common/snippets/include/snippets/emitter.hpp b/src/common/snippets/include/snippets/emitter.hpp index e1ff08abbf7da2..a2aa4923c2eef4 100644 --- a/src/common/snippets/include/snippets/emitter.hpp +++ b/src/common/snippets/include/snippets/emitter.hpp @@ -12,7 +12,6 @@ namespace ov { namespace snippets { -using code = const uint8_t *; using RegInfo = std::pair, std::vector>; /** diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index b0d30f602a5a88..32b44b9e6abc81 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -11,12 +11,32 @@ #include "snippets_isa.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/lowered/pass/pass.hpp" #include "snippets/shape_types.hpp" +#include "target_machine.hpp" namespace ov { namespace snippets { + +class Generator; +/** + * @interface LoweringResult + * @brief Holds all relevant information produced during lowering + * @param compiled_snippet pointer to interface class that encapsulates compiled binary code + * @param buffer_scratchpad_size the amount of additional memory required by the binary code to execute. + * Must be allocated and freed by the backend. + */ +class LoweringResult { + friend class Generator; + // Some emitters rely on other precompiled kernels. + // We need to keep the pointers to such emitters alive, so the kernels would still be accessible at runtime. 
+ std::vector<std::shared_ptr<Emitter>> m_saved_emitters{}; + +public: + std::shared_ptr<CompiledSnippet> compiled_snippet = nullptr; + size_t buffer_scratchpad_size = 0; +}; + /** * @interface Schedule * @brief Return scheduling information and pointer to generated kernel code @@ -26,20 +46,21 @@ class Schedule { public: Schedule() = default; /** - * @brief Default to create schedule out of specific parameters - * @param wd work domain for kernel execution - * @param p pointer to generated code + * @brief Create schedule out of specific parameters + * @param domain work domain for kernel execution + * @param lr lowering result produced during code generation */ - Schedule(const VectorDims& wd, code p) : parallel_exec_domain(wd), ptr(p) {} + Schedule(std::vector<size_t>&& domain, LoweringResult&& lr) : parallel_exec_domain(domain), lowering_result(lr) {} + Schedule(std::vector<size_t> domain, LoweringResult&& lr) : parallel_exec_domain(std::move(domain)), lowering_result(lr) {} /** * @brief Returns callable instanse of code pointer */ template<typename K> K get_callable() const { - return reinterpret_cast<K>(const_cast<code>(ptr)); + return reinterpret_cast<K>(const_cast<uint8_t*>(lowering_result.compiled_snippet->get_code())); } VectorDims parallel_exec_domain {}; - code ptr {nullptr}; + LoweringResult lowering_result {}; }; /** @@ -52,7 +73,7 @@ class Generator { /** * @brief Default constructor */ - Generator(const std::shared_ptr<TargetMachine>& t) : target(t), lowered_saved{} {} + Generator(const std::shared_ptr<TargetMachine>& t) : target(t) {} /** * @brief Default destructor */ @@ -62,17 +83,13 @@ class Generator { * @brief Allows to tweak the lowering process. */ /** - * @brief virtual method any specific implementation should implement - * @param m model in canonical for for table-based code generation - * @param config config with transformation and optimization parameters - * @param compile_params parameters for generated code - * @return pointer to generated code + * @brief generates executable code + * @param linear_ir lowered IR for code generation + * @param result variable to handle the result; only the compiled_snippet and m_saved_emitters fields will be modified + * @param compile_params compile-time parameters used for code generation + * @return void */ - struct LoweringResult { - LoweringResult(code c) : binary_code(c) {} - code binary_code = nullptr; - }; - LoweringResult generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params = nullptr); + void generate(lowered::LinearIR& linear_ir, LoweringResult& result, const void* compile_params = nullptr) const; /** * @brief gets target machine @@ -96,17 +113,21 @@ class Generator { */ opRegType get_op_reg_type(const std::shared_ptr<ov::Node>& op) const; + virtual std::shared_ptr<Generator> clone() const = 0; + protected: /** * @brief gets register type by specific plugin op type * @return register type */ virtual opRegType get_specific_op_reg_type(const std::shared_ptr<ov::Node>& op) const; + /** + * @brief returns true if an emitter can use a precompiled kernel. + * @return bool + */ + virtual bool uses_precompiled_kernel(const std::shared_ptr<Emitter>& emitter) const { return false; } std::shared_ptr<TargetMachine> target; - // todo: we need to save lowered code to access compiled brgemm kernels on execution time (normally lowered is destructed by then). - // This is temporary solution, remove this when kernel caching is implemented. Don't forget to make generate const method. - lowered::LinearIR lowered_saved; }; } // namespace snippets
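To summarize the generator.hpp changes above: the raw `code` pointer is gone; `generate()` now fills a `LoweringResult` that owns the compiled binary (`compiled_snippet`), records the required scratchpad size, and keeps alive any emitters whose precompiled kernels the binary still calls into, while `Schedule` carries the whole result rather than a bare pointer. A rough Python analogue of that ownership layout, where the type names mirror the header but the payload types are stand-ins:

```python
from dataclasses import dataclass, field

@dataclass
class LoweringResult:
    compiled_snippet: object = None     # stand-in for CompiledSnippetPtr
    buffer_scratchpad_size: int = 0     # extra memory the backend must allocate for the kernel
    # Emitters backed by precompiled kernels stay referenced here so those kernels
    # outlive code generation (mirrors the private m_saved_emitters vector).
    saved_emitters: list = field(default_factory=list)

@dataclass
class Schedule:
    parallel_exec_domain: list          # work domain for kernel execution
    lowering_result: LoweringResult

    def get_callable(self):
        # Mirrors Schedule::get_callable: expose the compiled code entry point.
        return self.lowering_result.compiled_snippet
```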
diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index c5a1b2b8cb6f5e..289e52e0f59a73 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -74,7 +74,6 @@ class Expression : public std::enable_shared_from_this<Expression> { std::vector<size_t> m_loop_ids{}; std::shared_ptr<IShapeInferSnippets> m_shapeInference{nullptr}; }; -using ExpressionPtr = std::shared_ptr<Expression>; class IOExpression : public Expression { friend class LinearIR; diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index bb238356dfa9d2..f179abf746c313 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -27,6 +27,13 @@ class LinearIR::ExpressionFactory { } return create(n, params...); } + template<typename T = Expression, typename std::enable_if<std::is_base_of<Expression, T>::value, bool>::type = true> + static ExpressionPtr shallow_copy(const std::shared_ptr<T>& expr) { + if (const auto& io_expr = std::dynamic_pointer_cast<IOExpression>(expr)) + return std::make_shared<IOExpression>(*io_expr); + else + return std::make_shared<Expression>(*expr); + } private: /* -- Default Builders - initialize input port connectors from parents and create new output port connectors themselves */ diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index 8b6a320e18cad7..6d4a357914da39 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -116,6 +116,7 @@ class LinearIR { IShapeInferSnippets::Result shape_infer(const std::vector<VectorDimsRef>& input_shapes); const std::shared_ptr<ShapeInferSnippetsNode>& get_shape_infer_instance() const {return m_shape_infer; } VectorDims get_master_shape() const; + LinearIR deep_copy() const; private: std::shared_ptr<ShapeInferSnippetsNode> m_shape_infer = nullptr; diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp new file mode 100644 index 00000000000000..fe4f9956d81c66 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface InsertBroadcastMove + * @brief Injects explicit BroadcastMove operations when the most varying dim is broadcasted + * @ingroup snippets + */ +class InsertBroadcastMove : public Pass { +public: + OPENVINO_RTTI("InsertBroadcastMove", "Pass") + bool run(LinearIR& linear_ir) override; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp index 8b5634ebb29fa4..795dc0d3725f1c 100644 --- a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp @@ -18,8 +18,8 @@ namespace pass { */ class SoftmaxDecomposition : public Pass { public: - explicit SoftmaxDecomposition(size_t vector_size); OPENVINO_RTTI("SoftmaxDecomposition", "Pass") + explicit SoftmaxDecomposition(size_t vector_size);
bool run(LinearIR& linear_ir) override; private: diff --git a/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp b/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp new file mode 100644 index 00000000000000..08243c96beedf5 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +#include "snippets/lowered/loop_manager.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface ValidateShapes + * @brief The pass checks that there are no dynamic shapes in the IR + * @ingroup snippets + */ +class ValidateShapes : public Pass { +public: + OPENVINO_RTTI("ValidateShapes", "Pass") + ValidateShapes() = default; + bool run(LinearIR& linear_ir) override; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/op/rank_normalization.hpp b/src/common/snippets/include/snippets/op/rank_normalization.hpp new file mode 100644 index 00000000000000..c1ed530ce05832 --- /dev/null +++ b/src/common/snippets/include/snippets/op/rank_normalization.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" +#include "snippets/shape_inference/shape_inference.hpp" + +namespace ov { +namespace snippets { +namespace op { + +/** + * @interface RankNormalization + * @brief Generated by Canonicalization for rank normalization purposes. It can prepend and append input shapes with 1s, but only at the first or last dimensions. + * @arg num_prepend - num `1`s that will be inserted at the beginning of the input shape. Any value is allowed. + * @arg num_append - num `1`s that will be inserted at the end of the input shape. Could be either 0 (default) or 1. + * @ingroup snippets + */ + // Note that technically the same goal could be achieved using the op::Unsqueeze operation, + // but RankNormalization has much narrower semantics, and hence allows for easier control and more efficient shape inference. 
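Because RankNormalization only ever pads a shape with ones at its edges, its shape inference is trivial; before the class body below, here is a hedged Python model of the behaviour documented in the comment above:

```python
def rank_normalization_shape_infer(shape, num_prepend=0, num_append=0):
    # Pad with leading/trailing 1s, exactly what the op's ShapeInfer documents.
    return [1] * num_prepend + list(shape) + [1] * num_append

# Rank mismatch: a rank-3 input lifted to rank 5 for a rank-5 subgraph.
assert rank_normalization_shape_infer([17, 15, 32], num_prepend=2) == [1, 1, 17, 15, 32]
# Layout mismatch: a planar shape post-padded with a single 1 (num_append <= 1).
assert rank_normalization_shape_infer([1, 3, 17, 15], num_append=1) == [1, 3, 17, 15, 1]
```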
+ // +class RankNormalization : public ov::op::Op { +public: + OPENVINO_OP("RankNormalization", "SnippetsOpset"); + + RankNormalization() = default; + RankNormalization(const Output& data, size_t num_prepend, size_t num_append); + + void validate_and_infer_types() override; + bool visit_attributes(AttributeVisitor& visitor) override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + size_t get_num_append() const { return m_num_append; } + size_t get_num_prepend() const { return m_num_prepend; } + + class ShapeInfer : public IShapeInferSnippets { + size_t m_num_prepend = 0; + size_t m_num_append = 0; + public: + explicit ShapeInfer(const std::shared_ptr& n); + IShapeInferSnippets::Result + infer(const std::vector& input_shapes) override; + }; + +private: + size_t m_num_prepend = 0; + size_t m_num_append = 0; +}; + +} // namespace op +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index a9321e957e273c..b17031e2a67d1c 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -12,6 +12,7 @@ #include "openvino/core/rt_info.hpp" #include "snippets/pass_manager.hpp" #include "snippets/shape_inference/shape_inference.hpp" +#include "snippets/lowered/pass/pass.hpp" #include "snippets/generator.hpp" @@ -68,7 +69,8 @@ class Subgraph : public ov::op::util::SubGraphOp { // // D = < 1, 3, 17, 15, 32> < 0, 1, 2, 3, 4> // E = < 1, 3, 17, 1, 32> < 0, 1, 2, 3, 4> - using BlockedShape = std::tuple; + using Layout = std::vector; + using BlockedShape = std::pair; using BlockedShapeVector = std::vector; Subgraph() = default; @@ -94,43 +96,36 @@ class Subgraph : public ov::op::util::SubGraphOp { const std::shared_ptr& get_generator() const { return m_generator; } std::shared_ptr& get_generator() { return m_generator; } - size_t get_buffer_scratchpad_size() const { return m_buffer_scratchpad; } size_t get_virtual_port_count() const { return m_virtual_port_count; } bool is_quantized() const { return config.m_is_quantized; } bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; } - snippets::Schedule generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, - const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory = nullptr, - const void* compile_params = nullptr); - snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr); - snippets::Schedule generate(const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory = nullptr, + + snippets::Schedule generate(const BlockedShapeVector& blocked_input_shapes = {}, + const std::vector& input_precisions = {}, + const std::vector& output_precisions = {}, + const std::vector& data_flow_passes = {}, + const lowered::pass::PassPipeline& control_flow_passes_pre_common = {}, + const lowered::pass::PassPipeline& control_flow_passes_post_common = {}, + const std::shared_ptr& factory = nullptr, const void* compile_params = nullptr); - snippets::Schedule generate(const void* compile_params = 
nullptr); - ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); - ov::PartialShape canonicalized_body_shape_infer(const BlockedShapeVector& input_shapes); - std::vector reshape_body(const std::vector& input_shapes); - std::vector reshape_body(const std::vector& input_shapes); + snippets::Schedule generate_from_linear_ir(const lowered::pass::PassPipeline& backend_passes_pre_common = {}, + const lowered::pass::PassPipeline& backend_passes_post_common = {}, + const void* compile_params = nullptr) const; IShapeInferSnippets::Result shape_infer(const std::vector& input_shapes); // plugin sets generator for a snippet to some specific generator. // it's going to be replaced with Jitters table later void set_generator(std::shared_ptr generator); void set_tile_rank(size_t newRank) {tileRank = newRank;} - void set_virtual_port_count(const size_t count); - void set_min_jit_work_amount(const size_t jit_work_amount); - void set_min_parallel_work_amount(const size_t parallel_work_amount); + void set_virtual_port_count(size_t count); + void set_min_jit_work_amount(size_t jit_work_amount); + void set_min_parallel_work_amount(size_t parallel_work_amount); void print() const; void serialize() const; - void set_master_shape(ov::PartialShape new_shape) {master_shape = std::move(new_shape);} + VectorDims infer_master_shape(); static auto wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr; static void fill_empty_output_names(const Output& target_output_node, const Output& replacement_output_node); @@ -143,28 +138,30 @@ class Subgraph : public ov::op::util::SubGraphOp { // Return estimated unique buffer count (upper bound). It's needed for tokenization static auto get_estimated_buffer_count(const ov::NodeVector& ops) -> size_t; static auto is_domain_sensitive_op(const std::shared_ptr& op) -> bool; + + void data_flow_transformations(const BlockedShapeVector& blocked_input_shapes = {}, + const std::vector& input_precisions = {}, + const std::vector& output_precisions = {}, + const std::vector& = {}); std::shared_ptr - convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory = std::make_shared()) const; + convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory = std::make_shared()); + std::shared_ptr clone() const; private: - void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes); - void data_flow_transformations(const std::vector& backend_passes); void control_flow_transformations(lowered::LinearIR& linear_ir, + LoweringResult& lowering_result, const lowered::pass::PassPipeline& backend_passes_pre_common, - const lowered::pass::PassPipeline& backend_passes_post_common); + const lowered::pass::PassPipeline& backend_passes_post_common) const; void init_config(); // Count of Subgraph virtual ports: // - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition) // NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()), // we should MANUALLY calculate it where it needed. 
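Taken together, the subgraph.hpp changes in this hunk reshape the codegen entry point: `generate` now receives the blocked shapes and I/O precisions directly, runs the data-flow passes (including the new Canonicalization and AlignElementTypes described further below), lowers the body to linear IR, and hands off to `generate_from_linear_ir` for the control-flow pipelines and final code emission. A compressed Python-style sketch of that call order, with method names taken from the header and bodies elided:

```python
def generate(subgraph, blocked_input_shapes=(), input_precisions=(), output_precisions=(),
             data_flow_passes=(), pre_common=(), post_common=(), compile_params=None):
    # 1. Model-level rewrites: rank/layout canonicalization, precision alignment,
    #    plus any backend-provided data-flow passes.
    subgraph.data_flow_transformations(blocked_input_shapes, input_precisions,
                                       output_precisions, data_flow_passes)
    # 2. Lower the body to the linear IR that the control-flow passes operate on.
    subgraph.convert_body_to_linear_ir()
    # 3. Control-flow pipelines + Generator::generate produce the final Schedule.
    return subgraph.generate_from_linear_ir(pre_common, post_common, compile_params)
```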
     size_t m_virtual_port_count = 0;
-    size_t m_buffer_scratchpad = 0lu;
     Shape exec_domain = {};
     std::shared_ptr<ov::snippets::Generator> m_generator = nullptr;

-    ov::PartialShape master_shape;
     size_t tileRank = 0;  // set by plugin to specify the number of dimensions processed in a single kernel call
-    size_t maxInputRank = 0;
     std::vector<size_t> appendOnesForCanonical;
     std::shared_ptr<lowered::LinearIR> m_linear_ir = nullptr;
diff --git a/src/common/snippets/include/snippets/pass/align_element_types.hpp b/src/common/snippets/include/snippets/pass/align_element_types.hpp
new file mode 100644
index 00000000000000..9a8a5ff880aeab
--- /dev/null
+++ b/src/common/snippets/include/snippets/pass/align_element_types.hpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/pass.hpp"
+#include "transformations_visibility.hpp"
+#include "snippets/op/subgraph.hpp"
+
+namespace ov {
+namespace snippets {
+namespace pass {
+
+/**
+ * @interface AlignElementTypes
+ * @brief Align body precision with expected input/output precision. Insert op::ConvertSaturation if necessary.
+ * @ingroup snippets
+ */
+class AlignElementTypes: public ov::pass::ModelPass {
+public:
+    OPENVINO_RTTI("AlignElementTypes");
+    AlignElementTypes(std::vector<ov::element::Type> input_precisions,
+                      std::vector<ov::element::Type> output_precisions);
+    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
+
+private:
+    std::vector<ov::element::Type> m_input_precisions;
+    std::vector<ov::element::Type> m_output_precisions;
+};
+
+} // namespace pass
+} // namespace snippets
+} // namespace ov
diff --git a/src/common/snippets/include/snippets/pass/canonicalization.hpp b/src/common/snippets/include/snippets/pass/canonicalization.hpp
new file mode 100644
index 00000000000000..f57218328ca57c
--- /dev/null
+++ b/src/common/snippets/include/snippets/pass/canonicalization.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/pass.hpp"
+#include "transformations_visibility.hpp"
+#include "snippets/op/subgraph.hpp"
+#include "snippets/shape_types.hpp"
+
+namespace ov {
+namespace snippets {
+namespace pass {
+
+/**
+ * @interface Canonicalization
+ * @brief Canonicalization inserts RankNormalization (ov::op::Unsqueeze analogue) operations to account for:
+ *        - input rank mismatch: inputs with smaller ranks are prepended with 1
+ *        - layout mismatch (only planar + blocked is supported): planar shapes are appended with 1
+ * @ingroup snippets
+ */
+class Canonicalization: public ov::pass::ModelPass {
+public:
+    OPENVINO_RTTI("Canonicalization");
+    using BlockedShapeVector = op::Subgraph::BlockedShapeVector;
+    using Layout = std::vector<size_t>;
+    explicit Canonicalization(const BlockedShapeVector& blocked_input_shapes);
+    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
+
+private:
+    std::vector<VectorDims> m_in_shapes;
+    std::vector<Layout> m_in_layouts;
+    bool m_has_dynamic_inputs = false;
+};
+
+} // namespace pass
+} // namespace snippets
+} // namespace ov
diff --git a/src/common/snippets/include/snippets/snippets_isa.hpp b/src/common/snippets/include/snippets/snippets_isa.hpp
index 87579feebb1796..ba85ae68eeb634 100644
--- a/src/common/snippets/include/snippets/snippets_isa.hpp
+++ b/src/common/snippets/include/snippets/snippets_isa.hpp
@@ -24,6 +24,7 @@
 #include "op/loop.hpp"
 #include "op/brgemm.hpp"
 #include "op/vector_buffer.hpp"
+#include "op/rank_normalization.hpp"

 namespace ov {
 namespace snippets {
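To make the prepend/append rule concrete, here is a small sketch of the shapes involved; the concrete dims and the standalone RankNormalization construction are illustrative, since in the real pipeline Canonicalization derives the counts from the BlockedShapeVector:

    // Two eltwise inputs of ranks 2 and 4: {17, 31} and {1, 3, 17, 31}.
    // Canonicalization places RankNormalization(num_prepend = 2, num_append = 0)
    // after the rank-2 Parameter, which yields {1, 1, 17, 31}:
    auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{17, 31});
    auto rank_norm = std::make_shared<ov::snippets::op::RankNormalization>(param, 2, 0);
    // rank_norm->get_output_partial_shape(0) == {1, 1, 17, 31}
    // For a planar input consumed next to a blocked one, num_append = 1 appends a
    // trailing 1 instead: {1, 3, 17, 31} -> {1, 3, 17, 31, 1}.

Note that num_append is validated to be 0 or 1, matching the "only planar + blocked" restriction stated in the pass description.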
diff --git a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp
index b0a87a8a82a1f9..351770bdab746f 100644
--- a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp
+++ b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp
@@ -22,6 +22,7 @@
 OV_OP(Store, ov::snippets::op)
 OV_OP(BroadcastMove, ov::snippets::op)
 OV_OP(Scalar, ov::snippets::op)
 OV_OP(Nop, ov::snippets::op)
+OV_OP(RankNormalization, ov::snippets::op)

 // Layout-oblivious from opset1
diff --git a/src/common/snippets/include/snippets/target_machine.hpp b/src/common/snippets/include/snippets/target_machine.hpp
index a4d15463f2972a..d42779bcd7153c 100644
--- a/src/common/snippets/include/snippets/target_machine.hpp
+++ b/src/common/snippets/include/snippets/target_machine.hpp
@@ -13,6 +13,15 @@
 namespace ov {
 namespace snippets {
+
+struct CompiledSnippet {
+    virtual const uint8_t* get_code() const = 0;
+    virtual size_t get_code_size() const = 0;
+    virtual bool empty() const = 0;
+    virtual ~CompiledSnippet() = default;
+};
+using CompiledSnippetPtr = std::shared_ptr<CompiledSnippet>;
+
 typedef std::pair<std::function<std::shared_ptr<Emitter>(const lowered::ExpressionPtr&)>,
                   std::function<std::set<std::vector<element::Type>>(const std::shared_ptr<ov::Node>&)>> jitters_value;
@@ -33,7 +42,7 @@ class TargetMachine {
      * @brief finalizes code generation
      * @return generated kernel binary
      */
-    virtual code get_snippet() const = 0;
+    virtual CompiledSnippetPtr get_snippet() = 0;

     /**
      * @brief gets number of lanes supported by target's vector ISA
diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp
index 525de3e03b2118..d10930125e0ed0 100644
--- a/src/common/snippets/include/snippets/utils.hpp
+++ b/src/common/snippets/include/snippets/utils.hpp
@@ -58,6 +58,7 @@ constexpr inline bool implication(bool cause, bool cond) {
 VectorDims get_planar_vdims(const VectorDims& shape, const std::vector<size_t>& layout);
 VectorDims get_planar_vdims(const snippets::lowered::PortDescriptorPtr& port_desc);
 VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port);
+bool is_dynamic_vdims(const VectorDims& shape);

 } // namespace utils
 } // namespace snippets
diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp
index 1d1d733277f99b..cede4c4a6e532c 100644
--- a/src/common/snippets/src/generator.cpp
+++ b/src/common/snippets/src/generator.cpp
@@ -15,7 +15,7 @@
 namespace ov {
 namespace snippets {

-Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params) {
+void Generator::generate(lowered::LinearIR& linear_ir, LoweringResult& result, const void* compile_params) const {
     OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::Generator::generate")
     OV_ITT_TASK_CHAIN(GENERATE, ov::pass::itt::domains::SnippetsTransform, "Snippets::Generator", "::Transformations")
     if (!target->is_supported())
@@ -28,7 +28,6 @@ Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, cons
     lowered_pipeline.register_pass(reg_type_mapper);
     lowered_pipeline.register_pass();
     lowered_pipeline.run(linear_ir);
-
     linear_ir.init_emitters(target);

     OV_ITT_TASK_NEXT(GENERATE, "::EmitCode")
@@ -45,12 +44,15 @@ Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, cons
     }
     OV_ITT_TASK_NEXT(GENERATE, "::GetSnippet")
-    // todo: we save lowered to access compiled brgemm kernels on execution time (normally lowered is destructed by then) - remove this when kernel caching is implemented. Don't forget to make generate const method.
- if (config.m_save_expressions) - lowered_saved = linear_ir; - - return { target->get_snippet() }; + // Note: some emitters use precompiled kernels. They need to be saved, so the kernels are accessible at runtime. + if (linear_ir.get_config().m_save_expressions) { + for (const auto& expr : linear_ir) { + const auto& emitter = expr->get_emitter(); + if (uses_precompiled_kernel(emitter)) + result.m_saved_emitters.emplace_back(emitter); + } + } + result.compiled_snippet = target->get_snippet(); } std::shared_ptr Generator::get_target_machine() const { @@ -63,7 +65,8 @@ Generator::opRegType Generator::get_op_reg_type(const std::shared_ptr& op) std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) + std::dynamic_pointer_cast(op) || + std::dynamic_pointer_cast(op)) return gpr2gpr; else if (std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op)) diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index cc0ace467dd6e3..adf3894f71b8b7 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -122,6 +122,59 @@ LinearIR::container LinearIR::deep_copy_range(LinearIR::container::const_iterato return result; } +LinearIR LinearIR::deep_copy() const { + // todo: implement the same functionality using standard copy constructor + auto clone_ports_descriptors = [](std::vector& ports) { + std::for_each(ports.begin(), ports.end(), [](PortDescriptorPtr& pd) { pd = pd->clone(); }); + }; + const auto& original_lir = *this; + LinearIR new_lir; + new_lir.m_config = original_lir.m_config; + new_lir.m_shape_infer = original_lir.m_shape_infer; + NodeVector original_nodes; + original_nodes.reserve(original_lir.m_expressions.size()); + std::unordered_map connectors_map; + for (const auto& orig_expr : original_lir) { + original_nodes.push_back(orig_expr->get_node()); + const auto& copy_expr = ExpressionFactory::shallow_copy(orig_expr); + clone_ports_descriptors(copy_expr->m_input_port_descriptors); + clone_ports_descriptors(copy_expr->m_output_port_descriptors); + + for (auto& orig_con : copy_expr->m_output_port_connectors) { + const auto& copy_source = copy_expr->get_output_port(orig_con->get_source().get_index()); + const auto& copy_con = std::make_shared(copy_source); + connectors_map[orig_con] = copy_con; + orig_con = copy_con; + } + for (size_t i = 0; i < copy_expr->get_input_count(); i++) { + const auto& copy_connector = connectors_map[copy_expr->get_input_port_connector(i)]; + const auto& copy_consumer = copy_expr->get_input_port(i); + copy_connector->add_consumer(copy_consumer); + copy_expr->replace_input(i, copy_connector); + } + + if (auto io_expr = std::dynamic_pointer_cast(copy_expr)) + new_lir.m_io_expressions.push_back(io_expr); + new_lir.m_expressions.push_back(copy_expr); + } + // node_map and expr_map map original node pointer (expression) to a new pointer (expression) + ngraph::NodeMap node_map; + OPENVINO_SUPPRESS_DEPRECATED_START + ngraph::clone_nodes(original_nodes, node_map); + OPENVINO_SUPPRESS_DEPRECATED_END + new_lir.m_node2expression_map.clear(); + for (const auto& copy_expr : new_lir.m_expressions) { + copy_expr->m_source_node = node_map[copy_expr->m_source_node.get()]; + new_lir.m_node2expression_map[copy_expr->m_source_node] = copy_expr; + } + new_lir.m_loop_manager = std::make_shared(); + // It's Ok to share shapeInfer factory, since LIR doesn't change it + new_lir.m_shape_infer_factory = 
m_shape_infer_factory; + // Note: shapeInfer stores expression pointers. we re-create it, so shape inference is performed on cloned exprs. + new_lir.m_shape_infer = std::make_shared(new_lir.m_expressions, new_lir.m_io_expressions); + return new_lir; +} + void LinearIR::debug_print(bool tds_as_pointers) const { auto print_rinfo = [](const RegInfo& rinfo) { std::cerr << " : {"; @@ -320,7 +373,7 @@ VectorDims LinearIR::get_master_shape() const { for (const auto& oe : out_exprs) { const auto& port_desc = oe->get_input_port_descriptor(0); OPENVINO_ASSERT(ov::snippets::broadcast_merge_into(master_shape, port_desc->get_shape()), - "Failed to merge input shapes in OptimizeDomain pass"); + "Failed to merge input shapes in infer_master_shape"); } } return master_shape; @@ -339,6 +392,19 @@ LinearIR::LIRShapeInfer::LIRShapeInfer(container& body_exprs, io_container& io_e OPENVINO_THROW("Invalid io expression type detected"); } } + // Note that if all output shapes are static, as in the case when the first shape infer was performed on nGraph, + // we can treat them as the last result + std::vector outputDims; + outputDims.reserve(m_output_exprs.size()); + for (const auto& expr : m_output_exprs) { + const auto &shape = expr->get_input_port_descriptor(0)->get_shape(); + if (utils::is_dynamic_vdims(shape)) { + outputDims.clear(); + break; + } + outputDims.push_back(shape); + } + m_last_result = {outputDims, ShapeInferStatus::success}; } IShapeInferSnippets::Result LinearIR::LIRShapeInfer::infer(const std::vector& input_shapes) { diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index 638845ec6929ad..7755cfebe7cc38 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -46,12 +46,21 @@ bool AssignRegisters::run(LinearIR& linear_ir) { for (const auto& expr : expressions) { auto op = expr->get_node(); if (const auto io_expr = std::dynamic_pointer_cast(expr)) { - if (io_expr->get_type() == IOExpression::io_type::INPUT) - manually_assigned_gprs[expr->get_output_port_connector(0)] = io_expr->get_index(); - else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) + if (io_expr->get_type() == IOExpression::io_type::INPUT) { + const auto& out_connector = expr->get_output_port_connector(0); + manually_assigned_gprs[out_connector] = io_expr->get_index(); + const auto& consumer_inputs = out_connector->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + // TODO [96434]: Support RankNormalization (Reshape) in arbitrary place in pipeline, not just after inputs + if (ov::is_type(first_consumer->get_node())) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); + manually_assigned_gprs[first_consumer->get_output_port_connector(0)] = io_expr->get_index(); + } + } else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) { manually_assigned_gprs[expr->get_input_port_connector(0)] = num_parameters + io_expr->get_index(); - else + } else { OPENVINO_THROW("Unsupported io_type detected"); + } } else if (const auto& buffer = ov::as_type_ptr(op)) { const auto buffer_id = buffer->get_id(); // All buffers have one common data pointer diff --git a/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp b/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp new file mode 100644 index 00000000000000..a70698580a61e3 --- /dev/null +++ 
b/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp
@@ -0,0 +1,90 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "snippets/lowered/pass/insert_broadcastmove.hpp"
+#include "snippets/utils.hpp"
+#include "snippets/lowered/linear_ir.hpp"
+#include "snippets/lowered/loop_manager.hpp"
+#include "snippets/snippets_isa.hpp"
+#include "snippets/itt.hpp"
+
+namespace ov {
+namespace snippets {
+namespace lowered {
+namespace pass {
+
+bool InsertBroadcastMove::run(LinearIR& linear_ir) {
+    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertBroadcastMove")
+    bool modified = false;
+    const auto& loop_manager = linear_ir.get_loop_manager();
+
+    auto supports_broadcasting = [](const std::shared_ptr<ov::Node>& n) {
+        return ov::op::util::supports_auto_broadcast(n) ||
+               n->get_autob().m_type == ov::op::AutoBroadcastType::NUMPY ||
+               is_type(n);
+    };
+    auto dont_need_broadcasting = [](const ov::Output<ov::Node>& v){
+        // We don't need to insert BroadcastMove after the following operations:
+        // - Scalar has emitter with explicit broadcasting
+        // - VectorBuffer has scalar output shape to avoid broadcast conflicts and manual shape insertion.
+        // - Fill can be inserted only after VectorBuffer, and should be ignored as well.
+        return utils::is_scalar_constant(v.get_node_shared_ptr()) ||
+               ov::is_type<op::VectorBuffer>(v.get_node_shared_ptr()) ||
+               ov::is_type<op::Fill>(v.get_node_shared_ptr());
+    };
+    for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) {
+        const auto& expr = *expr_it;
+        const auto& node = expr->get_node();
+        const auto& descriptors = expr->get_input_port_descriptors();
+        if (!supports_broadcasting(node) || descriptors.size() < 2)
+            continue;
+        const auto& connectors = expr->get_input_port_connectors();
+        OPENVINO_ASSERT(connectors.size() == descriptors.size(),
+                        "Invalid expression configuration: connectors and descriptors size mismatch");
+
+        std::vector<size_t> last_dims(descriptors.size());
+        std::transform(descriptors.begin(), descriptors.end(), last_dims.begin(),
+                       [](const std::shared_ptr<PortDescriptor>& d){
+                           return d->get_shape().back();
+                       });
+        const auto broadcasted_dim = *std::max_element(last_dims.begin(), last_dims.end());
+        for (size_t i = 0; i < last_dims.size(); i++) {
+            const auto& parent_port = connectors[i]->get_source();
+            if (last_dims[i] != broadcasted_dim &&
+                !dont_need_broadcasting(parent_port.get_expr()->get_node())) {
+                OPENVINO_ASSERT(last_dims[i] == 1, "Attempt to broadcast non-1 dimension. Target dim: ", broadcasted_dim, " This dim: ", last_dims[i]);
+                auto input_shape = descriptors[i]->get_shape();
+                // Note that input_shape could be empty (aka ngraph scalar), so we can't just replace the last dim
+                if (input_shape.empty())
+                    input_shape.resize(1);
+                input_shape.back() = last_dims[i];
+                const auto broadcast = std::make_shared<op::BroadcastMove>(node->get_input_source_output(i), utils::vdims_to_pshape(input_shape));
+
+                PortDescriptorUtils::set_port_descriptor_ptr(broadcast->output(0), connectors[i]->get_source().get_descriptor_ptr()->clone());
+                const auto broadcast_expr = linear_ir.create_expression(broadcast, {connectors[i]});
+                linear_ir.insert(expr_it, broadcast_expr);
+                linear_ir.replace_input(expr->get_input_port(i), broadcast_expr->get_output_port_connector(0));
+                // Note that BroadcastMove modified the next expr input shape, so we need to update the
+                // expr's input port descriptor to reflect the changes
+                expr->get_input_port_descriptor(i)->set_shape(broadcast_expr->get_output_port_descriptor(0)->get_shape());
+
+                // Copy Loop identifiers
+                const auto& loop_ids = expr->get_loop_ids();
+                broadcast_expr->set_loop_ids(loop_ids);
+                loop_manager->update_loops_port(loop_ids, expr->get_input_port(0), {broadcast_expr->get_input_port(0)}, true);
+
+                modified = true;
+            }
+        }
+    }
+    return modified;
+}
+
+} // namespace pass
+} // namespace lowered
+} // namespace snippets
+} // namespace ov
+
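In short, the new pass scans every broadcasting-capable expression, takes the maximum of its inputs' last dimensions, and wraps any input whose last dimension differs with a BroadcastMove. A compressed recap of that decision rule; this is an illustrative sketch distilled from the pass above, and wrap_input_with_broadcast_move is a hypothetical stand-in for the insertion sequence it performs:

    // Given per-input last dims {1, 32} for an Add expression:
    // broadcasted_dim = max(1, 32) = 32.
    const size_t broadcasted_dim = *std::max_element(last_dims.begin(), last_dims.end());
    for (size_t i = 0; i < last_dims.size(); i++) {
        if (last_dims[i] == broadcasted_dim)
            continue;                          // input 1 (dim 32): already the right width
        // only 1 -> N broadcasting is legal; anything else trips the assert above
        wrap_input_with_broadcast_move(i);     // input 0 (dim 1): BroadcastMove inserted before Add
    }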
diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp
index 91cbe55ef98b34..da5ffc11c3169d 100644
--- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp
+++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp
@@ -35,10 +35,9 @@ std::vector get_buffer_loop_ids(const std::vector& lhs, const st
 ov::Shape compute_allocation_shape(const LinearIR::LoopManagerPtr& loop_manager,
                                    const std::vector<size_t>& buffer_loop_ids,
                                    const std::vector<size_t>& parent_loop_ids,
-                                   const ov::Output<ov::Node>& parent_output,
+                                   const ExpressionPort& expr_port,
                                    const int allocation_rank) {
-    const auto& port = lowered::PortDescriptorUtils::get_port_descriptor_ptr(parent_output);
-    const auto planar_shape = utils::get_planar_vdims(port);
+    const auto& planar_shape = utils::get_planar_vdims(expr_port);
     const size_t rank = allocation_rank >= 0 ? std::min(static_cast<size_t>(allocation_rank), planar_shape.size())
                                              : planar_shape.size();
     ov::Shape allocation_shape(rank);
@@ -123,9 +122,9 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
     for (const auto& entry_point : loop_entries) {
         const auto& entry_port = entry_point.expr_port;
         const auto& expr = entry_port->get_expr();
-        const auto port = entry_port->get_index();
+        const auto port_idx = entry_port->get_index();
         const auto node = expr->get_node();
-        const auto& input_connector = expr->get_input_port_connector(port);
+        const auto& input_connector = expr->get_input_port_connector(port_idx);
         const auto& parent_expr_output = input_connector->get_source();
         const auto& parent_expr = parent_expr_output.get_expr();
         const auto parent_port = parent_expr_output.get_index();
@@ -140,7 +139,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
         const auto parent_ma = ov::as_type_ptr<op::MemoryAccess>(parent);
         const auto node_ma = ov::as_type_ptr<op::MemoryAccess>(node);
         bool is_buffer_needed = (parent_ma && parent_ma->is_memory_access_output_port(parent_port)) ||
-                                (node_ma && node_ma->is_memory_access_input_port(port));
+                                (node_ma && node_ma->is_memory_access_input_port(port_idx));
         const auto current_loops = expr->get_loop_ids();
         const auto parent_loops = parent_expr->get_loop_ids();
         const auto buffer_loop_ids = get_buffer_loop_ids(current_loops, parent_loops, is_buffer_needed);
@@ -154,7 +153,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
             const auto allocation_shape = compute_allocation_shape(loop_manager,
                                                                    buffer_loop_ids,
                                                                    parent_loops,
-                                                                   parent->output(parent_port),
+                                                                   parent_expr_output,
                                                                    m_buffer_allocation_rank);
             const auto buffer = std::make_shared<op::Buffer>(parent->output(parent_port), allocation_shape);
             PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone());
@@ -169,7 +168,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
     for (const auto& exit_point : loop_exits) {
         const auto& exit_port = exit_point.expr_port;
         const auto& expr = exit_port->get_expr();
-        const auto port = exit_port->get_index();
+        const auto port_idx = exit_port->get_index();
         const auto node = expr->get_node();
         const auto output_connector = exit_port->get_port_connector_ptr();
         const auto child_exprs_inputs = output_connector->get_consumers();
@@ -200,7 +199,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
         const auto child_ma = ov::as_type_ptr<op::MemoryAccess>(child);
         const auto node_ma = ov::as_type_ptr<op::MemoryAccess>(node);
         bool is_buffer_needed = (child_ma && child_ma->is_memory_access_input_port(child_port)) ||
-                                (node_ma && node_ma->is_memory_access_output_port(port));
+                                (node_ma && node_ma->is_memory_access_output_port(port_idx));
         const auto local_buffer_loop_ids = get_buffer_loop_ids(current_loops, child_expr->get_loop_ids(), is_buffer_needed);

         if (is_buffer_needed) {
@@ -247,9 +246,9 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt&
             const auto allocation_shape = compute_allocation_shape(loop_manager,
                                                                    buffer_loop_ids,
                                                                    current_loops,
-                                                                   node->output(port),
+                                                                   *exit_port,
                                                                    m_buffer_allocation_rank);
-            auto buffer = std::make_shared<op::Buffer>(node->output(port), allocation_shape);
+            auto buffer = std::make_shared<op::Buffer>(node->output(port_idx), allocation_shape);
             PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), exit_port->get_descriptor_ptr()->clone());
             // We cannot insert Node output connector on Buffer output because not all consumers of Node need Buffer
            // Example:
diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp
index 47fa93f699354b..ff75a5be0e6c5c 100644
--- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp
+++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp
@@ -3,7 +3,7 @@
 //

 #include "snippets/lowered/pass/insert_load_store.hpp"
-
+#include "snippets/op/rank_normalization.hpp"
 #include "snippets/lowered/linear_ir.hpp"
 #include "snippets/lowered/loop_manager.hpp"
 #include "snippets/snippets_isa.hpp"
@@ -30,14 +30,18 @@ size_t InsertLoadStore::get_count(const PortDescriptorPtr& port_desc) const {
 }

 bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) {
+    std::shared_ptr<Expression> data_expr = *data_expr_it;
+    auto consumer_inputs = data_expr->get_output_port_connector(0)->get_consumers();
+    const auto& first_consumer = consumer_inputs.begin()->get_expr();
+    if (is_type<op::RankNormalization>(first_consumer->get_node())) {
+        OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer");
+        data_expr = first_consumer;
+    }
     const auto& loop_manager = linear_ir.get_loop_manager();
-    const auto& data_expr = *data_expr_it;
-    const auto& data_node = data_expr->get_node();
+    const auto& data_ngraph_output = data_expr->get_node()->output(0);
     const auto& output_connector = data_expr->get_output_port_connector(0);
-    const auto consumer_inputs = output_connector->get_consumers();
-
     bool was_inserted = false;
-    for (const auto& consumer_input : consumer_inputs) {
+    for (const auto& consumer_input : output_connector->get_consumers()) {
         const auto& consumer_expr = consumer_input.get_expr();
         const auto port = consumer_input.get_index();
         const auto& consumer = consumer_expr->get_node();
@@ -46,7 +50,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr
             return false;

         const auto loop_ids = consumer_expr->get_loop_ids();
-        const auto load = std::make_shared<op::Load>(data_node->output(0), get_count(data_expr->get_output_port_descriptor(0)));
+        const auto load = std::make_shared<op::Load>(data_ngraph_output, get_count(data_expr->get_output_port_descriptor(0)));
         PortDescriptorUtils::set_port_descriptor_ptr(load->output(0), consumer_input.get_descriptor_ptr()->clone());
         const auto load_expr = linear_ir.create_expression(load, {output_connector});
         linear_ir.insert(linear_ir.find_after(data_expr_it, consumer_expr), load_expr);
@@ -55,7 +59,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr
         load_expr->set_loop_ids(loop_ids);

         // Need to update all the corresponding Loops with the same Entry Point
-        const auto prev_entry_point = consumer_input;
+        const auto& prev_entry_point = consumer_input;
         const auto new_entry_point = load_expr->get_input_port(0);
         loop_manager->update_loops_port(loop_ids, prev_entry_point, {new_entry_point}, true);
         was_inserted = true;
@@ -116,20 +120,14 @@ bool InsertLoadStore::run(LinearIR& linear_ir) {
         const auto& node = expr->get_node();
         if (ov::is_type<ov::op::v0::Parameter>(node)) {
             modified |= insert_load(linear_ir, expr_it);
-            continue;
-        }
-        if (ov::is_type<ov::op::v0::Result>(node)) {
+        } else if (ov::is_type<ov::op::v0::Result>(node)) {
             modified |= insert_store(linear_ir, expr_it);
-            continue;
-        }
-        if (auto buffer = ov::as_type_ptr<op::Buffer>(node)) {
+        } else if (auto buffer = ov::as_type_ptr<op::Buffer>(node)) {
             modified |= insert_load(linear_ir, expr_it);
             if (buffer->is_intermediate_memory())
                 modified |= insert_store(linear_ir, expr_it);
-            continue;
         }
     }
-
     return modified;
 }
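The pattern handled here mirrors the assign_registers.cpp change earlier in the patch: RankNormalization sits directly after a Parameter and must stay invisible to memory access, so the Load is attached to the RankNormalization output rather than to the Parameter itself. A schematic of the expected expression chain (illustrative, not an additional API):

    // Before InsertLoadStore:  Parameter --> RankNormalization --> Add
    // After InsertLoadStore:   Parameter --> RankNormalization --> Load --> Add
    //
    // RankNormalization is a pure shape-view op: it shares both the data pointer
    // and the GPR assigned to the Parameter, so no code is emitted for it.

This is also why both passes assert that RankNormalization is the only consumer of its Parameter: a second consumer would need its own Load and would break the shared-register assumption.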
diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp
index 86246ce61f1be6..05d38e111927c4 100644
--- a/src/common/snippets/src/lowered/pass/mark_loops.cpp
+++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp
@@ -29,7 +29,8 @@ bool MarkLoops::run(LinearIR& linear_ir) {
     auto is_not_start_point = [](const std::shared_ptr<ov::Node>& node) {
         return ov::is_type(node) ||
                ov::is_type(node) ||
-               ov::is_type(node);
+               ov::is_type(node) ||
+               ov::is_type<op::RankNormalization>(node);
     };

     auto are_conflicted = [](const ExpressionPort& lhs, const ExpressionPort& rhs) {
diff --git a/src/common/snippets/src/lowered/pass/optimize_domain.cpp b/src/common/snippets/src/lowered/pass/optimize_domain.cpp
index 09061e63250464..f2d2fd43baf96c 100644
--- a/src/common/snippets/src/lowered/pass/optimize_domain.cpp
+++ b/src/common/snippets/src/lowered/pass/optimize_domain.cpp
@@ -8,6 +8,7 @@
 #include "snippets/lowered/linear_ir.hpp"
 #include "snippets/snippets_isa.hpp"
 #include "snippets/shape_inference/shape_inference.hpp"
+#include "snippets/utils.hpp"

 namespace ov {

@@ -79,18 +80,28 @@ bool OptimizeDomain::run(snippets::lowered::LinearIR& linear_ir) {
         return false;
     }
     OPENVINO_ASSERT(config.m_min_parallel_work_amount != 0, "OptimizeDomain: Min parallel work amount can't be equal to zero");
-    std::vector<std::shared_ptr<snippets::lowered::IOExpression>> input_exprs;
     std::vector<VectorDims> input_shapes;
     VectorDims master_shape = linear_ir.get_master_shape();
-    for (const auto& expr : linear_ir.get_IO_ops()) {
-        if (expr->get_type() == snippets::lowered::IOExpression::io_type::INPUT) {
-            input_exprs.push_back(expr);
-            const auto& shape = expr->get_output_port_descriptor(0)->get_shape();
+    bool blocked_input_shapes = false;
+    for (const auto& io_expr : linear_ir.get_IO_ops()) {
+        if (io_expr->get_type() == snippets::lowered::IOExpression::io_type::INPUT) {
+            auto consumer_inputs = io_expr->get_output_port_connector(0)->get_consumers();
+            const auto& first_consumer = consumer_inputs.begin()->get_expr();
+            if (auto rank_norm = as_type_ptr<op::RankNormalization>(first_consumer->get_node())) {
+                // If RankNormalization appends dims, then the appended dims will be broadcasted
+                // so collapsing is not allowed. We may increment tile rank though.
+                if (rank_norm->get_num_append() != 0)
+                    blocked_input_shapes = true;
+                // If RankNormalization prepends dims, then the dims should be ignored during domain optimization
+                // to avoid passing already incremented shapes to linear_ir.shape_infer()
+            }
+            const ExpressionPtr& shape_producing_expr = blocked_input_shapes ?
+                                                        first_consumer :
+                                                        io_expr;
+            const auto& shape = utils::get_planar_vdims(shape_producing_expr->get_output_port_descriptor(0));
             OPENVINO_ASSERT(std::none_of(shape.begin(), shape.end(),
                                          [](size_t d) {return d == snippets::IShapeInferSnippets::DYNAMIC_DIMENSION; }),
                             "OptimizeDomain pass does not support dynamic shapes");
-            OPENVINO_ASSERT(ov::snippets::broadcast_merge_into(master_shape, shape),
-                            "Failed to merge input shapes in OptimizeDomain pass");
             input_shapes.emplace_back(shape);
         }
     }
@@ -98,7 +109,9 @@ bool OptimizeDomain::run(snippets::lowered::LinearIR& linear_ir) {
                                                  master_shape.end(),
                                                  (size_t)1,
                                                  std::multiplies<size_t>());
-    const auto num_dims_collapsed = optimize(input_shapes,
+    const auto num_dims_collapsed = blocked_input_shapes ?
+ 0 : + optimize(input_shapes, master_shape, total_work_amount, config.m_min_parallel_work_amount, diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index 7b69c82777d90e..aea3cf99858622 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -19,23 +19,25 @@ bool PropagateLayout::run(LinearIR& linear_ir) { if (linear_ir.empty()) return false; - for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - const auto& expr = *expr_it; + for (const auto& expr : linear_ir) { const auto io_expr = std::dynamic_pointer_cast(expr); if (!io_expr) continue; const bool is_input = io_expr->get_type() == IOExpression::io_type::INPUT; const auto& connectors = is_input ? expr->get_output_port_connectors() : expr->get_input_port_connectors(); - if (connectors.size() != 1) - OPENVINO_THROW("Parameter/Results should have exactly one output/input"); + OPENVINO_ASSERT(connectors.size() == 1, "Parameter/Results should have exactly one output/input"); // If input - we should be looking downstream, if output - upstream const auto& target_connector = connectors.front(); if (is_input) { - const auto consumer_inputs = target_connector->get_consumers(); // Note that here we consider only the first child (which is usually load), // but often there is another child - LoopEnd + auto consumer_inputs = target_connector->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + // If there is a RankNormalization op after a parameter - we should skip it + if (is_type(first_consumer->get_node())) + consumer_inputs = first_consumer->get_output_port_connector(0)->get_consumers(); std::set> child_layouts; for (const auto& child_input : consumer_inputs) { const auto& child = child_input.get_expr(); diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index e868d75e5dd5ea..b434e0f974beb3 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -44,13 +44,15 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Float constant values in byte representation const auto float_min_constant = uint32_t(0xff7fffff); const auto zero_constant = uint32_t(0x00000000); - + const bool is_dynamic = softmax->is_dynamic(); // We need an iterator to the inserted element - auto push_node = [&linear_ir, &expr_it](const std::shared_ptr& n) { + auto push_node = [&linear_ir, &expr_it, is_dynamic](const std::shared_ptr& n) { const auto expr = linear_ir.insert(expr_it, n); + if (is_dynamic) + expr->get()->updateShapes(); return std::make_pair(expr, n); }; - + const ov::PartialShape broadcasted_shape(softmax_expr->get_input_port_descriptor(0)->get_shape()); // Note: VectorBuffer is a special case, since it should go before the initial Load. So we handle it separately const auto& vector_buffer_max = push_node(std::make_shared()); // Init value of vector buffer for ReduceMax is -FLOAT_MIN. 
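For orientation, the expression sequence assembled by this pass implements the numerically stable softmax with the division replaced by a reciprocal multiply; schematically (a sketch distilled from the surrounding code, not an extra API):

    // m = ReduceMax(x)                     // vector buffer filled with 0xff7fffff (lowest finite float)
    // e = Exp(x - BroadcastMove(m))        // subtracting the row max keeps Exp in range
    // s = ReduceSum(e)                     // vector buffer filled with 0x00000000
    // y = e * BroadcastMove(Power(s, -1))  // 1/s is computed once, outside the inner loop

The change in this hunk only swaps the broadcast target: both BroadcastMove ops now use the precomputed broadcasted_shape (taken from the Softmax input port descriptor), so the decomposition keeps working when the body is processed through the dynamic-shape path introduced above.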
@@ -65,9 +67,8 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { std::vector{(*max.first)->get_input_port(0), (*max.first)->get_input_port(1)}, std::vector{(*max.first)->get_output_port(0)}); - const auto broadcast_horizon_max = push_node( - std::make_shared(horizon_max.second, horizon_max.second->get_input_partial_shape(0))); + std::make_shared(horizon_max.second, broadcasted_shape)); const auto vector_buffer_sum = push_node(std::make_shared()); // Init value of vector buffer for ReduceSum is zero. const auto fill_sum = push_node(std::make_shared(vector_buffer_sum.second, 0, zero_constant)); @@ -89,7 +90,7 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Divide is expensive operation, so we decompose it into 1 / x * y, where 1 / x is executed outside loop const auto pow = push_node(std::make_shared(horizon_sum.second, -1.f)); - const auto broadcast_pow = push_node(std::make_shared(pow.second, horizon_sum.second->get_input_partial_shape(0))); + const auto broadcast_pow = push_node(std::make_shared(pow.second, broadcasted_shape)); // Mul (pseudo-Divide loop) const auto mul = push_node(std::make_shared(exp.second, broadcast_pow.second)); diff --git a/src/common/snippets/src/lowered/pass/validate_shapes.cpp b/src/common/snippets/src/lowered/pass/validate_shapes.cpp new file mode 100644 index 00000000000000..8d12004313e0bf --- /dev/null +++ b/src/common/snippets/src/lowered/pass/validate_shapes.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/pass/validate_shapes.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/shape_inference/shape_inference.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +bool ValidateShapes::run(LinearIR& linear_ir) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ValidateShapes") + + for (const auto& expr : linear_ir) { + const auto num_inputs = expr->get_input_count(); + const auto& port_connectors = expr->get_input_port_connectors(); + const auto& port_descriptors = expr->get_input_port_descriptors(); + OPENVINO_ASSERT(port_connectors.size() == num_inputs, "Invalid number of port connectors detected"); + OPENVINO_ASSERT(port_descriptors.size() == num_inputs, "Invalid number of port descriptors detected"); + for (size_t i = 0; i < num_inputs; i++) { + const auto& descr = port_descriptors[i]; + const auto& layout = descr->get_layout(); + const auto& shape = descr->get_shape(); + const auto& n = expr->get_node(); + OPENVINO_ASSERT(std::none_of(shape.begin(), shape.end(), + [](size_t d) {return d == IShapeInferSnippets::DYNAMIC_DIMENSION;}), + "Dynamic dimensions are not allowed at this point of pipeline. ", + "Check the expr for node ", n->get_friendly_name()); + OPENVINO_ASSERT(layout.size() == shape.size(), "Layout and shape sizes must match. ", + "Check the expr for node ", n->get_friendly_name()); + const auto& parent_desc = port_connectors[i]->get_source().get_descriptor_ptr(); + const auto& parent_shape = parent_desc->get_shape(); + OPENVINO_ASSERT(parent_shape == shape, "Parent shape must be equal to the expression shape. 
", + "Check the expr for node ", n->get_friendly_name()); + } + } + return false; +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/lowered/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp index 96e8c718cc972b..e8c4bdd0626b47 100644 --- a/src/common/snippets/src/lowered/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -3,6 +3,7 @@ // #include "snippets/lowered/port_descriptor.hpp" +#include namespace ov { namespace snippets { @@ -12,13 +13,15 @@ size_t PortDescriptor::ServiceDimensions::FULL_DIM = SIZE_MAX; PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} -PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) - : PortDescriptor(in.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(utils::pshape_to_vdims(in.get_partial_shape()), std::move(subtensor_shape), std::move(layout)) {} PortDescriptor::PortDescriptor(const ov::Output& out, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Output(out.get_node(), out.get_index()), std::move(subtensor_shape), std::move(layout)) {} -PortDescriptor::PortDescriptor(const ov::Output& out, VectorDims subtensor_shape, std::vector layout) - : PortDescriptor(out.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(const ov::Output& out, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(utils::pshape_to_vdims(out.get_partial_shape()), std::move(subtensor_shape), std::move(layout)) {} PortDescriptor::PortDescriptor(VectorDims shape, VectorDims subtensor_shape, std::vector layout) : m_tensor_shape(std::move(shape)), m_layout(std::move(layout)), m_subtensor_shape(std::move(subtensor_shape)) { @@ -30,13 +33,12 @@ void PortDescriptor::validate_arguments() { m_layout.resize(m_tensor_shape.size()); // NCHW layout by default std::iota(m_layout.begin(), m_layout.end(), 0); - } else if (m_layout.size() != m_tensor_shape.size()) { - OPENVINO_THROW("Snippets tensor descriptor: Layout size must be equal to the shape size"); } + OPENVINO_ASSERT(m_layout.size() == m_tensor_shape.size(), "Snippets tensor descriptor: Layout size must be equal to the shape size"); } PortDescriptorPtr PortDescriptor::clone() const { - const auto desc = std::make_shared(m_tensor_shape, m_subtensor_shape, m_layout); + auto desc = std::make_shared(m_tensor_shape, m_subtensor_shape, m_layout); desc->set_reg(m_reg); return desc; } diff --git a/src/common/snippets/src/op/buffer.cpp b/src/common/snippets/src/op/buffer.cpp index 8b703fa0c29a16..615979ec5e3281 100644 --- a/src/common/snippets/src/op/buffer.cpp +++ b/src/common/snippets/src/op/buffer.cpp @@ -46,15 +46,13 @@ bool Buffer::visit_attributes(AttributeVisitor& visitor) { void Buffer::validate_and_infer_types() { INTERNAL_OP_SCOPE(Buffer_validate_and_infer_types); - ov::Shape output_shape; + ov::PartialShape output_shape; if (m_type == Type::NewMemory) { OPENVINO_ASSERT(get_input_size() == 0, "Buffer with new allocated memory must to not have arguments!"); output_shape = m_shape; } else if (m_type == Type::IntermediateMemory) { - const auto& input_shape = get_input_partial_shape(0); - 
OPENVINO_ASSERT(input_shape.is_static(), "Buffer supports only static input shape"); m_element_type = get_input_element_type(0); - output_shape = input_shape.get_shape(); + output_shape = get_input_partial_shape(0); } else { OPENVINO_THROW("Buffer supports only the following types: NewMemory and IntermediateMemory"); } diff --git a/src/common/snippets/src/op/rank_normalization.cpp b/src/common/snippets/src/op/rank_normalization.cpp new file mode 100644 index 00000000000000..5dfd46492a1946 --- /dev/null +++ b/src/common/snippets/src/op/rank_normalization.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/rank_normalization.hpp" +#include "snippets/utils.hpp" + +namespace ov { +namespace snippets { +namespace op { + +RankNormalization::RankNormalization(const Output& data, size_t num_prepend, size_t num_append) : + Op({data}), m_num_prepend(num_prepend), m_num_append(num_append) { + constructor_validate_and_infer_types(); +} + + +std::shared_ptr RankNormalization::clone_with_new_inputs(const OutputVector& new_args) const { + check_new_args_count(this, new_args); + return std::make_shared(new_args[0], m_num_prepend, m_num_append); +} + +void RankNormalization::validate_and_infer_types() { + auto new_shape = get_input_partial_shape(0); + // Note: other values are not allowed, only planar + blocked layout combination can be normalized. + NODE_VALIDATION_CHECK(this, utils::one_of(m_num_append, 0lu, 1lu), + "num_append could be only 0 or 1, other values are not allowed."); + new_shape.insert(new_shape.begin(), m_num_prepend, Dimension(1)); + new_shape.insert(new_shape.end(), m_num_append, Dimension(1)); + set_output_type(0, get_input_element_type(0), new_shape); +} + +bool RankNormalization::visit_attributes(AttributeVisitor& visitor) { + visitor.on_attribute("num_prepend", m_num_prepend); + visitor.on_attribute("num_append", m_num_append); + return true; +} + +RankNormalization::ShapeInfer::ShapeInfer(const std::shared_ptr& n) { + const auto& rank_norm = as_type_ptr(n); + OPENVINO_ASSERT(rank_norm, "Invalid operation passed to RankNormalization::ShapeInfer: ", n->get_type_info().name); + m_num_append = rank_norm->m_num_append; + m_num_prepend = rank_norm->m_num_prepend; +} + +IShapeInferSnippets::Result +RankNormalization::ShapeInfer::infer(const std::vector& input_shapes) { + OPENVINO_ASSERT(input_shapes.size() == 1, "Invalid number of input shapes passed to RankNormalization::ShapeInfer::infer"); + VectorDims out_shape = input_shapes[0].get(); + out_shape.insert(out_shape.begin(), m_num_prepend, 1); + out_shape.insert(out_shape.end(), m_num_append, 1); + return {{out_shape}, ShapeInferStatus::success}; +} + +} // namespace op +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/op/scalar.cpp b/src/common/snippets/src/op/scalar.cpp index 029a2e613f28d2..4efd1716a6fb94 100644 --- a/src/common/snippets/src/op/scalar.cpp +++ b/src/common/snippets/src/op/scalar.cpp @@ -4,14 +4,17 @@ #include "snippets/op/scalar.hpp" +namespace ov { +namespace snippets { +namespace op { -std::shared_ptr ov::snippets::op::Scalar::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Scalar::clone_with_new_inputs(const OutputVector& new_args) const { check_new_args_count(this, new_args); return std::make_shared(*this); } // Scalar currently supports only one-element constants, this could be changed in the future -void 
ov::snippets::op::Scalar::validate_and_infer_types() { +void Scalar::validate_and_infer_types() { Constant::validate_and_infer_types(); auto out_pshape = get_output_partial_shape(0); NODE_VALIDATION_CHECK(this, out_pshape.is_static(), "Scalar supports only static input shapes"); @@ -20,7 +23,7 @@ void ov::snippets::op::Scalar::validate_and_infer_types() { " shape"); } -bool ov::snippets::op::Scalar::visit_attributes(AttributeVisitor& visitor) { +bool Scalar::visit_attributes(AttributeVisitor& visitor) { auto shape = get_output_shape(0); auto type = get_output_element_type(0); auto value = cast_vector(); @@ -29,3 +32,7 @@ bool ov::snippets::op::Scalar::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("value", value); return true; } + +} // namespace op +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index dc13bb3e8bb716..fccecfa8ab5f32 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -6,9 +6,7 @@ #include "snippets/remarks.hpp" #include "snippets/op/subgraph.hpp" -#include "snippets/op/convert_saturation.hpp" -#include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" #include "snippets/pass/propagate_precision.hpp" #include "snippets/pass/convert_constants.hpp" @@ -17,6 +15,9 @@ #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/set_softmax_ports.hpp" +#include "snippets/pass/canonicalization.hpp" +#include "snippets/pass/align_element_types.hpp" +#include "snippets/lowered/pass/validate_shapes.hpp" #include "snippets/utils.hpp" @@ -29,6 +30,7 @@ #include "snippets/lowered/pass/init_loops.hpp" #include "snippets/lowered/pass/insert_buffers.hpp" #include "snippets/lowered/pass/insert_load_store.hpp" +#include "snippets/lowered/pass/insert_broadcastmove.hpp" #include "snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp" #include "snippets/lowered/pass/allocate_buffers.hpp" #include "snippets/lowered/pass/propagate_layout.hpp" @@ -61,7 +63,7 @@ namespace snippets { namespace op { void Subgraph::set_generator(std::shared_ptr generator) { - m_generator = generator; + m_generator = std::move(generator); } void Subgraph::set_virtual_port_count(const size_t count) { @@ -171,36 +173,6 @@ std::shared_ptr Subgraph::clone_with_new_inputs(const OutputVector& inputs return make_shared(inputs, body().clone()); } -std::vector Subgraph::reshape_body(const std::vector& input_shapes) { - auto& params = body_ptr()->get_parameters(); - OPENVINO_ASSERT(params.size() == input_shapes.size(), "Got invalid number of input shapes to reshape subgraph body"); - for (size_t i = 0; i < params.size(); ++i) { - params[i]->set_partial_shape(input_shapes[i]); - } - body_ptr()->validate_nodes_and_infer_types(); - std::vector output_shapes; - for (const auto& res : body_ptr()->get_results()) { - output_shapes.emplace_back(res->get_input_partial_shape(0)); - } - return output_shapes; -} - -std::vector Subgraph::reshape_body(const std::vector& input_shapes) { - auto& params = body_ptr()->get_parameters(); - OPENVINO_ASSERT(params.size() == input_shapes.size(), "Got invalid number of input shapes to reshape subgraph body"); - for (size_t i = 0; i < params.size(); ++i) { - params[i]->set_partial_shape(input_shapes[i]); - } - body_ptr()->validate_nodes_and_infer_types(); - std::vector output_shapes; - for (const auto& 
res : body_ptr()->get_results()) { - auto pshape = res->get_input_partial_shape(0); - OPENVINO_ASSERT(pshape.is_static(), "Subgraph inferred dynamic output shape during reshape with static inputs"); - output_shapes.emplace_back(res->get_input_partial_shape(0).get_shape()); - } - return output_shapes; -} - void Subgraph::validate_and_infer_types() { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::validate_and_infer_types") @@ -311,166 +283,6 @@ auto Subgraph::constant_input_should_be_inside_body(const std::shared_ptr(node); } -/// -/// \brief Canonization transforms original subgraph and to canonical form suitable for code generation. In particular, -/// it handles supported layout conversions, broadcasts inputs and outputs to a single rank and layout. Canonicalization -/// returns master-shape (max rank + max dimensions over all outputs) that can be used for scheduling. -/// Canonicalization currently supports only the following layout conversions: -/// * None: all inputs have the same layout -/// * Planar + blocked: some inputs have blocked, and some have planar layouts, e.g. + -/// Also there is precision aligning inside body of subgraph during canonicalization -ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& outputShapes, - const BlockedShapeVector& inputShapes) { - INTERNAL_OP_SCOPE(Subgraph); - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::canonicalize") - NODE_VALIDATION_CHECK(this, inputShapes.size() == body_ptr()->get_parameters().size(), - "Number of parameters for snippet doesn't match passed to generate method: ", - inputShapes.size(), " vs ", body_ptr()->get_parameters().size(), "."); - - NODE_VALIDATION_CHECK(this, outputShapes.size() == body_ptr()->get_results().size(), - "number of results for snippet doesn't match passed to generate method: ", - outputShapes.size(), " vs ", body_ptr()->get_results().size(), "."); - - auto getMaxRankBlockedShape = [](const BlockedShapeVector& blockedShapes) -> const BlockedShape& { - return *std::max_element(blockedShapes.begin(), blockedShapes.end(), - [&](const BlockedShape& lhs, const BlockedShape& rhs) { - return std::get<0>(lhs).size() < std::get<0>(rhs).size(); - }); - }; - PartialShape baseShape; - AxisVector baseOrder; - std::tie(baseShape, baseOrder, std::ignore) = getMaxRankBlockedShape(inputShapes); - maxInputRank = baseShape.size(); - appendOnesForCanonical.resize(inputShapes.size(), 0); - const bool baseIsBlocked = baseOrder.size() != std::set(baseOrder.begin(), baseOrder.end()).size(); - for (size_t i = 0; i < inputShapes.size(); i++) { - const auto& blockedShape = inputShapes[i]; - PartialShape inShape; - AxisVector inOrder; - element::Type inType; - std::tie(inShape, inOrder, inType) = blockedShape; - const auto inRank = inShape.size(); - NODE_VALIDATION_CHECK(this, inRank <= maxInputRank, "Input rank can't be larger than output rank in snippets."); - if (inRank < maxInputRank) { - appendOnesForCanonical[i] = maxInputRank - inRank; - PartialShape newShape(ov::Shape(maxInputRank, 1)); - // todo: more complicated logics is needed if we want to merge smth else than blocked and planar - if (baseIsBlocked) { - const bool inIsNotBlocked = inOrder.size() == std::set(inOrder.begin(), inOrder.end()).size(); - NODE_VALIDATION_CHECK(this, inIsNotBlocked, "Snippets don't support conversion between blocked layouts of different ranks"); - inShape.insert(inShape.end(), ov::Dimension(1)); - appendOnesForCanonical[i]--; - } - 
NODE_VALIDATION_CHECK(this, PartialShape::broadcast_merge_into(newShape, inShape, ov::op::AutoBroadcastType::NUMPY), - "Failed to broadcast_merge inputs in snippets canonicalization"); - inShape = std::move(newShape); - } else { - // todo: 4d blocked + 5d planar layouts are not supported: + - NODE_VALIDATION_CHECK(this, - equal(baseOrder.begin(), baseOrder.end(), inOrder.begin()), - "Snippets canonicalization got input shapes of equal ranks but different layouts, which is not supported"); - } - ov::PartialShape tmpPShape(baseShape); - // todo: we need to generalize canonicalization for domain-sensitive ops. E.g. MatMul inputs can't be broadcasted one to another - if (!config.m_has_domain_sensitive_ops) - NODE_VALIDATION_CHECK(this, - PartialShape::broadcast_merge_into(tmpPShape, inShape, ::ov::op::AutoBroadcastType::NUMPY), - "Failed to create broadcastable shapes in snippets canonicalization"); - const auto paramShape = body_ptr()->get_parameters()[i]->get_partial_shape(); - const auto paramType = body_ptr()->get_parameters()[i]->get_element_type(); - if (paramShape.size() != inShape.size() || !equal(paramShape.begin(), paramShape.end(), inShape.begin())) - body_ptr()->replace_parameter(i, std::make_shared(paramType, inShape)); - } - body_ptr()->validate_nodes_and_infer_types(); - - auto skipStartEndOnes = [](const PartialShape& shape) { - auto begin = shape.begin(); - auto end = shape.end(); - while (begin != end && *begin == 1) - begin++; - while (begin != end && *(end - 1) == 1) - end--; - - PartialShape trimmedShape(std::vector(end - begin, 1)); - std::copy(begin, end, trimmedShape.begin()); - return trimmedShape; - }; - - // Check that output shapes are broadcastable => can be scheduled - const auto& body_results = body_ptr()->get_results(); - PartialShape outPShape = body_results[0]->get_input_partial_shape(0); - // todo: we need a slightly more general approach for backward ROI propagation - const auto& result_parent = body_results[0]->get_input_node_shared_ptr(0); - if (body_results.size() == 1 && - ov::is_type(result_parent) && - ov::is_type(result_parent->get_input_node_shared_ptr(0))) { - outPShape = result_parent->get_input_partial_shape(0); - } else { - for (size_t i = 0; i < body_results.size(); i++) { - auto shape_i = body_results[i]->get_input_partial_shape(0); - auto outputShape_i = std::get<0>(outputShapes[i]); - // Check that the produced output shape corresponds to the passed shape - // Some produced shapes may have been changed to be broadcastable (e.g. 
blocked + planar outputs), - // so we need to remove leading and trailing "1" before the comparison - PartialShape pShape_i(skipStartEndOnes(shape_i)); - bool compatibleWithPassedShape = PartialShape::broadcast_merge_into(pShape_i, - skipStartEndOnes(outputShape_i), - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithPassedShape, - "Inferred and passed results shapes are incompatible for snippet "); - // Check that output shapes are broadcastable to each other => can be scheduled - bool compatibleWithOtherOutputs = PartialShape::broadcast_merge_into(outPShape, shape_i, - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithOtherOutputs, - "Snippets output shapes must be numpy broadcastable"); - } - } - - // We should insert Converts after Parameters and Constant and before Results - // to align precision inside Subgraph body that is supported by Plugin - align_element_types(outputShapes, inputShapes); - - master_shape = outPShape; - return master_shape; -} - -ov::PartialShape snippets::op::Subgraph::canonicalized_body_shape_infer(const BlockedShapeVector& inputShapes) { - std::vector normInputShapes; - for (size_t i = 0; i < inputShapes.size(); i++) { - PartialShape inShape = std::get<0>(inputShapes[i]); - const auto inRank = inShape.size(); - if (inRank < maxInputRank) { - PartialShape newShape(ov::Shape(maxInputRank, 1)); - for (size_t ir = 0; ir < inRank; ir++) { - newShape[appendOnesForCanonical[i] + ir] = inShape[ir]; - } - normInputShapes.push_back(newShape.get_shape()); - } else { - normInputShapes.push_back(inShape.get_shape()); - } - } - reshape_body(normInputShapes); - - const auto& body_results = body_ptr()->get_results(); - PartialShape outPShape = body_results[0]->get_input_partial_shape(0); - const auto& result_parent = body_results[0]->get_input_node_shared_ptr(0); - if (body_results.size() == 1 && - ov::is_type(result_parent) && - ov::is_type(result_parent->get_input_node_shared_ptr(0))) { - outPShape = result_parent->get_input_partial_shape(0); - } else { - for (size_t i = 0; i < body_results.size(); i++) { - auto shape_i = body_results[i]->get_input_partial_shape(0); - bool compatibleWithOtherOutputs = PartialShape::broadcast_merge_into(outPShape, shape_i, - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithOtherOutputs, - "Snippets output shapes must be numpy broadcastable"); - } - } - master_shape = outPShape; - return master_shape; -} - bool Subgraph::check_broadcast(const std::shared_ptr& node) noexcept { const auto elementwise = std::dynamic_pointer_cast(node); return @@ -503,8 +315,40 @@ IShapeInferSnippets::Result Subgraph::OVShapeInfer::infer(const std::vector output_dims; + if (is_dynamic()) { + // Note that in case of dynamic implementation shapeInfer() is called before PrepareParams, + // so there must be last_result available + // In principle, we can instantiate shape_infer here, but it's not an intended pipeline behavior. 
+ OPENVINO_ASSERT(m_shape_infer, "Can't calculate master_shape when shapeInfer is not initialized"); + output_dims = m_shape_infer->get_last_result().dims; + OPENVINO_ASSERT(!output_dims.empty(), "Can't calculate master_shape before the first shape inference"); + } else { + for (const auto& res : body_ptr()->get_results()) { + const auto& res_input = res->input(0); + OPENVINO_ASSERT(res_input.get_partial_shape().is_static(), "Result have dynamic shape in static pipeline"); + // We need to account to the shape's layout stored in Output rt_info + const auto& planar_shape = utils::get_planar_pshape(res_input.get_source_output()); + output_dims.emplace_back(planar_shape.get_shape()); + } + } + + if (output_dims.size() == 1) + return output_dims.front(); + + const auto& default_broadcasting = std::make_shared(); + // Note: we have to convert vector to vector> + // because of shape inference interface + std::vector> inputs; + inputs.reserve(output_dims.size()); + for (const auto& d : output_dims) + inputs.emplace_back(d); + return default_broadcasting->infer(inputs).dims.front(); +} + std::shared_ptr -Subgraph::convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory) const { +Subgraph::convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory) { lowered::Config lowering_config; lowering_config.m_save_expressions = config.m_has_domain_sensitive_ops; lowering_config.m_need_fill_tail_register = config.m_has_domain_sensitive_ops; @@ -513,89 +357,44 @@ Subgraph::convert_body_to_linear_ir(const std::shared_ptr(body_ptr(), shape_infer_factory, lowering_config); + m_linear_ir = std::make_shared(body_ptr(), shape_infer_factory, lowering_config); + m_shape_infer = m_linear_ir->get_shape_infer_instance(); + return m_linear_ir; } -void Subgraph::align_element_types(const BlockedShapeVector& outputShapes, - const BlockedShapeVector& inputShapes) { - // We should insert Convert before Results to set original output element type if needed - const auto& body_results = body_ptr()->get_results(); - for (size_t i = 0; i < outputShapes.size(); i++) { - const auto needed_out_type = std::get<2>(outputShapes[i]); - if (body_results[i]->get_input_element_type(0) != needed_out_type) { - auto parent_output = body_results[i]->get_input_source_output(0); - std::shared_ptr consumer = body_results[i]; - - // Snippets supports Transpose only after Parameter or before Result nodes - // So we have to insert Convert before Transpose (if there is) on Subgraph outputs - const auto transpose = ov::as_type_ptr(parent_output.get_node_shared_ptr()); - if (transpose) { - OPENVINO_ASSERT(parent_output.get_target_inputs().size() == 1, - "If Result has Transpose on input, this Result must be single consumer of the Transpose"); - parent_output = transpose->get_input_source_output(0); - consumer = transpose; - } - - const auto convert = std::make_shared(parent_output, needed_out_type); - ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); - - consumer->set_argument(0, convert); - consumer->validate_and_infer_types(); - if (consumer != body_results[i]) - body_results[i]->validate_and_infer_types(); - } - } - - // We should change existing element type to original for Parameters if needed - const auto& parameters = body_ptr()->get_parameters(); - for (size_t i = 0; i < inputShapes.size(); ++i) { - const auto needed_in_type = std::get<2>(inputShapes[i]); - const auto& parameter = parameters[i]; - const auto original_type = parameter->get_element_type(); - if (original_type != needed_in_type) { - 
parameter->set_element_type(needed_in_type); - parameter->validate_and_infer_types(); - - auto parent_output = parameter->output(0); - auto consumer_inputs = parent_output.get_target_inputs(); - - // Snippets supports Transpose only after Parameter or before Result nodes - // So we have to insert Convert after Transpose (if there is) on Subgraph inputs - if (std::any_of(consumer_inputs.cbegin(), consumer_inputs.cend(), - [](const ov::Input& input) { return ov::is_type(input.get_node()); })) { - OPENVINO_ASSERT(consumer_inputs.size() == 1, - "If Parameter has Transpose on output, this Transpose must be single consumer of the Parameter"); - const auto transpose = consumer_inputs.begin()->get_node()->shared_from_this(); - transpose->validate_and_infer_types(); - - parent_output = transpose; - consumer_inputs = parent_output.get_target_inputs(); - } - - const auto convert = std::make_shared(parent_output, original_type); - ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); - - for (const auto input : consumer_inputs) { - const auto& input_node = input.get_node(); - if (input_node == convert.get()) { - continue; - } - input_node->set_argument(input.get_index(), convert->output(0)); - } - } +std::shared_ptr Subgraph::clone() const { + ov::OutputVector subgraph_node_inputs; + for (const auto &input : input_values()) { + auto new_input = std::make_shared(input.get_element_type(), input.get_partial_shape()); + subgraph_node_inputs.push_back(new_input); } -} - -void Subgraph::data_flow_transformations(const std::vector& backend_passes) { + std::shared_ptr new_body = body_ptr()->clone(); + auto result = std::make_shared(subgraph_node_inputs, new_body); + // Note: ov::copy_runtime_info accepts only shared_ptr as "from" but never modifies it, + // so we have to cast away constness to copy runtime info + ov::copy_runtime_info(const_pointer_cast(shared_from_this()), result); + result->set_friendly_name(get_friendly_name()); + if (m_linear_ir) + result->m_linear_ir = std::make_shared(m_linear_ir->deep_copy()); + // Note: we don't update shapeInfer here, since it's initialized in the constructor + if (m_generator) + result->m_generator = m_generator->clone(); + return result; +} + +void Subgraph::data_flow_transformations(const BlockedShapeVector& blocked_input_shapes, + const std::vector& input_precisions, + const std::vector& output_precisions, + const std::vector& backend_passes) { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::data_flow_transformations") - const auto& params = body_ptr()->get_parameters(); - bool inputs_has_dynamic_last_dims = std::any_of(params.begin(), params.end(), - [](const shared_ptr& p) { - return p->get_partial_shape().rbegin()->is_dynamic(); - }); - snippets::pass::Manager manager; + ov::snippets::pass::Manager manager; + if (!blocked_input_shapes.empty()) + manager.register_pass(blocked_input_shapes); + if (!input_precisions.empty() && !output_precisions.empty()) + manager.register_pass(input_precisions, output_precisions); + if (config.m_has_domain_sensitive_ops) { manager.register_pass(); manager.register_pass(); @@ -605,14 +404,6 @@ void Subgraph::data_flow_transformations(const std::vector(); manager.register_pass(); manager.register_pass(); - // todo: presently dynamic pipeline is activated even if the last two dimension are static - // In general, we can use static kernels in this case, but several parameters (src and dst memory pointers for example) - // should be passed as run-time args, so 
it's a mixed mode: kernel is shape-aware, but some additional runtime args are required - // Presently Broadcasting is organized in the following way: - // * ALL last dims are static => broadcasting is handled via MoveBroadcast and pointer arithmetics (even for dynamic upper dims) - if (!inputs_has_dynamic_last_dims) { - manager.register_pass(); - } manager.register_pass(m_generator->get_target_machine()); manager.register_pass(); @@ -623,8 +414,9 @@ void Subgraph::data_flow_transformations(const std::vector(buffer_allocation_rank); common_pipeline.register_pass(vector_size); common_pipeline.register_pass(); + common_pipeline.register_pass(); common_pipeline.register_pass(); + + common_pipeline.register_pass(); + common_pipeline.register_pass(); common_pipeline.register_pass(); common_pipeline.register_pass(); @@ -669,57 +465,44 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir, final_pipeline.register_pass(); final_pipeline.run(linear_ir); - m_buffer_scratchpad = buffer_allocation_pass->get_scratchpad_size(); + lowering_result.buffer_scratchpad_size = buffer_allocation_pass->get_scratchpad_size(); } -snippets::Schedule Subgraph::generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, +snippets::Schedule Subgraph::generate(const BlockedShapeVector& blocked_input_shapes, + const std::vector& input_precisions, + const std::vector& output_precisions, + const std::vector& data_flow_backend_passes, + const lowered::pass::PassPipeline& backend_passes_pre_common, + const lowered::pass::PassPipeline& backend_passes_post_common, + const std::shared_ptr& factory, const void* compile_params) { - canonicalize(output_shapes, input_shapes); - return generate(compile_params); + data_flow_transformations(blocked_input_shapes, input_precisions, output_precisions, data_flow_backend_passes); + convert_body_to_linear_ir(factory); + return generate_from_linear_ir(backend_passes_pre_common, backend_passes_post_common, compile_params); } -snippets::Schedule Subgraph::generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, - const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory, - const void* compile_params) { - canonicalize(output_shapes, input_shapes); - return generate(data_flow_passes, control_flow_passes_pre_common, control_flow_passes_post_common, - shape_infer_factory, compile_params); -} - -snippets::Schedule Subgraph::generate(const void* compile_params) { - return generate({}, {}, {}, nullptr, compile_params); -} - -snippets::Schedule Subgraph::generate(const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory, - const void* compile_params) { +snippets::Schedule Subgraph::generate_from_linear_ir(const lowered::pass::PassPipeline& backend_passes_pre_common, + const lowered::pass::PassPipeline& backend_passes_post_common, + const void* compile_params) const { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::generate") OPENVINO_ASSERT(m_generator != nullptr, "generate is called while generator is not set"); - data_flow_transformations(data_flow_passes); - - lowered::LinearIR linear_ir = 
*convert_body_to_linear_ir(shape_infer_factory); - control_flow_transformations(linear_ir, control_flow_passes_pre_common, control_flow_passes_post_common); - // actual code emission - const auto& lowering_result = m_generator->generate(linear_ir, linear_ir.get_config(), compile_params); - const auto ptr = lowering_result.binary_code; - + // Note: some transformations performed in the generator, e.g. tail insertion, can break shape propagation. + // Until this behavior is fixed, we have to make a copy of the LIR before giving it to the generator. + OPENVINO_ASSERT(m_linear_ir, "Attempt to call generate when linear IR was not initialized"); + auto linear_ir = m_linear_ir->deep_copy(); + LoweringResult lowering_result; + control_flow_transformations(linear_ir, lowering_result, backend_passes_pre_common, backend_passes_post_common); + m_generator->generate(linear_ir, lowering_result, compile_params); VectorDims parallel_exec_domain = linear_ir.get_master_shape(); const size_t loop_depth = linear_ir.get_config().m_loop_depth; for (size_t i = 0; i < loop_depth; i++) parallel_exec_domain[parallel_exec_domain.size() - 1 - i] = 1; - return {parallel_exec_domain, ptr}; + return {parallel_exec_domain, std::move(lowering_result)}; } void Subgraph::print() const { diff --git a/src/common/snippets/src/pass/align_element_types.cpp b/src/common/snippets/src/pass/align_element_types.cpp new file mode 100644 index 00000000000000..da1ab1cb2c038f --- /dev/null +++ b/src/common/snippets/src/pass/align_element_types.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/align_element_types.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { + +pass::AlignElementTypes::AlignElementTypes(std::vector input_precisions, + std::vector output_precisions) : + m_input_precisions(std::move(input_precisions)), + m_output_precisions(std::move(output_precisions)) { +} + +bool pass::AlignElementTypes::run_on_model(const std::shared_ptr& m) { + RUN_ON_MODEL_SCOPE(AlignElementTypes); + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::AlignElementTypes") + bool is_modified = false; + const auto& results = m->get_results(); + const auto& params = m->get_parameters(); + OPENVINO_ASSERT(m_input_precisions.size() == params.size() && m_output_precisions.size() == results.size(), + "Number of snippet parameters and results doesn't match the number of precisions passed to the AlignElementTypes pass.
"); + + // We should insert Convert before Results to set original output element type if needed + for (size_t i = 0; i < m_output_precisions.size(); i++) { + const auto needed_out_type = m_output_precisions[i]; + if (results[i]->get_input_element_type(0) != needed_out_type) { + std::shared_ptr consumer = results[i]; + auto parent_output = consumer->get_input_source_output(0); + + // Snippets supports Transpose only after Parameter or before Result nodes + // So we have to insert Convert before Transpose (if there is) on Subgraph outputs + const auto transpose = ov::as_type_ptr(parent_output.get_node_shared_ptr()); + if (transpose) { + OPENVINO_ASSERT(parent_output.get_target_inputs().size() == 1, + "If Result has Transpose on input, this Result must be single consumer of the Transpose"); + parent_output = transpose->get_input_source_output(0); + consumer = transpose; + } + + const auto convert = std::make_shared(parent_output, needed_out_type); + ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + + consumer->set_argument(0, convert); + consumer->validate_and_infer_types(); + if (transpose) + results[i]->validate_and_infer_types(); + is_modified = true; + } + } + + // We should change existing element type to original for Parameters if needed + for (size_t i = 0; i < m_input_precisions.size(); ++i) { + const auto needed_in_type = m_input_precisions[i]; + const auto& parameter = params[i]; + const auto original_type = parameter->get_element_type(); + if (original_type != needed_in_type) { + parameter->set_element_type(needed_in_type); + parameter->validate_and_infer_types(); + + auto parent_output = parameter->output(0); + auto consumer_inputs = parent_output.get_target_inputs(); + + const auto& first_child = consumer_inputs.begin()->get_node()->shared_from_this(); + // Note: RankNormalization of is designed for shape-inference purposes only. 
+ // It does not process any data (nor does it emit any code), so it doesn't require Convert operations + if (is_type(first_child)) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); + parent_output = first_child->output(0); + consumer_inputs = parent_output.get_target_inputs(); + } + + // Snippets supports Transpose only after Parameter or before Result nodes + // So we have to insert Convert after Transpose (if there is) on Subgraph inputs + if (std::any_of(consumer_inputs.cbegin(), consumer_inputs.cend(), + [](const ov::Input& input) { return ov::is_type(input.get_node()); })) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, + "If Parameter has Transpose on output, this Transpose must be single consumer of the Parameter"); + const auto transpose = consumer_inputs.begin()->get_node()->shared_from_this(); + transpose->validate_and_infer_types(); + + parent_output = transpose; + consumer_inputs = parent_output.get_target_inputs(); + } + + const auto& convert = std::make_shared(parent_output, original_type); + ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + + for (const auto input : consumer_inputs) { + const auto& input_node = input.get_node(); + if (input_node == convert.get()) { + continue; + } + input_node->set_argument(input.get_index(), convert->output(0)); + } + + is_modified = true; + } + } + return is_modified; +} + +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp b/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp index 65fbbc162a8ada..cd803b163b5bbf 100644 --- a/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp +++ b/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp @@ -5,7 +5,7 @@ #include "snippets/itt.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" -#include "snippets/pass/insert_movebroadcast.hpp" +#include "snippets/op/broadcastmove.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/opsets/opset1.hpp" @@ -30,15 +30,19 @@ ov::snippets::pass::BroadcastToMoveBroadcast::BroadcastToMoveBroadcast() { const auto target_shape = root->get_output_partial_shape(0); const auto value_shape = root->get_input_partial_shape(0); - if (target_shape.is_dynamic() || value_shape.is_dynamic()) { - return false; + OPENVINO_ASSERT(target_shape.is_static() && value_shape.rank().is_static(), "Broadcast with dynamic target shape is not supported in Snippets"); + // Insert BroadcastMove only if the last dimension needs to be broadcasted. Higher-level dims broadcasting + // will be handled by pointer arithmetic. Note that this behavior should be changed in case of full op::Broadcast support. 
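// ---------------------------------------------------------------------------
// [Editor's illustration, not part of the patch] The rule the hunk just above
// describes: an explicit BroadcastMove is emitted only when the innermost
// (last) dimension differs, since outer-dim broadcasting is realized by the
// pointer arithmetic of the generated loops. A hedged sketch of that decision
// on plain dim vectors (needs_broadcast_move is a hypothetical helper, not an
// OpenVINO API):
#include <cstddef>
#include <vector>

inline bool needs_broadcast_move(const std::vector<std::size_t>& value_shape,
                                 const std::vector<std::size_t>& target_shape) {
    // Only the innermost dims are compared: value {1, 3, 1, 1} vs target
    // {1, 3, 16, 64} -> true (1 != 64), while value {1, 3, 1, 64} vs the
    // same target -> false (outer dims are broadcast "for free").
    return !value_shape.empty() && !target_shape.empty() &&
           value_shape.back() != target_shape.back();
}
// ---------------------------------------------------------------------------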
+ Output in_value = root->input_value(0); + if (*target_shape.rbegin() != *value_shape.rbegin()) { + auto broadcasted_shape = value_shape; + *broadcasted_shape.rbegin() = *target_shape.rbegin(); + const auto& broadcast_node = std::make_shared(in_value, broadcasted_shape); + in_value = broadcast_node->output(0); } - const auto broadcast_node = ov::snippets::pass::InsertMoveBroadcast::BroadcastNodeLastDim(root->input_value(0), - target_shape.get_shape(), - value_shape.get_shape()); - replace_output_update_name(root->output(0), broadcast_node); - ov::copy_runtime_info(root, broadcast_node.get_node_shared_ptr()); + replace_output_update_name(root->output(0), in_value); + ov::copy_runtime_info(root, in_value.get_node_shared_ptr()); return true; }; diff --git a/src/common/snippets/src/pass/canonicalization.cpp b/src/common/snippets/src/pass/canonicalization.cpp new file mode 100644 index 00000000000000..23414d2925bf36 --- /dev/null +++ b/src/common/snippets/src/pass/canonicalization.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/canonicalization.hpp" +#include "snippets/op/rank_normalization.hpp" +#include "snippets/itt.hpp" +#include "snippets/utils.hpp" +#include "snippets/lowered/port_descriptor.hpp" + +namespace ov { +namespace snippets { + +pass::Canonicalization::Canonicalization(const BlockedShapeVector& blocked_input_shapes) { + m_in_shapes.reserve(blocked_input_shapes.size()); + m_in_layouts.reserve(blocked_input_shapes.size()); + for (const auto& bs : blocked_input_shapes) { + m_has_dynamic_inputs |= utils::is_dynamic_vdims(bs.first); + m_in_shapes.emplace_back(bs.first); + m_in_layouts.emplace_back(bs.second); + // Note: Blocking (if any) must be accounted for in input shapes + OPENVINO_ASSERT(m_in_shapes.back().size() == m_in_layouts.back().size(), "Input shapes and layouts must have the same rank"); + } +} + +bool pass::Canonicalization::run_on_model(const std::shared_ptr& m) { + RUN_ON_MODEL_SCOPE(Canonicalization); + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::Canonicalization") + bool is_modified = false; + const ParameterVector& params = m->get_parameters(); + OPENVINO_ASSERT(m_in_shapes.size() == params.size(), + "Number of parameters for snippet doesn't match passed to the Canonicalization pass. ", + "Expected: ", m_in_shapes.size(), " Got: ", params.size(), "."); + + // Note that shape rank also incorporates layout, so NCHW16c would have shape rank 5 + auto is_blocked_layout = [](const Layout& l) { + return l.size() != std::set(l.begin(), l.end()).size(); + }; + auto compare_ranks = [](const Layout& l, const Layout& r) { + return l.size() < r.size(); + }; + // Layout with the max rank + const auto& max_rank_it = std::max_element(m_in_layouts.begin(), m_in_layouts.end(), compare_ranks); + Layout base_layout = *max_rank_it; + size_t max_rank = base_layout.size(); + const bool base_is_blocked = is_blocked_layout(base_layout); + + for (size_t i = 0; i < m_in_layouts.size(); i++) { + const auto& i_layout = m_in_layouts[i]; + const auto& i_shape = m_in_shapes[i]; + const auto i_rank = i_layout.size(); + const bool i_is_blocked = is_blocked_layout(i_layout); + // Canonicalization logic briefly: + // * If this input is blocked => Reshape corresponding input parameter, so the following transformations + // will work with a shape of a larger rank. 
In the dynamic case, this shape will be updated during shapeInfer() + // call, but the important thing is that the shape rank won't change. + // * If any of the input shapes is blocked (=> base_is_blocked), but this input is planar, + // then insert RankNormalization op after this input. This is needed so that all shapes inside the body have + // similar ranks. + if (i_is_blocked) { + OPENVINO_ASSERT(base_is_blocked && i_rank == max_rank, "If this shape is blocked, base must also be blocked"); + params[i]->set_partial_shape(snippets::utils::vdims_to_pshape(i_shape)); + is_modified = true; + } else if (i_rank < max_rank) { + size_t num_append = base_is_blocked; + OPENVINO_ASSERT(max_rank >= i_rank + num_append, "Unsupported blocked shapes combination in canonicalization"); + size_t num_prepend = max_rank - i_rank - num_append; + const auto& out = params[i]->output(0); + const auto& target_inputs = out.get_target_inputs(); + auto rank_norm = std::make_shared(out, num_prepend, num_append); + for (auto& in : target_inputs) + in.replace_source_output(rank_norm); + is_modified = true; + } else { + // todo: 4d blocked + 5d planar layouts are not supported: + + OPENVINO_ASSERT(equal(base_layout.begin(), base_layout.end(), i_layout.begin()), + "Canonicalization got input shapes of equal ranks but different layouts, which is not supported"); + } + } + return is_modified; +} + +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/convert_constants.cpp b/src/common/snippets/src/pass/convert_constants.cpp index b5fb81b77dd98a..c374ee010d3446 100644 --- a/src/common/snippets/src/pass/convert_constants.cpp +++ b/src/common/snippets/src/pass/convert_constants.cpp @@ -24,8 +24,7 @@ ov::snippets::pass::ConvertConstantsToScalars::ConvertConstantsToScalars() { // Note that all Constants {1,1,1,1} are converted to Scalar {1} here // This is needed to simplify shape inference, otherwise {1,1,1,1} Constants can increase output rank // Also some operations support only scalar shapes, so we need separate scalars and shape [1] - const auto shape = constant->get_output_shape(0).size() == 0 ? 
ov::Shape{} : ov::Shape{1}; - auto scalar = std::make_shared(ov::op::v0::Constant(*constant, shape)); + auto scalar = std::make_shared(ov::op::v0::Constant(*constant, ov::Shape{1})); scalar->set_friendly_name(constant->get_friendly_name()); ov::copy_runtime_info(constant, scalar); ov::replace_node(constant, scalar); diff --git a/src/common/snippets/src/pass/hash.cpp b/src/common/snippets/src/pass/hash.cpp index 2f975ef2cbccee..cea21e37e861cf 100644 --- a/src/common/snippets/src/pass/hash.cpp +++ b/src/common/snippets/src/pass/hash.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/opsets/opset1.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" namespace ov { @@ -180,6 +181,17 @@ class SnippetsHasher : public ov::AttributeVisitor { m_hash = hash_combine(m_hash, data[i]); } } + } else if (const auto& a = + ov::as_type>>(&adapter)) { + if (name == "value" && m_node_type_name == "Constant") { + m_hash = hash_combine(m_hash, AttrType::constant); + const int64_t size = a->get()->size(); + m_hash = hash_combine(hash_combine(m_hash, AttrType::size), size); + auto data = static_cast(a->get()->get_ptr()); + for (int64_t i = 0; i < size; i++) { + m_hash = hash_combine(m_hash, data[i]); + } + } } else if (const auto& a = ov::as_type>(&adapter)) { const auto& attrs = a->get(); // Update node attributes in data field diff --git a/src/common/snippets/src/pass/set_softmax_ports.cpp b/src/common/snippets/src/pass/set_softmax_ports.cpp index 1651a6d6217495..f8d38d51ec59b5 100644 --- a/src/common/snippets/src/pass/set_softmax_ports.cpp +++ b/src/common/snippets/src/pass/set_softmax_ports.cpp @@ -25,11 +25,9 @@ ov::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { auto root = m.get_match_root(); const auto& pshape = root->get_input_partial_shape(0); - if (pshape.is_dynamic()) - return false; - const auto shape = pshape.get_shape(); - const auto rank = shape.size(); + OPENVINO_ASSERT(!pshape.rank().is_dynamic(), "SetSoftmaxPorts doesn't support dynamic ranks"); + const auto rank = pshape.rank().get_length(); int64_t axis; if (const auto softmax_v8 = ov::as_type_ptr(root)) { @@ -44,7 +42,7 @@ ov::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { OPENVINO_ASSERT(axis < static_cast(rank), "Softmax has incorrect axis"); std::vector subtensor(rank, 1); - for (size_t i = axis; i < rank; ++i) + for (auto i = axis; i < rank; ++i) subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM; lowered::PortDescriptorUtils::set_port_descriptor_ptr(root->input(0), std::make_shared(root->input(0), subtensor)); diff --git a/src/common/snippets/src/shape_inference/shape_inference.cpp b/src/common/snippets/src/shape_inference/shape_inference.cpp index 22470a13d3443f..0b9117d05d0477 100644 --- a/src/common/snippets/src/shape_inference/shape_inference.cpp +++ b/src/common/snippets/src/shape_inference/shape_inference.cpp @@ -63,6 +63,7 @@ const IShapeInferSnippetsFactory::TRegistry IShapeInferSnippetsFactory::registry SHAPE_INFER_PREDEFINED(ov::op::v0::Result, EmptyShapeInfer), // SHAPE_INFER_OP_SPECIFIC(op::LoadReshape), + SHAPE_INFER_OP_SPECIFIC(op::RankNormalization), SHAPE_INFER_OP_SPECIFIC(op::BroadcastLoad), SHAPE_INFER_OP_SPECIFIC(op::BroadcastMove), }; diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index df894604d11693..242391b908dc03 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -92,7 
+92,8 @@ VectorDims pshape_to_vdims(const PartialShape& pshape) { result.reserve(pshape.size()); for (const auto& d : pshape) result.push_back(d.is_dynamic() ? IShapeInferSnippets::DYNAMIC_DIMENSION : d.get_length()); - return result; + // Note: PartialShape could be empty which designates scalar value. However, Scalars are represented as {1} in Snippets + return result.empty() ? VectorDims {1} : result; } ov::PartialShape vdims_to_pshape(const VectorDims& vdims) { @@ -132,6 +133,10 @@ VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port) return get_planar_vdims(expr_port.get_descriptor_ptr()); } +bool is_dynamic_vdims(const VectorDims& shape) { + return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return v == IShapeInferSnippets::DYNAMIC_DIMENSION; }); +} + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/tests/include/lowering_utils.hpp b/src/common/snippets/tests/include/lowering_utils.hpp index a419e6575a5de5..f2c872f725b7d6 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -6,6 +6,8 @@ #include #include "snippets/op/subgraph.hpp" #include "snippets_helpers.hpp" +#include "snippets/pass_manager.hpp" +#include "snippets/shape_inference/shape_inference.hpp" namespace ov { namespace test { @@ -23,11 +25,17 @@ class DummyEmitter : public ov::snippets::Emitter { void emit_data() const override {} }; +struct DummyCompiledSnippet : public ov::snippets::CompiledSnippet { + const uint8_t* get_code() const override { return nullptr; } + size_t get_code_size() const override { return 0; } + bool empty() const override { return true; } +}; + class DummyTargetMachine : public ov::snippets::TargetMachine { public: DummyTargetMachine(const std::vector& custom_opset = {}); bool is_supported() const override { return true; } - ov::snippets::code get_snippet() const override { return nullptr; } + ov::snippets::CompiledSnippetPtr get_snippet() override { return std::make_shared(); } size_t get_lanes() const override { return 10; } }; @@ -35,6 +43,7 @@ class DummyGenerator : public ov::snippets::Generator { public: DummyGenerator() : ov::snippets::Generator(std::make_shared()) {} DummyGenerator(const std::shared_ptr& t) : ov::snippets::Generator(t) {} + std::shared_ptr clone() const override { return std::make_shared(target); } protected: opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override { return vec2vec; }; @@ -48,13 +57,15 @@ class LoweringTests : public TransformationTestsF { void TearDown() override; static std::shared_ptr getSubgraph(const std::shared_ptr& f); + using IShapeInferSnippetsFactory = ov::snippets::IShapeInferSnippetsFactory; static std::shared_ptr getLoweredSubgraph(const std::shared_ptr& f, const ov::PartialShape& master_shape, const std::vector& backend_passes = {}, const ov::snippets::lowered::pass::PassPipeline& lowered_pre_common = {}, const ov::snippets::lowered::pass::PassPipeline& lowered_post_common = {}, - const std::shared_ptr& generator = nullptr); + const std::shared_ptr& generator = nullptr, + const std::shared_ptr& factory = std::make_shared()); static std::shared_ptr getTokenizedSubgraph(const std::shared_ptr& f); protected: diff --git a/src/common/snippets/tests/include/pass/canonicalization.hpp b/src/common/snippets/tests/include/pass/canonicalization.hpp index 88f6ebc0336a80..bcb27d08cb1395 100644 --- a/src/common/snippets/tests/include/pass/canonicalization.hpp +++ 
b/src/common/snippets/tests/include/pass/canonicalization.hpp @@ -5,36 +5,25 @@ #pragma once #include "lowering_utils.hpp" +#include "snippets/op/subgraph.hpp" #include "snippets_helpers.hpp" +#include "snippets/shape_types.hpp" +#include "snippets/pass/canonicalization.hpp" namespace ov { namespace test { namespace snippets { -using BlockedShape = ov::snippets::op::Subgraph::BlockedShape; -using BlockedShapeVector = ov::snippets::op::Subgraph::BlockedShapeVector; - -// todo: implement tests with 3 inputs and two outputs (aka SnippetsCanonicalizationParams3Inputs) -// Note that the expected output shape isn't necessary equal to one of the output blocked_shapes. -// For example, consider the following graph: (1, 2, 2, 1, 8) + (1, 2, 1, 1, 8) + (1, 2, 1, 5, 8) => (1, 2, 2, 1, 8) + (1, 2, 1, 5, 8). -typedef std::tuple< - std::tuple, // Shape & BlockedShape for input 0 - std::tuple, // Shape & BlockedShape for input 0 - BlockedShape, // BlockedShape output shape passed to canonicalize() - Shape // expected output Shape -> canonicalizationParams; - - -class CanonicalizationTests : public LoweringTests, public testing::WithParamInterface { +class CanonicalizationTests : public TransformationTestsF { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + using VectorDims = ov::snippets::VectorDims; + using Layout = std::vector; + virtual void run(); protected: - void SetUp() override; - std::shared_ptr snippets_model; - Shape expected_output_shape; - BlockedShapeVector input_blocked_shapes; - BlockedShapeVector output_blocked_shapes; + std::vector m_input_shapes; + std::vector m_input_layouts; + void prepare_functions(const std::vector& shapes); }; } // namespace snippets diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index 83207244ac031f..5d49d38a6af2e7 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -106,13 +106,13 @@ std::shared_ptr const std::vector& backend_passes, const ov::snippets::lowered::pass::PassPipeline& lowered_pre_common, const ov::snippets::lowered::pass::PassPipeline& lowered_post_common, - const std::shared_ptr& generator) { + const std::shared_ptr& generator, + const std::shared_ptr& factory) { auto subgraph = getTokenizedSubgraph(f); subgraph->set_generator(generator == nullptr ? 
std::make_shared() : generator); - subgraph->set_master_shape(master_shape); subgraph->set_tile_rank(2); // Note: lowered_pipeline would have no effect on subgraph body, since it's applied on linear IR - subgraph->generate(backend_passes, lowered_pre_common, lowered_post_common); + subgraph->generate({}, {}, {}, backend_passes, lowered_pre_common, lowered_post_common, factory); return subgraph; } diff --git a/src/common/snippets/tests/src/pass/canonicalization.cpp b/src/common/snippets/tests/src/pass/canonicalization.cpp index 4981b4f8d8e139..a9311b67598263 100644 --- a/src/common/snippets/tests/src/pass/canonicalization.cpp +++ b/src/common/snippets/tests/src/pass/canonicalization.cpp @@ -5,101 +5,84 @@ #include #include "pass/canonicalization.hpp" #include "common_test_utils/common_utils.hpp" -#include +#include "snippets/pass/canonicalization.hpp" +#include "snippets/op/rank_normalization.hpp" +#include namespace ov { namespace test { namespace snippets { -using ov::snippets::op::Subgraph; - -class SKIP_CanonicalizationTests : public CanonicalizationTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - -std::string CanonicalizationTests::getTestCaseName(testing::TestParamInfo obj) { - std::vector> inputs(2); - Subgraph::BlockedShape output; - Shape expectedOutput; - std::tie(inputs[0], inputs[1], output, expectedOutput) = obj.param; - std::ostringstream result; - for (size_t i = 0; i < inputs.size(); i++) { - const auto& blockedshape = std::get<1>(inputs[i]); - // input shape - result << "IS[" << i << "]=" << ov::test::utils::vec2str(std::get<0>(inputs[i])) << "_"; - // input blocked shape - result << "IBS[" << i << "]=" << ov::test::utils::partialShape2str({std::get<0>(blockedshape)}) << "_"; - // input blocked order - result << "IBO[" << i << "]=" << ov::test::utils::vec2str(std::get<1>(blockedshape)) << "_"; - } - // output blocked shape - result << "OBS[0]=" << ov::test::utils::partialShape2str({std::get<0>(output)}) << "_"; - // output blocked order - result << "OBO[0]=" << ov::test::utils::vec2str(std::get<1>(output)) << "_"; - result << "ExpOS[0]=" << ov::test::utils::vec2str(expectedOutput) << "_"; - return result.str(); +namespace { +void normalizeParameter(const std::shared_ptr& par, size_t num_prepend, size_t num_append) { + auto target_inputs = par->get_output_target_inputs(0); + auto rank_norm = std::make_shared(par, + num_prepend, + num_append); + for (auto& t : target_inputs) + t.replace_source_output(rank_norm); } +} // namespace -void CanonicalizationTests::SetUp() { - TransformationTestsF::SetUp(); - std::vector> inputs(2); - output_blocked_shapes.resize(1); - std::tie(inputs[0], inputs[1], output_blocked_shapes[0], expected_output_shape) = this->GetParam(); +void CanonicalizationTests::prepare_functions(const std::vector& shapes) { + std::vector pshapes; + pshapes.reserve(shapes.size()); + for (const auto& v : shapes ) + pshapes.emplace_back(v); + const auto &f = AddFunction(pshapes); + model = f.getOriginal(); + model_ref = model->clone(); +} - input_blocked_shapes = {std::get<1>(inputs[0]), std::get<1>(inputs[1])}; - snippets_model = std::make_shared(std::vector{std::get<0>(inputs[0]), std::get<0>(inputs[1])}); +void CanonicalizationTests::run() { + ASSERT_TRUE(model); + ASSERT_EQ(m_input_shapes.size(), m_input_layouts.size()); + BlockedShapeVector blocked_input_shapes; + blocked_input_shapes.reserve(m_input_shapes.size()); + for (size_t i = 0; i < m_input_shapes.size(); i++) + 
blocked_input_shapes.emplace_back(m_input_shapes[i], m_input_layouts[i]); + manager.register_pass(blocked_input_shapes); + disable_rt_info_check(); } -TEST_P(CanonicalizationTests, Add) { - model = snippets_model->getOriginal(); - model_ref = snippets_model->getReference(); - auto subgraph = getTokenizedSubgraph(model); - subgraph->set_generator(std::make_shared()); - auto canonical_output_shape = subgraph->canonicalize(output_blocked_shapes, input_blocked_shapes); - ASSERT_TRUE(canonical_output_shape.is_static()); - ASSERT_DIMS_EQ(canonical_output_shape.get_shape(), expected_output_shape); +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_0) { + m_input_shapes = {{2, 3, 10, 64}, {2, 3, 10, 64}}; + m_input_layouts = {{0, 1, 2, 3}, {0, 1, 2, 3}}; + prepare_functions(m_input_shapes); + run(); } namespace CanonicalizationTestsInstantiation { -using ov::snippets::op::Subgraph; -std::vector input_shapes; -Shape expected_output_shape; - -using ov::Shape; -ov::element::Type_t prec = ov::element::f32; -std::tuple blockedInput0{{1, 64, 2, 5}, - {{1, 4, 2, 5, 16}, {0, 1, 2, 3, 1}, prec}}; -Subgraph::BlockedShape output{{1, 4, 2, 5, 16}, {0, 1, 2, 3, 1}, prec}; -Shape canonical_shape{1, 4, 2, 5, 16}; - -std::vector> blockedInput1{{{1, 1, 2, 5}, {{1, 1, 2, 5, 1}, {0, 1, 2, 3, 1}, prec}}, - {{1, 1, 2, 1}, {{1, 1, 2, 1, 1}, {0, 1, 2, 3, 1}, prec}}, - {{1, 64, 1, 1}, {{1, 4, 1, 1, 16}, {0, 1, 2, 3, 1}, prec}}}; +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_1) { + m_input_shapes = {{2, 3, 10, 64}, + {10, 64}}; + m_input_layouts = {{0, 1, 2, 3}, + {0, 1}}; + prepare_functions(m_input_shapes); + normalizeParameter(model_ref->get_parameters()[1], 2, 0); + run(); +} -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastBlocked, - SKIP_CanonicalizationTests /* CVS-114607 */, - ::testing::Combine(::testing::Values(blockedInput0), - ::testing::ValuesIn(blockedInput1), - ::testing::Values(output), - ::testing::Values(canonical_shape)), - CanonicalizationTests::getTestCaseName); +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_2) { + m_input_shapes = {{2, 3, 10, 64, 16}, + {1, 10, 64}}; + m_input_layouts = {{0, 1, 2, 3, 1}, + {0, 1, 2}}; + prepare_functions({{2, 48, 10, 64}, + {1, 10, 64}}); + const auto& params = model_ref->get_parameters(); + // Note: We can't create functions with mismatching input shapes, + // so we have to set Parameter shapes after the functions were created + // This reproduces Snippets pipeline well, since blocked shapes are set after the tokenization + params[0]->set_partial_shape(PartialShape(m_input_shapes[0])); + model->get_parameters()[0]->set_partial_shape(PartialShape(m_input_shapes[0])); -std::vector> planarInput1{{{1, 1, 2, 5}, {{1, 2, 5}, {0, 1, 2}, prec}}, - {{1, 1, 2, 5}, {{2, 5}, {0, 1}, prec}}, - {{1, 2, 5}, {{2, 5}, {0, 1}, prec}}, - {{2, 5}, {{2, 5}, {0, 1}, prec}}, - {{5}, {{5}, {0}, prec}}}; + normalizeParameter(params[1], 1, 1); + // need to trigger validate..(...) 
manually to propagate new blocked shapes, + // this is correct since RankNormalization ops re-enables shape propagation for blocked shapes + model_ref->validate_nodes_and_infer_types(); + run(); +} -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastPlanar, - SKIP_CanonicalizationTests /* CVS-114607 */, - ::testing::Combine(::testing::Values(blockedInput0), - ::testing::ValuesIn(planarInput1), - ::testing::Values(output), - ::testing::Values(canonical_shape)), - CanonicalizationTests::getTestCaseName); } // namespace CanonicalizationTestsInstantiation } // namespace snippets } // namespace test diff --git a/src/common/transformations/include/ov_ops/nms_ie_internal.hpp b/src/common/transformations/include/ov_ops/nms_ie_internal.hpp index 797b89add6d4bb..75f4fa6e1b91fd 100644 --- a/src/common/transformations/include/ov_ops/nms_ie_internal.hpp +++ b/src/common/transformations/include/ov_ops/nms_ie_internal.hpp @@ -21,6 +21,10 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { NonMaxSuppressionIEInternal() = default; + static constexpr int Rotation_None = 0; + static constexpr int Rotation_Clockwise = 1; + static constexpr int Rotation_Counterclockwise = 2; + NonMaxSuppressionIEInternal(const Output& boxes, const Output& scores, const Output& max_output_boxes_per_class, @@ -29,7 +33,8 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { int center_point_box, bool sort_result_descending, const element::Type& output_type = element::i64, - const element::Type& score_output_type = element::f32); + const element::Type& score_output_type = element::f32, + const int rotation = Rotation_None); NonMaxSuppressionIEInternal(const Output& boxes, const Output& scores, @@ -40,7 +45,8 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { int center_point_box, bool sort_result_descending, const element::Type& output_type = element::i64, - const element::Type& score_output_type = element::f32); + const element::Type& score_output_type = element::f32, + const int rotation = Rotation_None); void validate_and_infer_types() override; @@ -52,6 +58,7 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { bool m_sort_result_descending = true; element::Type m_output_type; element::Type m_scores_output_type; + int m_rotation{Rotation_None}; private: int64_t max_boxes_output_from_input() const; diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp new file mode 100644 index 00000000000000..0b8d31b404090e --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertU4WeightsZeroPointToScalar; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief Converts U4 weights zero point to scalar if all values are equal + */ +class ov::pass::ConvertU4WeightsZeroPointToScalar : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertU4WeightsZeroPointToScalar", "0"); + ConvertU4WeightsZeroPointToScalar(); +}; diff --git 
a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp index 61722260bb5a57..71b5fcafc9fe75 100644 --- a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp @@ -19,6 +19,7 @@ class TRANSFORMATIONS_API GeluFusionWithErfTwo; class TRANSFORMATIONS_API GeluFusionWithErfThree; class TRANSFORMATIONS_API GeluFusionWithErfFour; class TRANSFORMATIONS_API GeluFusionWithTanh; +class TRANSFORMATIONS_API GeluFusionWithTanhNoPower; } // namespace pass } // namespace ov @@ -78,6 +79,17 @@ class ov::pass::GeluFusionWithTanh : public ov::pass::MatcherPass { GeluFusionWithTanh(); }; +/** + * @ingroup ie_transformation_common_api + * @brief GeluFusion transformation replaces a sub-graph + * x * 0.5 * (1 + tanh((x * 0.044715 * x + 1) * x * sqrt(2 / pi))) with a Gelu (Tanh) op. + */ +class ov::pass::GeluFusionWithTanhNoPower : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("GeluFusionWithTanhNoPower", "0"); + GeluFusionWithTanhNoPower(); +}; + /** * @ingroup ie_transformation_common_api * @brief GeluFusion transformation replaces various sub-graphs with a Gelu op. @@ -91,5 +103,6 @@ class ov::pass::GeluFusion : public ov::pass::GraphRewrite { add_matcher(); add_matcher(); add_matcher(); + add_matcher(); } }; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp b/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp new file mode 100644 index 00000000000000..5eb3b285365f92 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertNMSRotatedToNMSIEInternal; + +} // namespace pass +} // namespace ov + +class ov::pass::ConvertNMSRotatedToNMSIEInternal : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertNMSRotatedToNMSIEInternal", "0"); + ConvertNMSRotatedToNMSIEInternal(); +}; diff --git a/src/common/transformations/src/ov_ops/nms_ie_internal.cpp b/src/common/transformations/src/ov_ops/nms_ie_internal.cpp index c305304dbf7238..e879224dd935c7 100644 --- a/src/common/transformations/src/ov_ops/nms_ie_internal.cpp +++ b/src/common/transformations/src/ov_ops/nms_ie_internal.cpp @@ -20,12 +20,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out int center_point_box, bool sort_result_descending, const ov::element::Type& output_type, - const ov::element::Type& score_output_type) + const ov::element::Type& score_output_type, + const int rotation) : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}), m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + m_scores_output_type(score_output_type), + m_rotation(rotation) { constructor_validate_and_infer_types(); } @@ -38,12 +40,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out int center_point_box, 
bool sort_result_descending, const ov::element::Type& output_type, - const ov::element::Type& score_output_type) + const ov::element::Type& score_output_type, + const int rotation) : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}), m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + m_scores_output_type(score_output_type), + m_rotation{rotation} { constructor_validate_and_infer_types(); } @@ -59,7 +63,9 @@ std::shared_ptr op::internal::NonMaxSuppressionIEInternal::clone_with_new_ new_args.at(5), m_center_point_box, m_sort_result_descending, - m_output_type); + m_output_type, + m_scores_output_type, + m_rotation); } else if (new_args.size() == 5) { return make_shared(new_args.at(0), new_args.at(1), @@ -68,7 +74,9 @@ std::shared_ptr op::internal::NonMaxSuppressionIEInternal::clone_with_new_ new_args.at(4), m_center_point_box, m_sort_result_descending, - m_output_type); + m_output_type, + m_scores_output_type, + m_rotation); } OPENVINO_THROW("Unsupported number of inputs: " + std::to_string(new_args.size())); } @@ -79,6 +87,7 @@ bool op::internal::NonMaxSuppressionIEInternal::visit_attributes(AttributeVisito visitor.on_attribute("sort_result_descending", m_sort_result_descending); visitor.on_attribute("output_type", m_output_type); visitor.on_attribute("score_output_type", m_scores_output_type); + visitor.on_attribute("rotation", m_rotation); return true; } diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp new file mode 100644 index 00000000000000..6313db127ac406 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/reference/autobroadcast_binop.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertU4WeightsZeroPointToScalar::ConvertU4WeightsZeroPointToScalar() { + MATCHER_SCOPE(ConvertU4WeightsZeroPointToScalar); + auto weights_m = pattern::wrap_type(pattern::type_matches(ov::element::u4)); + auto convert_m = pattern::wrap_type({weights_m}, pattern::consumers_count(1)); + + auto float_zp_predicate = [](ov::Output output) -> bool { + return pattern::type_matches_any({ov::element::f32, ov::element::f16})(output) && + pattern::consumers_count(1)(output); + }; + auto float_zero_point_m = pattern::wrap_type(float_zp_predicate); + + auto u4_zp_predicate = [](ov::Output output) -> bool { + return pattern::type_matches(ov::element::u4)(output) && pattern::consumers_count(1)(output); + }; + auto u4_zero_point_m = pattern::wrap_type(u4_zp_predicate); + auto zero_point_convert_m = pattern::wrap_type({u4_zero_point_m}, float_zp_predicate); + + auto zero_point_m = std::make_shared(OutputVector{float_zero_point_m, zero_point_convert_m}); + auto subtract_m = pattern::wrap_type({convert_m, zero_point_m}); + + 
ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + auto& pattern_map = m.get_pattern_value_map(); + auto weights = ov::as_type_ptr(pattern_map.at(weights_m).get_node_shared_ptr()); + std::shared_ptr zero_point; + if (pattern_map.count(float_zero_point_m)) { + const auto& float_zp = pattern_map.at(float_zero_point_m); + zero_point = ov::as_type_ptr(float_zp.get_node_shared_ptr()); + } else { + const auto& u4_zp = pattern_map.at(u4_zero_point_m); + zero_point = ov::as_type_ptr(u4_zp.get_node_shared_ptr()); + } + if (!weights || !zero_point) + return false; + // Due to the matcher specific and Subtract branches similarity, + // weights and zero_point might be mixed up with each other + if (ov::shape_size(weights->get_shape()) < ov::shape_size(zero_point->get_shape())) + std::swap(zero_point, weights); + + auto zero_point_shape = zero_point->get_shape(); + if (ov::shape_size(zero_point_shape) == 1) + return false; + + const auto& weights_shape = weights->get_shape(); + const size_t weights_rank = weights_shape.size(); + const size_t zero_point_rank = zero_point_shape.size(); + // Zero point constant can be converted into scalar only if this does not affect Subtract output shape + if (weights_rank < zero_point_rank) + return false; + + zero_point_shape.insert(zero_point_shape.begin(), weights_rank - zero_point_rank, 1); + for (size_t i = 0; i < weights_rank; ++i) { + if (zero_point_shape[i] > weights_shape[i]) + return false; + } + + float zp_value; + if (!ov::op::util::get_single_value(zero_point, zp_value)) + return false; + const auto new_zp = ov::op::v0::Constant::create(zero_point->get_element_type(), {}, {zp_value}); + return ov::replace_node_update_name(zero_point, new_zp); + }; + + auto m = std::make_shared(subtract_m, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp index 90b718f8067cae..7f7915f7965774 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp @@ -8,6 +8,7 @@ #include +#include #include #include "itt.hpp" @@ -16,8 +17,8 @@ #include "openvino/op/constant.hpp" #include "openvino/op/divide.hpp" #include "openvino/op/erf.hpp" -#include "openvino/op/gelu.hpp" #include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -302,11 +303,10 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { return false; } - constexpr float pi = 3.141592653589793238462643383279502884f; bool valid_constant_values = op::util::has_constant_value(pow_constant_value, 3.0f) && op::util::has_constant_value(mul_0_constant_value, 0.044715f, 0.001f) && - op::util::has_constant_value(mul_1_constant_value, std::sqrt(2.0f / pi), 0.01f) && + op::util::has_constant_value(mul_1_constant_value, std::sqrt(2.0 / M_PI), 0.01) && op::util::has_constant_value(mul_2_constant_value, 0.5f) && op::util::has_constant_value(add_1_constant_value, 1.0f); @@ -336,3 +336,76 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto m = std::make_shared(mul_3, matcher_name); register_matcher(m, callback); } + +ov::pass::GeluFusionWithTanhNoPower::GeluFusionWithTanhNoPower() { + // Replaces a sub-graph with a Gelu (ov::op::v0::Tanh) op + // x * 0.5 * (1 + tanh((x 
* 0.044715 * x + 1) * x * sqrt(2 / pi))) + MATCHER_SCOPE(GeluFusionWithTanhNoPower); + auto input = pattern::any_input(); + + auto const1 = pattern::wrap_type(); + auto mul1 = pattern::wrap_type({input, const1}); + + auto mul2 = pattern::wrap_type({mul1, input}); + + auto const2 = pattern::wrap_type(); + auto add1 = pattern::wrap_type({const2, mul2}); + + auto const3 = pattern::wrap_type(); + auto mul3 = pattern::wrap_type({input, const3}); + + auto mul4 = pattern::wrap_type({add1, mul3}); + + auto tanh = pattern::wrap_type({mul4}); + + auto const4 = pattern::wrap_type(); + auto add2 = pattern::wrap_type({tanh, const4}); + + auto const5 = pattern::wrap_type(); + auto mul5 = pattern::wrap_type({input, const5}); + + auto mul6 = pattern::wrap_type({add2, mul5}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto x_output = pattern_to_output.at(input); + + auto const1_value = pattern_to_output.at(const1).get_node_shared_ptr(); + auto const2_value = pattern_to_output.at(const2).get_node_shared_ptr(); + auto const3_value = pattern_to_output.at(const3).get_node_shared_ptr(); + auto const4_value = pattern_to_output.at(const4).get_node_shared_ptr(); + auto const5_value = pattern_to_output.at(const5).get_node_shared_ptr(); + + bool valid_constant_values = op::util::has_constant_value(const1_value, 0.044715f, 0.001f) && + op::util::has_constant_value(const2_value, 1.0f) && + op::util::has_constant_value(const3_value, std::sqrt(2.0 / M_PI), 0.01) && + op::util::has_constant_value(const4_value, 1.0f) && + op::util::has_constant_value(const5_value, 0.5f); + + if (!valid_constant_values) { + return false; + } + + auto gelu = std::make_shared(x_output, op::GeluApproximationMode::TANH); + + gelu->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info( + { + pattern_to_output.at(mul1).get_node_shared_ptr(), + pattern_to_output.at(mul2).get_node_shared_ptr(), + pattern_to_output.at(add1).get_node_shared_ptr(), + pattern_to_output.at(mul3).get_node_shared_ptr(), + pattern_to_output.at(mul4).get_node_shared_ptr(), + pattern_to_output.at(tanh).get_node_shared_ptr(), + pattern_to_output.at(add2).get_node_shared_ptr(), + pattern_to_output.at(mul5).get_node_shared_ptr(), + pattern_to_output.at(mul6).get_node_shared_ptr(), + }, + gelu); + ov::replace_node(m.get_match_root(), gelu); + return true; + }; + + auto m = std::make_shared(mul6, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp index e5eae04c640553..5b3aaec614ff17 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp @@ -148,6 +148,15 @@ ov::pass::GRUCellFusion::GRUCellFusion() { Bh = rg.make(WRh.get_element_type(), Shape{1, static_cast(hidden_size)}, 0); } + // perform additional check for applicability of the transformation + // without this check, process_weights can fail + if (WR.get_partial_shape()[1] != (hidden_size + input_size)) { + return false; + } + if (WRh.get_partial_shape()[1] != (hidden_size + input_size)) { + return false; + } + Output Wzrh, Rzrh, Bzrh; if (cnt_of_consumers_of_zero_out == 1 && cnt_of_consumers_of_first_out == 2) { tie(Wzrh, Rzrh) = process_weights(rg, false, WR, WRh, input_size, 
hidden_size, axis_0, axis_1); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 068e1f27a291e9..9a3446f2386161 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -21,6 +21,7 @@ #include "transformations/common_optimizations/conv_to_binary_conv.hpp" #include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include "transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" #include "transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp" #include "transformations/common_optimizations/depth_to_space_fusion.hpp" #include "transformations/common_optimizations/dilated_convolution_converter.hpp" @@ -86,6 +87,7 @@ #include "transformations/op_conversions/convert_ti_to_sequences.hpp" #include "transformations/resolve_names_collisions.hpp" #include "transformations/smart_reshape/lstm_states_broadcast.hpp" +#include "transformations/smart_reshape/matmul_sr.hpp" #include "transformations/smart_reshape/reshape_sinking.hpp" bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr& f) { @@ -165,11 +167,13 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr auto transpose_sinking = manager.register_pass(); ADD_MATCHER(transpose_sinking, TransposeSinking) - // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking, // because it replaces pattern that may contain Transposes which must be optimized before // the transformation and it also inserts Transpose that can be optimized by TransposeSinking ADD_MATCHER(transpose_sinking, SplitSqueezeConcatFusion) + + REGISTER_PASS(manager, TransposeMatMul) + auto eliminations = manager.register_pass(); ADD_MATCHER(eliminations, EliminateUnsqueezeGather) ADD_MATCHER(eliminations, NopElimination, m_use_shapes) @@ -212,6 +216,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr ADD_MATCHER(common_fusions, ShuffleChannelsFusion, !m_use_shapes) ADD_MATCHER(common_fusions, NonZeroHorizontalFusion) ADD_MATCHER(common_fusions, AdaptivePoolToReduce) + ADD_MATCHER(common_fusions, ConvertU4WeightsZeroPointToScalar) common_fusions->set_name("ov::pass::CommonFusions"); REGISTER_PASS(manager, BinarizeWeights) diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp index f9738929931f21..3304ee3718ab57 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp @@ -116,7 +116,7 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } // Remove inputs bool pass_required = false; - std::set> required_inputs; + std::set required_inputs_indices; auto op_inputs = multi_subgraph_op->input_values(); std::vector> to_remove_descriptors_indexes; to_remove_descriptors_indexes.resize(subgraphs_size); @@ -133,7 +133,7 @@ bool 
ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } else { // collecting required inputs is needed to detect cases where the input // is not needed in a one body, but the other one uses it (for example If case) - required_inputs.insert(op_inputs[body_in_descriptors[i]->m_input_index]); // only unique + required_inputs_indices.insert(body_in_descriptors[i]->m_input_index); } } } @@ -148,7 +148,9 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } }; auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr& op, + std::set& required_inputs_indices, uint64_t removed_loop_idx) { + std::set new_required_inputs_indices; for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& descriptors = op->get_input_descriptions(static_cast(body_idx)); for (auto& desc : descriptors) { @@ -157,6 +159,14 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } } } + for (auto input_index : required_inputs_indices) { + if (input_index > removed_loop_idx) { + new_required_inputs_indices.insert(input_index - 1); + } else { + new_required_inputs_indices.insert(input_index); + } + } + required_inputs_indices = new_required_inputs_indices; }; // Remove dangling body params and input and update input descriptors for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { @@ -174,13 +184,17 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st update_body_param_desc(body_in_descriptors, body_in_descriptors[desc_idx]->m_body_parameter_index); // remove dangling input of MultiSubGraphOp which was not removed earlier - auto& current_input = op_inputs[body_in_descriptors[desc_idx]->m_input_index]; - if (std::count(std::begin(required_inputs), std::end(required_inputs), current_input) == 0 && + auto current_input_idx = body_in_descriptors[desc_idx]->m_input_index; + auto& current_input = op_inputs[current_input_idx]; + // the same input tensor can go to different input ports + if (std::count(std::begin(required_inputs_indices), + std::end(required_inputs_indices), + current_input_idx) == 0 && std::count(std::begin(op_inputs), std::end(op_inputs), current_input) > 0) { - op_inputs.erase(std::next(op_inputs.begin(), body_in_descriptors[desc_idx]->m_input_index)); + op_inputs.erase(std::next(op_inputs.begin(), current_input_idx)); // Move all input indexes (in all bodies) which are after these indicated by // to_remove_descriptors_indexes and are not used in any body - update_op_inputs_desc(multi_subgraph_op, body_in_descriptors[desc_idx]->m_input_index); + update_op_inputs_desc(multi_subgraph_op, required_inputs_indices, current_input_idx); } } else { updated_body_in_descriptors.emplace_back(body_in_descriptors[desc_idx]); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp new file mode 100644 index 00000000000000..b3040cda132852 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp @@ -0,0 +1,109 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp" + +#include +#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include 
"openvino/op/non_max_suppression.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/nms_ie_internal.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertNMSRotatedToNMSIEInternal::ConvertNMSRotatedToNMSIEInternal() { + MATCHER_SCOPE(ConvertNMSRotatedToNMSIEInternal); + auto nms = ov::pass::pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto nms_rotated = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms_rotated || transformation_callback(nms_rotated)) { + return false; + } + + const auto new_args = nms_rotated->input_values(); + const std::size_t num_of_inputs = new_args.size(); + OPENVINO_ASSERT(num_of_inputs == 5); + + const auto& max_per_class = new_args.at(2); + const auto& iou_threshold = new_args.at(3); + const auto& score_threshold = new_args.at(4); + + // vector of new openvino operations + NodeVector new_ops; + + auto one_dim_shape = Shape{1}; + + Output new_max_per_class; + Output new_iou_threshold; + Output new_score_threshold; + Output new_soft_nms_sigma; + + Output new_shape_for_max_per_class = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_iou_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_score_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_soft_nms_sigma = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + + new_max_per_class = std::make_shared(max_per_class, new_shape_for_max_per_class, true); + new_ops.emplace_back(new_max_per_class.get_node_shared_ptr()); + + new_iou_threshold = std::make_shared(iou_threshold, new_shape_for_iou_threshold, true); + new_ops.emplace_back(new_iou_threshold.get_node_shared_ptr()); + + new_score_threshold = + std::make_shared(score_threshold, new_shape_for_score_threshold, true); + new_ops.emplace_back(new_score_threshold.get_node_shared_ptr()); + + constexpr int BoxEncodingType_Center = 1; // see NonMaxSuppression::BoxEncodingType + const int center_point_box = BoxEncodingType_Center; // for NMSRotated is it always Center + + const auto rotation = nms_rotated->get_clockwise() + ? 
op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise + : op::internal::NonMaxSuppressionIEInternal::Rotation_Counterclockwise; + + std::shared_ptr nms_legacy{nullptr}; + + nms_legacy = + std::make_shared(new_args.at(0), + new_args.at(1), + + new_max_per_class, + new_iou_threshold, + new_score_threshold, + + center_point_box, + nms_rotated->get_sort_result_descending(), + element::i32, + nms_rotated->get_output_element_type(1), + rotation); + new_ops.push_back(nms_legacy); + + Output output_0 = nms_legacy->output(0); + if (nms_rotated->output(0).get_element_type() != output_0.get_element_type()) { + output_0 = std::make_shared(output_0, nms_rotated->output(0).get_element_type()); + output_0.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(0))); + new_ops.emplace_back(output_0.get_node_shared_ptr()); + } + + Output output_2 = nms_legacy->output(2); + if (nms_rotated->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms_rotated->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(2))); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_legacy->set_friendly_name(nms_rotated->get_friendly_name()); + ov::copy_runtime_info(nms_rotated, new_ops); + ov::replace_node(nms_rotated, {output_0, nms_legacy->output(1), output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/utils/utils.cpp b/src/common/transformations/src/transformations/utils/utils.cpp index 62b1765e7ba275..b7cde395a66eb5 100644 --- a/src/common/transformations/src/transformations/utils/utils.cpp +++ b/src/common/transformations/src/transformations/utils/utils.cpp @@ -31,6 +31,8 @@ bool get_single_value(const std::shared_ptr& const_node, float return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::f64: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); + case element::Type_t::i4: + return util::normalize_single_value(const_node->cast_vector(), value, check_value_range); case element::Type_t::i8: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::i16: @@ -39,6 +41,8 @@ bool get_single_value(const std::shared_ptr& const_node, float return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::i64: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); + case element::Type_t::u4: + return util::normalize_single_value(const_node->cast_vector(), value, check_value_range); case element::Type_t::u8: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::u16: diff --git a/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp new file mode 100644 index 00000000000000..8fc896065e9001 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp @@ -0,0 +1,208 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include 
"transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/pass/manager.hpp" + +using namespace testing; +using namespace ov; + +TEST_F(TransformationTestsF, ConvertU4WeightsFloatZeroPointToScalar) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8.1f}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, scalar_shape, {8.1f}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalar) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, scalar_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + 
comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsFloatZeroPointToScalarWeightsWithBiggerRank) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, scalar_shape, {8}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, FuseU4WeightsAndZeroPointNotScalarLikeZP) { + auto weights_precision = ov::element::u8; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + std::vector zero_point_values(ov::shape_size(decompression_shape), 8); + zero_point_values.back() = 6; + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, zero_point_values); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, FuseU4WeightsAndZeroPointNotU4Weights) { + auto weights_precision = ov::element::u8; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, 
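+// note on this and the neighbouring negative cases: they intentionally leave
+// `model_ref` unset, which (by the TransformationTestsF convention, as far as
+// the fixture shows) makes the reference an untouched copy of `model`, i.e. the
+// pass is expected to be a no-op whenever the preconditions do not hold.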
ConvertU4WeightsFloatZeroPointToScalarAdditionalZPConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8}); + auto zero_point_consumer = std::make_shared(zero_point); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarAdditionalZPConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_consumer = std::make_shared(zero_point); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarAdditionalZPConvertConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto zero_point_convert_consumer = std::make_shared(zero_point_convert); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_convert_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarZPWithBiggerRank) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{1, 32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto zero_point_convert_consumer = 
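+    // judging by the include list, the extra consumer here is a ShapeOf node:
+    // once the Convert output is shared, shrinking the zero point to a scalar
+    // would change the shape that consumer observes, so the pass has to leave
+    // this pattern untouched.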
std::make_shared(zero_point_convert); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_convert_consumer}, ParameterVector{}); + manager.register_pass(); +} diff --git a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp index 7c5311307d7d95..aa1f1d32a3da16 100644 --- a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp @@ -9,14 +9,21 @@ #include #include +#include #include #include #include #include "common_test_utils/ov_test_utils.hpp" #include "openvino/core/model.hpp" -#include "openvino/opsets/opset7.hpp" -#include "openvino/opsets/opset9.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/erf.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/tanh.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" #include "transformations/convert_precision.hpp" @@ -28,17 +35,17 @@ using namespace ov; TEST_F(TransformationTestsF, GeluFusionPatternOne) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, mul_const); - auto mul = std::make_shared(mul_first, add); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, mul_const); + auto mul = std::make_shared(mul_first, add); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -47,24 +54,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternOne) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternOneF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = 
std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, mul_const); - auto mul = std::make_shared(mul_first, add); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, mul_const); + auto mul = std::make_shared(mul_first, add); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -73,24 +80,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternOneF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternTwo) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -99,24 +106,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternTwo) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternTwoF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -125,24 +132,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternTwoF16) { { auto data = 
std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternThree) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(add, mul_const); - auto mul = std::make_shared(data, mul_first); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(add, mul_const); + auto mul = std::make_shared(data, mul_first); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -151,24 +158,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternThree) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternThreeF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(add, mul_const); - auto mul = std::make_shared(data, mul_first); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(add, mul_const); + auto mul = std::make_shared(data, mul_first); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -177,24 +184,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternThreeF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternFour) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto mul1_const = opset9::Constant::create(element::f32, Shape{1}, {1.0f / M_SQRT2}); - auto add_const = opset9::Constant::create(element::f32, Shape{1}, {0.5f}); - auto mul2_const = opset9::Constant::create(element::f32, Shape{1}, {0.5f}); 
+ auto mul1_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0f / M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5f}); + auto mul2_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5f}); - auto mul1 = std::make_shared(data, mul1_const); - auto erf = std::make_shared(mul1); - auto mul2 = std::make_shared(erf, mul2_const); - auto add = std::make_shared(mul2, add_const); - auto mul3 = std::make_shared(data, add); + auto mul1 = std::make_shared(data, mul1_const); + auto erf = std::make_shared(mul1); + auto mul2 = std::make_shared(erf, mul2_const); + auto add = std::make_shared(mul2, add_const); + auto mul3 = std::make_shared(data, add); model = std::make_shared(NodeVector{mul3}, ParameterVector{data}); @@ -203,24 +210,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternFour) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternFourF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto mul1_const = opset9::Constant::create(element::f16, Shape{1}, {1.0f / M_SQRT2}); - auto add_const = opset9::Constant::create(element::f16, Shape{1}, {0.5f}); - auto mul2_const = opset9::Constant::create(element::f16, Shape{1}, {0.5f}); + auto mul1_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0f / M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5f}); + auto mul2_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5f}); - auto mul1 = std::make_shared(data, mul1_const); - auto erf = std::make_shared(mul1); - auto mul2 = std::make_shared(erf, mul2_const); - auto add = std::make_shared(mul2, add_const); - auto mul3 = std::make_shared(data, add); + auto mul1 = std::make_shared(data, mul1_const); + auto erf = std::make_shared(mul1); + auto mul2 = std::make_shared(erf, mul2_const); + auto add = std::make_shared(mul2, add_const); + auto mul3 = std::make_shared(data, add); model = std::make_shared(NodeVector{mul3}, ParameterVector{data}); @@ -229,24 +236,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternFourF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternIncorrectDivConstValue) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {1.4149}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.4149}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = 
std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); model_ref = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -257,17 +264,17 @@ TEST_F(TransformationTestsF, GeluFusionPatternIncorrectDivConstValue) { TEST_F(TransformationTestsF, GeluFusionPatternTooShortDivConstValue) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {1.4142}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.4142}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); model_ref = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -278,60 +285,62 @@ TEST_F(TransformationTestsF, GeluFusionPatternTooShortDivConstValue) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_equal_const_values) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = 
std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_params_no_conversion) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_param = std::make_shared(element::f32, Shape{1}); - auto pow = std::make_shared(input, pow_param); - auto mul_0_param = std::make_shared(element::f32, Shape{1}); - auto mul_0 = std::make_shared(pow, mul_0_param); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_param = std::make_shared(element::f32, Shape{1}); + auto pow = std::make_shared(input, pow_param); + auto mul_0_param = std::make_shared(element::f32, Shape{1}); + auto mul_0 = std::make_shared(pow, mul_0_param); + auto add_0 = std::make_shared(input, mul_0); - auto mul_1_param = std::make_shared(element::f32, Shape{1}); - auto mul_1 = std::make_shared(add_0, mul_1_param); + auto mul_1_param = std::make_shared(element::f32, Shape{1}); + auto mul_1 = std::make_shared(add_0, mul_1_param); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_param = std::make_shared(element::f32, Shape{1}); - auto add_1 = std::make_shared(tanh, add_1_param); + auto add_1_param = std::make_shared(element::f32, Shape{1}); + auto add_1 = std::make_shared(tanh, add_1_param); - auto mul_2_param = std::make_shared(element::f32, Shape{1}); - auto mul_2 = std::make_shared(add_1, mul_2_param); + auto mul_2_param = std::make_shared(element::f32, Shape{1}); + auto mul_2 = std::make_shared(add_1, mul_2_param); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared( NodeVector{mul_3}, @@ -342,63 +351,67 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_params_no_conversion) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); + auto input = std::make_shared(element::f32, Shape{2, 2}); auto pow_constant = - std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = 
std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -407,62 +420,66 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_0_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, 
pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.04515f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.04515f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_0_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.4715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{1.4715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, 
add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -471,61 +488,64 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_0_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{0.7980868f}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, Shape{1}, std::vector{0.7980868f}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = 
std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(10.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(10.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -534,63 +554,67 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_1_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_add_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); auto add_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{1.0f + 1.0e-8f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + std::make_shared(element::f32, Shape{1}, std::vector{1.0f + 1.0e-8f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, 
mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_add_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -599,65 +623,110 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_add_1_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_2_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - 
std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); auto mul_2_constant = - std::make_shared(element::f32, Shape{1}, std::vector{0.5f + 1.0e-8f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + std::make_shared(element::f32, Shape{1}, std::vector{0.5f + 1.0e-8f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_2_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{5.0f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{5.0f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } } + +TEST_F(TransformationTestsF, 
FoldGeluOperation) { + { + auto param = std::make_shared(element::f32, Shape{1006, 2, 100, 3, 4096}); + auto const1 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{0.044715f}); + + auto mul1 = std::make_shared(param, const1); + auto mul2 = std::make_shared(mul1, param); + + auto const2 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{1.0}); + auto add1 = std::make_shared(const2, mul2); + + auto const3 = ov::op::v0::Constant::create(element::f32, + Shape{1, 1, 1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul3 = std::make_shared(param, const3); + + auto mul4 = std::make_shared(add1, mul3); + auto tan = std::make_shared(mul4); + + auto const4 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{1.0}); + auto add2 = std::make_shared(tan, const4); + + auto const5 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{0.5}); + auto mul5 = std::make_shared(param, const5); + + auto mul6 = std::make_shared(add2, mul5); + + auto result = std::make_shared(mul6); + model = std::make_shared(NodeVector{result}, ParameterVector{param}); + + manager.register_pass(); + } + + { + auto param = std::make_shared(element::f32, Shape{1006, 2, 100, 3, 4096}); + auto gelu = std::make_shared(param, ov::op::GeluApproximationMode::TANH); + auto result = std::make_shared(gelu); + model_ref = std::make_shared(NodeVector{result}, ParameterVector{param}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp index 698973740e08e6..b0e327e4d4bad4 100644 --- a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp +++ b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp @@ -433,3 +433,74 @@ TEST_F(SharedTransformationTestsF, SharedShapeOfTestMixed) { model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); } } + +namespace { +OutputVector createShapeNodesInMemory(const std::vector& node_order_in_memory, + std::shared_ptr& memory, + const std::string& node_name_prefix, + const std::shared_ptr& input, + element::Type output_type) { + OutputVector outputs; + memory.reset(::malloc(node_order_in_memory.size() * sizeof(v3::ShapeOf)), ::free); + for (size_t i = 0; i < node_order_in_memory.size(); ++i) { + v3::ShapeOf* node_addr = static_cast(memory.get()) + node_order_in_memory[i]; + auto node_ptr = + std::shared_ptr(new (node_addr) v3::ShapeOf(input, output_type), [](v3::ShapeOf* node) { + node->v3::ShapeOf::~ShapeOf(); + }); + std::stringstream ss; + ss << node_name_prefix << i; + node_ptr->set_friendly_name(ss.str()); + outputs.push_back(node_ptr->output(0)); + } + + return outputs; +} + +std::shared_ptr createModelWithShapes(const Shape& input_shape, + const std::vector& node_order_in_memory, + const std::string& node_name_prefix, + std::shared_ptr& buffer) { + auto input = std::make_shared(element::f32, input_shape); + auto shape_nodes = createShapeNodesInMemory(node_order_in_memory, buffer, node_name_prefix, input, element::i64); + + NodeVector inputs_of_concat; + for (const auto& shape_node : shape_nodes) { + auto node = std::make_shared(shape_node, element::i64); + inputs_of_concat.push_back(node); + } + + auto concat = std::make_shared(inputs_of_concat, 0); + return std::make_shared(NodeVector{concat}, ParameterVector{input}); +} +} // namespace + +/** + * @brief Check that the node address does not influence the 
transformation result + */ +TEST(TransformationTests, SharedShapeOfTestRandomOrder) { + Shape input_shape{120, 4}; + std::shared_ptr buffer; + // nodes are placed into pre-allocated memory in order that is specified in next variable + std::vector> node_orders_in_memory = {{0, 1}, {1, 0}}; + + std::vector> models; + for (const auto& node_order_in_memory : node_orders_in_memory) { + auto model = createModelWithShapes(input_shape, node_order_in_memory, "Shape_", buffer); + + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + + const auto model_ops = model->get_ops(); + const auto op_it = std::find_if(model_ops.begin(), model_ops.end(), [](const std::shared_ptr& node) { + return node->get_friendly_name() == "Shape_0"; + }); + ASSERT_TRUE(op_it != model_ops.end()) << "node Shape_0 is not found in model"; + // we need to clone while memory will be reused on the next iteration for the new model + models.push_back(model->clone()); + } + + FunctionsComparator comparator = FunctionsComparator::with_default(); + comparator.compare(models[0], models[1]); +} diff --git a/src/common/transformations/tests/utils/compress_quantize_weights.cpp b/src/common/transformations/tests/utils/compress_quantize_weights.cpp index cc31017368863f..15d07188f805f2 100644 --- a/src/common/transformations/tests/utils/compress_quantize_weights.cpp +++ b/src/common/transformations/tests/utils/compress_quantize_weights.cpp @@ -232,6 +232,30 @@ TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminated) { comparator.enable(FunctionsComparator::CmpValues::ACCURACY); } +TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminatedZeroScale) { + { + auto data = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.144816, 0.0858578, 0.110928}); + auto input_low = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.402659, -0.383148, -0.34054}); + auto input_high = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.399513, 0.380155, 0.33788}); + auto output_low = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.402659, 0.0, -0.34054}); + auto output_high = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.399513, 0.0, 0.33788}); + auto fq = std::make_shared(data, input_low, input_high, output_low, output_high, 256); + model = std::make_shared(NodeVector{fq}, ParameterVector{}); + + manager.register_pass(); + } + + { + auto data = opset8::Constant::create(element::i8, Shape{3, 1, 1, 1}, {-46, 29, 42}); + auto convert = std::make_shared(data, element::f32); + auto scale = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.00314577, 0.0, 0.00266047}); + auto mul = std::make_shared(convert, scale); + model_ref = std::make_shared(NodeVector{mul}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ACCURACY); +} + TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminatedFP16) { { auto data = opset8::Constant::create(element::f16, Shape{3, 1, 1, 1}, {0.2, 1.2, 1.2}); diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 6c0da965f9bfad..1dac080461d16b 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -1188,7 +1188,7 @@ void constant_convert_test(element::Type type_from, } ASSERT_TRUE(actual.size() >= expected.size()); for 
(size_t i = 0; i < expected.size(); i++) { - ASSERT_EQ(expected[i], actual[i]); + EXPECT_EQ(expected[i], actual[i]) << "Elements with index " << i << " are not equal."; } } @@ -1378,7 +1378,7 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) { constant_convert_test(element::u1, element::u4, std::vector{171}, - {1, 0, 1, 0, 1, 0, 1, 1}); + {0, 1, 0, 1, 0, 1, 1, 1}); } TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_exp) { diff --git a/src/core/include/openvino/core/type/nf4.hpp b/src/core/dev_api/openvino/core/type/nf4.hpp similarity index 100% rename from src/core/include/openvino/core/type/nf4.hpp rename to src/core/dev_api/openvino/core/type/nf4.hpp diff --git a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp new file mode 100644 index 00000000000000..7611744f7c4e58 --- /dev/null +++ b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp @@ -0,0 +1,75 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/core/attribute_adapter.hpp" +#include "openvino/core/core_visibility.hpp" + +namespace ov { +/// \brief Allocates a block of memory on the specified alignment. The actual size of the +/// allocated memory is larger than the requested size by the alignment, so allocating 1 +/// byte +/// on 64 byte alignment will allocate 65 bytes. +class OPENVINO_API AlignedBuffer { +public: + // Allocator objects and the allocation interfaces are owned by the + // creators of AlignedBuffers. They need to ensure that the lifetime of + // the allocator exceeds the lifetime of this AlignedBuffer. + AlignedBuffer(size_t byte_size, size_t alignment = 64); + + AlignedBuffer(); + virtual ~AlignedBuffer(); + + AlignedBuffer(AlignedBuffer&& other); + AlignedBuffer& operator=(AlignedBuffer&& other); + + size_t size() const { + return m_byte_size; + } + void* get_ptr(size_t offset) const { + return m_aligned_buffer + offset; + } + void* get_ptr() { + return m_aligned_buffer; + } + const void* get_ptr() const { + return m_aligned_buffer; + } + template + T* get_ptr() { + return reinterpret_cast(m_aligned_buffer); + } + template + const T* get_ptr() const { + return reinterpret_cast(m_aligned_buffer); + } + + template + explicit operator T*() { + return get_ptr(); + } + +private: + AlignedBuffer(const AlignedBuffer&) = delete; + AlignedBuffer& operator=(const AlignedBuffer&) = delete; + +protected: + char* m_allocated_buffer; + char* m_aligned_buffer; + size_t m_byte_size; +}; + +template <> +class OPENVINO_API AttributeAdapter> + : public DirectValueAccessor> { +public: + AttributeAdapter(std::shared_ptr& value); + + OPENVINO_RTTI("AttributeAdapter"); +}; + +} // namespace ov diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp new file mode 100644 index 00000000000000..1b51bfa07b7ee3 --- /dev/null +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/aligned_buffer.hpp" + +namespace ov { + +/// \brief SharedBuffer class to store pointer to pre-allocated buffer. 
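+/// The buffer does not own the memory: it keeps a copy of the shared object so that the underlying allocation stays alive for the buffer's lifetime. +/// For illustration only, a hypothetical sketch (assuming an ov::Tensor `tensor` in scope) of sharing tensor memory without copying: +/// auto buffer = std::make_shared<SharedBuffer<ov::Tensor>>( +/// static_cast<char*>(tensor.data()), tensor.get_byte_size(), tensor); 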
+template +class SharedBuffer : public ov::AlignedBuffer { +public: + SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) { + m_allocated_buffer = data; + m_aligned_buffer = data; + m_byte_size = size; + } + + virtual ~SharedBuffer() { + m_aligned_buffer = nullptr; + m_allocated_buffer = nullptr; + m_byte_size = 0; + } + +private: + T _shared_object; +}; + +} // namespace ov diff --git a/src/core/dev_api/validation_util.hpp b/src/core/dev_api/validation_util.hpp index e93fefd1411eb9..2495fd1029959a 100644 --- a/src/core/dev_api/validation_util.hpp +++ b/src/core/dev_api/validation_util.hpp @@ -34,7 +34,7 @@ OPENVINO_API bool are_unique(const std::vector& data); /// /// \param value Value to be clipped. /// \param min Minimum value bound. -/// \param max Maximum value boiund +/// \param max Maximum value bound. /// /// \return Value if between min, max otherwise min or max. OPENVINO_API int64_t clip(const int64_t& value, const int64_t& min, const int64_t& max); @@ -43,18 +43,21 @@ OPENVINO_API int64_t clip(const int64_t& value, const int64_t& min, const int64_ /// /// \param subgraph sink /// -/// \return Constant node or nullptr if unable to constantfold the subgraph +/// \return Constant node or nullptr if unable to constant fold the subgraph OPENVINO_API std::shared_ptr constantfold_subgraph(const Output& subgraph_sink); -/** - * @brief Runs an estimation of source tensor. If it succeeded to calculate both bounds and - * they are the same returns Constant operation from the resulting bound, otherwise nullptr. - * - * @param source Node output used to get its tensor data as constant. - * @return Shared pointer to constant data or nullptr. - */ +/// \brief Runs an estimation of the source tensor. If it succeeds in calculating both bounds and +/// they are the same, returns a Constant operation from the resulting bound, otherwise nullptr. +/// +/// \param source Node output used to get its tensor data as constant. +/// \return Shared pointer to constant data or nullptr. OPENVINO_API std::shared_ptr get_constant_from_source(const Output& source); +/// \brief Make a scalar tensor which stores the maximum value of ov::element::Type. +/// \param et Element type to get its maximum. +/// \return Tensor with maximum value. +Tensor make_tensor_of_max_value(const element::Type_t et); + /// \brief Apply auto padding to padding_above and padding_below inputs /// if all needed informations are known. /// diff --git a/src/core/include/ngraph/op/util/op_annotations.hpp b/src/core/include/ngraph/op/util/op_annotations.hpp index dad2aa3b43431a..dec2879f9c837f 100644 --- a/src/core/include/ngraph/op/util/op_annotations.hpp +++ b/src/core/include/ngraph/op/util/op_annotations.hpp @@ -30,7 +30,6 @@ struct NGRAPH_API_DEPRECATED oi_pair { }; /// \brief Base class for annotations added to graph ops - class NGRAPH_API_DEPRECATED NGRAPH_API OpAnnotations { NGRAPH_SUPPRESS_DEPRECATED_START public: diff --git a/src/core/include/ngraph/op/util/slice_plan.hpp b/src/core/include/ngraph/op/util/slice_plan.hpp index a0f99cccaed30c..e47e4ecd80e4f0 100644 --- a/src/core/include/ngraph/op/util/slice_plan.hpp +++ b/src/core/include/ngraph/op/util/slice_plan.hpp @@ -38,6 +38,7 @@ namespace ngraph { // // A SlicePlan is used to collect parameters for these ops. 
// +// This class has been moved to the dev API struct NGRAPH_API_DEPRECATED NGRAPH_API SlicePlan { // Parameters for the Slice std::vector begins; diff --git a/src/core/include/ngraph/opsets/opset.hpp b/src/core/include/ngraph/opsets/opset.hpp index 26c21e237b16c3..3f65437c6d3801 100644 --- a/src/core/include/ngraph/opsets/opset.hpp +++ b/src/core/include/ngraph/opsets/opset.hpp @@ -31,9 +31,7 @@ namespace ngraph { class NGRAPH_API OpSet : public ov::OpSet { public: explicit OpSet(const ov::OpSet& opset); - NGRAPH_SUPPRESS_DEPRECATED_START OpSet(const ngraph::OpSet& opset); - NGRAPH_SUPPRESS_DEPRECATED_END OpSet() = default; /// \brief Insert an op into the opset with a particular name and factory void insert(const std::string& name, const NodeTypeInfo& type_info, FactoryRegistry::Factory factory) { @@ -56,19 +54,20 @@ class NGRAPH_API OpSet : public ov::OpSet { } }; -const NGRAPH_API OpSet& get_opset1(); -const NGRAPH_API OpSet& get_opset2(); -const NGRAPH_API OpSet& get_opset3(); -const NGRAPH_API OpSet& get_opset4(); -const NGRAPH_API OpSet& get_opset5(); -const NGRAPH_API OpSet& get_opset6(); -const NGRAPH_API OpSet& get_opset7(); -const NGRAPH_API OpSet& get_opset8(); -const NGRAPH_API OpSet& get_opset9(); -const NGRAPH_API OpSet& get_opset10(); -const NGRAPH_API OpSet& get_opset11(); -const NGRAPH_API OpSet& get_opset12(); -const NGRAPH_API OpSet& get_opset13(); -const NGRAPH_API std::map>& get_available_opsets(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset1(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset2(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset3(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset4(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset5(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset6(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset7(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset8(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset9(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset10(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset11(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset12(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset13(); +NGRAPH_API_DEPRECATED const NGRAPH_API std::map>& +get_available_opsets(); } // namespace ngraph NGRAPH_SUPPRESS_DEPRECATED_END diff --git a/src/core/include/openvino/core/type/element_type.hpp b/src/core/include/openvino/core/type/element_type.hpp index 1534e9e0cc8fca..78e200d5035f79 100644 --- a/src/core/include/openvino/core/type/element_type.hpp +++ b/src/core/include/openvino/core/type/element_type.hpp @@ -20,7 +20,6 @@ #include "openvino/core/rtti.hpp" #include "openvino/core/type/bfloat16.hpp" #include "openvino/core/type/float16.hpp" -#include "openvino/core/type/nf4.hpp" /** * @defgroup ov_element_cpp_api Element types diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 14ee7b3313490e..6299dde459061c 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -12,7 +12,6 @@ # define WAS_OV_LIBRARY_DEFINED_CONSTANT #endif -#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/shared_buffer.hpp" @@ -21,11 +20,14 @@ # undef WAS_OV_LIBRARY_DEFINED_CONSTANT #endif #include "openvino/core/coordinate_diff.hpp" -#include "openvino/core/node.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/core/type/element_type_traits.hpp" 
+#include "openvino/op/op.hpp" namespace ov { + +class AlignedBuffer; + namespace op { namespace v0 { /// \brief Class for constants. @@ -177,13 +179,20 @@ class OPENVINO_API Constant : public Op { /// \param shape The shape of the tensor constant. /// \param data A pointer to pre-allocated shared data. template + OPENVINO_DEPRECATED("This constructor is deprecated and will be removed in 2024.0 release") Constant(const element::Type& type, const Shape& shape, std::shared_ptr> data) : m_element_type(type), m_shape(shape) { - m_data = data; + m_data = legacy_to_ov_aligned_buffer(data); constructor_validate_and_infer_types(); } OPENVINO_SUPPRESS_DEPRECATED_END + Constant(const element::Type& type, const Shape& shape, const std::shared_ptr& data) + : m_element_type(type), + m_shape(shape) { + m_data = data; + constructor_validate_and_infer_types(); + } Constant(const Constant& other); Constant(const Constant& other, const Shape& new_shape); @@ -241,11 +250,7 @@ class OPENVINO_API Constant : public Op { AxisSet get_axis_set_val() const; /// \brief Return data size in bytes - size_t get_byte_size() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return m_data->size(); - OPENVINO_SUPPRESS_DEPRECATED_END - } + size_t get_byte_size() const; /// \brief Wrapper around constructing a shared_ptr of a Constant /// @@ -370,11 +375,8 @@ class OPENVINO_API Constant : public Op { return rc; } - const void* get_data_ptr() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return (m_data ? m_data->get_ptr() : nullptr); - OPENVINO_SUPPRESS_DEPRECATED_END - } + const void* get_data_ptr() const; + template const T* get_data_ptr() const { OPENVINO_ASSERT(sizeof(T) <= m_element_type.size() || shape_size(m_shape) <= 0, "Buffer over-read"); @@ -406,6 +408,11 @@ class OPENVINO_API Constant : public Op { private: Constant(bool memset_allocation, const element::Type& type, const Shape& shape); + OPENVINO_SUPPRESS_DEPRECATED_START + std::shared_ptr legacy_to_ov_aligned_buffer( + const std::shared_ptr& buffer); + OPENVINO_SUPPRESS_DEPRECATED_END + template , typename std::enable_if, typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - return (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + return (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; } template , typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; return data; @@ -485,9 +492,19 @@ class OPENVINO_API Constant : public Op { if (!std::is_same::value) { OPENVINO_ASSERT( !std::numeric_limits::is_signed || std::numeric_limits::lowest() <= c, - "Cannot cast vector from constant. Some values are outside the range."); + "Cannot cast vector from ", + Type, + " constant to ", + element::from(), + ". Some values are outside the range. Example: ", + c); OPENVINO_ASSERT(std::numeric_limits::max() >= c, - "Cannot cast vector from constant. Some values are outside the range."); + "Cannot cast vector from ", + Type, + " constant to ", + element::from(), + ". Some values are outside the range. Example: ", + c); } #if defined(__clang__) # pragma clang diagnostic pop @@ -530,7 +547,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? 
element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here? std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t data = (c >> i) & 0x0F; output.push_back(data); } @@ -548,7 +565,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here? std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t i4data = (c >> i) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; @@ -627,11 +644,7 @@ class OPENVINO_API Constant : public Op { void allocate_buffer(bool memset_allocation); - void* get_data_ptr_nc() { - OPENVINO_SUPPRESS_DEPRECATED_START - return (m_data ? m_data->get_ptr() : nullptr); - OPENVINO_SUPPRESS_DEPRECATED_END - } + void* get_data_ptr_nc(); template typename element_type_traits::value_type* get_data_ptr_nc() { @@ -663,27 +676,9 @@ class OPENVINO_API Constant : public Op { template , - typename std::enable_if::type = true> - void write_buffer(const std::vector& source) { - auto p = get_data_ptr_nc(); - size_t i = 0; - for (; i < source.size() / 2; i++) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v2 = value_in_range(source[i * 2 + 1]) & 0x0F; - const auto v = (v1 << 4) | v2; - p[i] = static_cast(v); - } - if (source.size() % 2) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v = v1 << 4; - p[i] = static_cast(v); - } - } - - template , - typename std::enable_if::value, bool>::type = true> + typename std::enable_if::value), + bool>::type = true> void write_buffer(const std::vector& source) { auto p = get_data_ptr_nc(); size_t i = 0; @@ -710,15 +705,15 @@ class OPENVINO_API Constant : public Op { auto p = get_data_ptr_nc(); size_t i = 0; for (; i < source.size() / 2; i++) { - const auto idx1 = ConvertNF4::quantize(static_cast(source[i * 2])); - const auto idx2 = ConvertNF4::quantize(static_cast(source[i * 2 + 1])); + const auto idx1 = quantize_nf4(static_cast(source[i * 2])); + const auto idx2 = quantize_nf4(static_cast(source[i * 2 + 1])); const auto v1 = value_in_range(idx1) & 0x0F; const auto v2 = value_in_range(idx2) & 0x0F; const auto v = (v2 << 4) | v1; p[i] = static_cast(v); } if (source.size() % 2) { - const auto idx1 = ConvertNF4::quantize(static_cast(source[i * 2])); + const auto idx1 = quantize_nf4(static_cast(source[i * 2])); const auto v = value_in_range(idx1) & 0x0F; p[i] = static_cast(v); } @@ -858,12 +853,11 @@ class OPENVINO_API Constant : public Op { } return shape_size(m_shape) * m_element_type.size(); } + static uint8_t quantize_nf4(float x); element::Type m_element_type; Shape m_shape{}; - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr m_data; - OPENVINO_SUPPRESS_DEPRECATED_END + std::shared_ptr m_data; mutable std::atomic_bool m_all_elements_bitwise_identical{false}; mutable std::atomic_bool m_all_elements_bitwise_identical_checked{false}; bool m_alloc_buffer_on_visit_attributes = true; diff --git a/src/core/include/openvino/op/minimum.hpp b/src/core/include/openvino/op/minimum.hpp index c8cfc5c9d7c999..30819b2a72f849 100644 --- a/src/core/include/openvino/op/minimum.hpp +++ b/src/core/include/openvino/op/minimum.hpp @@ -29,9 +29,7 @@ class OPENVINO_API Minimum : public util::BinaryElementwiseArithmetic { 
std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/mod.hpp b/src/core/include/openvino/op/mod.hpp index 5e58a2ec03d733..defb1c65163898 100644 --- a/src/core/include/openvino/op/mod.hpp +++ b/src/core/include/openvino/op/mod.hpp @@ -29,6 +29,8 @@ class OPENVINO_API Mod : public util::BinaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override; + bool evaluate_lower(TensorVector& outputs) const override; + bool evaluate_upper(TensorVector& outputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/util/framework_node.hpp b/src/core/include/openvino/op/util/framework_node.hpp index 57a6be7a3a34fb..f8a63b38b823e5 100644 --- a/src/core/include/openvino/op/util/framework_node.hpp +++ b/src/core/include/openvino/op/util/framework_node.hpp @@ -81,7 +81,7 @@ class OPENVINO_API FrameworkNode : public MultiSubGraphOp { explicit FrameworkNode(const OutputVector& inputs, size_t output_size = 1, size_t num_subgraphs = 0); - void validate_and_infer_types() override; + virtual void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; diff --git a/src/core/reference/include/openvino/reference/convert.hpp b/src/core/reference/include/openvino/reference/convert.hpp index e943e548a8fa4e..3924ce690553b2 100644 --- a/src/core/reference/include/openvino/reference/convert.hpp +++ b/src/core/reference/include/openvino/reference/convert.hpp @@ -8,13 +8,14 @@ #include "openvino/core/type/element_type.hpp" #include "openvino/core/type/float16.hpp" +#include "openvino/core/type/nf4.hpp" namespace ov { namespace reference { namespace detail { inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits if (val) { buf[byte_idx] |= (1 << bit_idx); } else { @@ -24,33 +25,33 @@ inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { inline uint8_t get_u1(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits return (buf[byte_idx] & (1 << bit_idx)) ? 
1 : 0; } inline void set_u4(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline uint8_t get_u4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); return (buf[byte_idx] >> bit_shift) & 0xF; } inline void set_i4(uint8_t* buf, size_t idx, int8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline int8_t get_i4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); uint8_t val = (buf[byte_idx] >> bit_shift) & 0xF; if (val & 0x08) { // negative number val |= 0xF0; @@ -87,7 +88,7 @@ void lp_convert(const TI* arg, TO* out, size_t count, element::Type_t src_type, } else if (dst_type == element::i4) { detail::set_i4(output, i, detail::get_value(input, i, src_type)); } else if (src_type == element::nf4) { - ConvertNF4::unpack(out, input, i); + ov::ConvertNF4::unpack(out, input, i); } else { out[i] = detail::get_value(input, i, src_type); } diff --git a/src/core/reference/include/openvino/reference/minimum.hpp b/src/core/reference/include/openvino/reference/minimum.hpp index 4bfe8ff0c89c83..8d70ae0fc99ee0 100644 --- a/src/core/reference/include/openvino/reference/minimum.hpp +++ b/src/core/reference/include/openvino/reference/minimum.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include "openvino/core/shape.hpp" #include "openvino/op/util/attr_types.hpp" @@ -12,11 +12,16 @@ namespace ov { namespace reference { +namespace func { +template +T min(const T a, const T b) { + return std::min(a, b); +} +} // namespace func + template void minimum(const T* arg0, const T* arg1, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] < arg1[i] ? arg0[i] : arg1[i]; - } + std::transform(arg0, std::next(arg0, count), arg1, out, func::min); } template @@ -26,9 +31,7 @@ void minimum(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - return x < y ? x : y; - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::min); } } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/mod.hpp b/src/core/reference/include/openvino/reference/mod.hpp index 81ae69e32ebfb2..671ee012393641 100644 --- a/src/core/reference/include/openvino/reference/mod.hpp +++ b/src/core/reference/include/openvino/reference/mod.hpp @@ -6,6 +6,7 @@ #include #include +#include #include "openvino/reference/autobroadcast_binop.hpp" #include "openvino/reference/utils/type_util.hpp" @@ -22,6 +23,72 @@ template ()>::type* = T mod(const T x, const T y) { return x - (std::trunc(x / y) * y); } + +/** + * @brief Estimates the division remainder `[v1, v2] % m = [r0, r1]` as an interval. + * + * Assumes that `0 <= v1 <= v2` and `m != 0`; otherwise the behaviour is undefined. + * The result interval estimates the minimum and maximum, but it is not true that the + * remainder can take every value between min and max. + * e.g. + * - [4,6] % 5 = [0, 4], but in fact the accurate result is the set {0, 1, 4} + * If the value interval spans a full period of `m` (detected when `mod(v2, m) < mod(v1, m)` + * or `v2 - v1 >= m`), the bounds widen to the whole range `[0, m - 1]`. + + * @param v1 Minimum of value interval. + * @param v2 Maximum of value interval. + * @param m Modulo divisor. + * @return Remainder of division as interval range. + */ +template ::value>::type* = nullptr> +std::pair mod_interval_value(const T v1, const T v2, const T m) { + const auto v_diff = v2 - v1; + auto r = std::make_pair(func::mod(v1, m), func::mod(v2, m)); + + if ((r.second < r.first) || ((v_diff != T{0}) && (v_diff >= m))) { + r.first = T{0}; + r.second = m - T{1}; + } + return r; +} + +/** + * @brief Estimates the division remainder of `[v1, v2] % [m1, m2] = [r0, r1]` as an interval. + * + * Assumes that `0 <= v1 <= v2` and `0 < m1 <= m2`; otherwise the behaviour is undefined. + * + * @param v1 Minimum of value interval. + * @param v2 Maximum of value interval. + * @param m1 Minimum of modulo divisor. + * @param m2 Maximum of modulo divisor. + * @return Remainder of division as interval range. + */ +template ::value>::type* = nullptr> +std::pair mod_interval(const T v1, const T v2, const T m1, const T m2) { + auto r = mod_interval_value(v1, v2, m1); + if (v2 != 0) { + if (m1 != m2) { + const auto v_diff = v2 - v1; + const auto m_diff = m2 - m1; + + auto r2 = mod_interval_value(v1, v2, m2); + r.first = std::min(r.first, r2.first); + r.second = std::max(r.second, r2.second); + + if (v_diff == T{0} && m_diff != T{1}) { + const T v2_half = v2 / T{2}; + if ((m1 < v2_half) || ((m1 < v2) && (v2 < m2))) { + r.first = T{0}; + + if ((v2_half < m2) && (m2 < v2)) { + const T v2_half_next = v2_half + T{1}; + r.second = func::mod(v2, v2_half_next); + } else { + r.second = m2 - T{1}; + } + } + } + } + } + return r; +} } // namespace func /** @@ -42,7 +109,7 @@ void mod(InputIt arg0, const Shape& arg_shape1, const op::AutoBroadcastSpec& broadcast_spec) { using T = typename std::iterator_traits::value_type; - autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, &func::mod); + autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, func::mod); } } // namespace reference } // namespace ov diff --git a/src/core/reference/src/op/nms_rotated.cpp b/src/core/reference/src/op/nms_rotated.cpp index fd604acd5cc6c7..3b4f21d4431c31 100644 --- a/src/core/reference/src/op/nms_rotated.cpp +++ b/src/core/reference/src/op/nms_rotated.cpp @@ -127,7 +127,7 @@ void nms_rotated(const float* boxes_data, for (int64_t batch = 0; batch < num_batches; batch++) { const float* boxesPtr = boxes_data + batch * num_boxes * 5; - RotatedBox* r = reinterpret_cast(const_cast(boxesPtr)); + const RotatedBox* r = reinterpret_cast(boxesPtr); for (int64_t class_idx = 0; class_idx < num_classes; class_idx++) { const float* scoresPtr = scores_data + batch * (num_classes * num_boxes) + class_idx * num_boxes; @@ -137,11 +137,11 @@ void nms_rotated(const float* boxes_data, for (int64_t box_idx = 0; box_idx < num_boxes; box_idx++) { if (scoresPtr[box_idx] > score_threshold) { + candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx); // Convert counterclockwise to clockwise if (!clockwise) { - r[box_idx].a *= -1; + candidate_boxes.back().box.a *= -1.f; } - candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx); } } diff --git a/src/core/reference/src/op/strided_slice.cpp b/src/core/reference/src/op/strided_slice.cpp index 
6e83305e653059..cad9dee20d02be 100644 --- a/src/core/reference/src/op/strided_slice.cpp +++ b/src/core/reference/src/op/strided_slice.cpp @@ -8,10 +8,10 @@ #include -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/reference/reshape.hpp" #include "openvino/reference/reverse.hpp" #include "openvino/reference/slice.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { namespace reference { @@ -30,8 +30,7 @@ void strided_slice(const char* arg, return; } - OPENVINO_SUPPRESS_DEPRECATED_START - ngraph::runtime::AlignedBuffer slice_out_buffer(shape_size(sp.reshape_in_shape) * elem_type); + ov::AlignedBuffer slice_out_buffer(shape_size(sp.reshape_in_shape) * elem_type); slice(reinterpret_cast(arg), slice_out_buffer.get_ptr(), arg_shape, @@ -41,7 +40,7 @@ void strided_slice(const char* arg, sp.reshape_in_shape, elem_type); - ngraph::runtime::AlignedBuffer reshape_out_buffer(shape_size(sp.reshape_out_shape) * elem_type); + ov::AlignedBuffer reshape_out_buffer(shape_size(sp.reshape_out_shape) * elem_type); reshape(slice_out_buffer.get_ptr(), reshape_out_buffer.get_ptr(), sp.reshape_in_shape, elem_type); reverse(reshape_out_buffer.get_ptr(), @@ -50,7 +49,6 @@ void strided_slice(const char* arg, sp.reshape_out_shape, sp.reverse_axes, elem_type); - OPENVINO_SUPPRESS_DEPRECATED_END } } // namespace reference } // namespace ov diff --git a/src/core/shape_inference/include/utils.hpp b/src/core/shape_inference/include/utils.hpp index 32e53766ba0d60..cac12973a18179 100644 --- a/src/core/shape_inference/include/utils.hpp +++ b/src/core/shape_inference/include/utils.hpp @@ -419,6 +419,17 @@ ov::optional get_input_bounds(const ov::Node* op, size_t port, const IT * @return Result shape from inputs with applied broadcast specification. */ ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::Shape& first, const ov::Shape& second); + +/** + * @brief Infer the broadcast shape from input tensor shapes for an element-wise operator, + * according to the broadcast specification stored in the operator. + * + * @param op Pointer to operator. + * @param inputs Vector of tensors whose shapes are used. + * + * @return Result shape inferred from the input tensor shapes with the broadcast specification applied. 
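+ * + * @note With NUMPY auto-broadcast, e.g., input shapes {8, 1, 6, 1} and {7, 1, 5} + * produce the result shape {8, 7, 6, 5}. 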
+ */ +ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::TensorVector& inputs); } // namespace op /** diff --git a/src/core/shape_inference/src/utils.cpp b/src/core/shape_inference/src/utils.cpp index c89221d286ac69..74351e6fc1cfc0 100644 --- a/src/core/shape_inference/src/utils.cpp +++ b/src/core/shape_inference/src/utils.cpp @@ -5,6 +5,7 @@ #include "utils.hpp" #include "eltwise_shape_inference.hpp" +#include "openvino/core/validation_util.hpp" namespace ov { namespace op { @@ -12,5 +13,9 @@ namespace op { ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::Shape& first, const ov::Shape& second) { return eltwise_shape_infer(op, std::vector{first, second}).front().to_shape(); } + +ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::TensorVector& inputs) { + return eltwise_shape_infer(op, ov::util::get_tensors_partial_shapes(inputs)).front().to_shape(); +} } // namespace op } // namespace ov diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index 8001678dab2601..4c6a4d0f33e516 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -319,7 +319,8 @@ bool replace_output_update_name(Output output, const Output& replace bool replace_node_update_name(const std::shared_ptr& target, const std::shared_ptr& replacement) { for (auto& output : target->output(0).get_target_inputs()) { - if (ov::as_type(replacement->input_value(0).get_node()) && + if (replacement->get_input_size() > 0 && + ov::as_type(replacement->input_value(0).get_node()) && ov::as_type(output.get_node())) { return false; } diff --git a/src/core/src/node_input.cpp b/src/core/src/node_input.cpp index 7c6b8a9ff2102c..11a353cb765b49 100644 --- a/src/core/src/node_input.cpp +++ b/src/core/src/node_input.cpp @@ -60,12 +60,15 @@ bool Input::operator==(const Input& other) const { bool Input::operator!=(const Input& other) const { return !(*this == other); } + bool Input::operator<(const Input& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Input::operator>(const Input& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Input::operator<=(const Input& other) const { @@ -135,11 +138,13 @@ bool Input::operator!=(const Input& other) const { return !(*this == other); } bool Input::operator<(const Input& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Input::operator>(const Input& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Input::operator<=(const Input& other) const { diff --git a/src/core/src/node_output.cpp b/src/core/src/node_output.cpp index fbd7d3f172280c..4d5de39b75132a 100644 --- a/src/core/src/node_output.cpp +++ b/src/core/src/node_output.cpp @@ -137,10 +137,12 @@ bool Output::operator!=(const Output& other) const { return !(*this == other); } bool Output::operator<(const Output& other) const { - return 
m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Output::operator>(const Output& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Output::operator<=(const Output& other) const { return !(*this > other); @@ -211,10 +213,12 @@ bool Output::operator!=(const Output& other) const { return !(*this == other); } bool Output::operator<(const Output& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Output::operator>(const Output& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Output::operator<=(const Output& other) const { return !(*this > other); diff --git a/src/core/src/op/add.cpp b/src/core/src/op/add.cpp index 316f71b3ebca7c..0d09563b9ae201 100644 --- a/src/core/src/op/add.cpp +++ b/src/core/src/op/add.cpp @@ -19,14 +19,11 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::add(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::add(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; @@ -48,15 +45,16 @@ std::shared_ptr Add::clone_with_new_inputs(const OutputVector& new_args) c bool Add::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { OV_OP_SCOPE(v1_Add_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply( inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 27d9e000b64dec..34e97d73eeee30 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -10,8 +10,13 @@ #include #include "itt.hpp" +#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/tensor.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/core/type/float16.hpp" +#include "openvino/core/type/nf4.hpp" +#include "openvino/runtime/shared_buffer.hpp" template static inline std::string to_cpp_string(T value) { @@ -27,6 +32,14 @@ static inline std::string to_cpp_string(T value) { } return rc; } +OPENVINO_SUPPRESS_DEPRECATED_START +std::shared_ptr ov::op::v0::Constant::legacy_to_ov_aligned_buffer( + const std::shared_ptr& buffer) { + return std::make_shared>>(buffer->get_ptr(), + buffer->size(), + buffer); +} +OPENVINO_SUPPRESS_DEPRECATED_END OPENVINO_SUPPRESS_DEPRECATED_START 
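+// The legacy_to_ov_aligned_buffer helper above wraps the raw pointer of a deprecated +// ngraph::runtime::AlignedBuffer in a SharedBuffer that holds the legacy buffer as its +// shared object, so the old allocation stays alive while m_data references it. 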
ov::op::v0::Constant::Constant(const std::shared_ptr& tensor) { @@ -35,7 +48,7 @@ ov::op::v0::Constant::Constant(const std::shared_ptr& t // Share data from HostTensor if we work with it // And copy data in other cas if (auto hostTensor = std::dynamic_pointer_cast(tensor)) { - m_data = std::make_shared>>( + m_data = std::make_shared>>( static_cast(hostTensor->get_data_ptr()), tensor->get_size_in_bytes(), tensor); @@ -51,12 +64,10 @@ OPENVINO_SUPPRESS_DEPRECATED_END ov::op::v0::Constant::Constant(const ov::Tensor& tensor) { m_element_type = tensor.get_element_type(); m_shape = tensor.get_shape(); - OPENVINO_SUPPRESS_DEPRECATED_START // Share data from ov::Tensor - m_data = std::make_shared>(static_cast(tensor.data()), - tensor.get_byte_size(), - tensor); - OPENVINO_SUPPRESS_DEPRECATED_END + m_data = std::make_shared>(static_cast(tensor.data()), + tensor.get_byte_size(), + tensor); constructor_validate_and_infer_types(); } @@ -211,12 +222,10 @@ ov::op::v0::Constant::Constant(bool memset_allocation, const element::Type& type } void ov::op::v0::Constant::allocate_buffer(bool memset_allocation) { - OPENVINO_SUPPRESS_DEPRECATED_START - m_data = std::make_shared(mem_size(), host_alignment()); + m_data = std::make_shared(mem_size(), host_alignment()); if (memset_allocation) { std::memset(m_data->get_ptr(), 0, m_data->size()); } - OPENVINO_SUPPRESS_DEPRECATED_END } ov::op::v0::Constant::Constant(const element::Type& type, const ov::Shape& shape, const void* data) @@ -316,6 +325,18 @@ std::string ov::op::v0::Constant::convert_value_to_string(size_t index) const { return rc; } +size_t ov::op::v0::Constant::get_byte_size() const { + return m_data->size(); +} + +const void* ov::op::v0::Constant::get_data_ptr() const { + return (m_data ? m_data->get_ptr() : nullptr); +} + +void* ov::op::v0::Constant::get_data_ptr_nc() { + return (m_data ? 
m_data->get_ptr() : nullptr); +} + std::vector ov::op::v0::Constant::get_value_strings() const { std::vector rc; @@ -588,3 +609,7 @@ bool ov::op::v0::Constant::evaluate_lower(TensorVector& outputs) const { bool ov::op::v0::Constant::evaluate_upper(TensorVector& outputs) const { return evaluate(outputs, {}); } + +uint8_t ov::op::v0::Constant::quantize_nf4(float x) { + return ov::ConvertNF4::quantize(x); +} diff --git a/src/core/src/op/logical_and.cpp b/src/core/src/op/logical_and.cpp index fe8bd612ed2d85..91ff10dc15601c 100644 --- a/src/core/src/op/logical_and.cpp +++ b/src/core/src/op/logical_and.cpp @@ -25,19 +25,16 @@ std::shared_ptr LogicalAnd::clone_with_new_inputs(const OutputVector& new_ bool LogicalAnd::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalAnd_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - const auto& shape_0 = inputs[0].get_shape(); - const auto& shape_1 = inputs[1].get_shape(); - outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); if (inputs[0].get_element_type() == element::boolean) { using T = fundamental_type_for; reference::logical_and(inputs[0].data(), inputs[1].data(), outputs[0].data(), - shape_0, - shape_1, + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); return true; } else { diff --git a/src/core/src/op/logical_or.cpp b/src/core/src/op/logical_or.cpp index 403089318de314..5d9532b1358286 100644 --- a/src/core/src/op/logical_or.cpp +++ b/src/core/src/op/logical_or.cpp @@ -26,19 +26,16 @@ std::shared_ptr LogicalOr::clone_with_new_inputs(const OutputVector& new_a bool LogicalOr::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalOr_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - const auto& shape_0 = inputs[0].get_shape(); - const auto& shape_1 = inputs[1].get_shape(); - outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); if (inputs[0].get_element_type() == element::boolean) { using T = fundamental_type_for; reference::logical_or(inputs[0].data(), inputs[1].data(), outputs[0].data(), - shape_0, - shape_1, + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); return true; } else { diff --git a/src/core/src/op/minimum.cpp b/src/core/src/op/minimum.cpp index 83252519beeeac..1844c6e5b25e36 100644 --- a/src/core/src/op/minimum.cpp +++ b/src/core/src/op/minimum.cpp @@ -2,92 +2,78 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/minimum.hpp" - -#include +#include "openvino/op/minimum.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/convert.hpp" -#include "ngraph/op/less.hpp" -#include "ngraph/op/multiply.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/type/element_type.hpp" #include "openvino/reference/minimum.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace minimumop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::minimum(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} +namespace minimum { -bool evaluate_minimum(const 
HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_minimum, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::minimum(arg0.data(), arg1.data(), out.data(), shape0, shape1, broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace minimumop +}; +} // namespace minimum // ------------------------------ v1 ------------------------------------------- - -op::v1::Minimum::Minimum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Minimum::Minimum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Minimum::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Minimum::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Minimum_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Minimum::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Minimum::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Minimum_evaluate); - return minimumop::evaluate_minimum(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Minimum::has_evaluate() const { +bool Minimum::has_evaluate() const { OV_OP_SCOPE(v1_Minimum_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/mod.cpp b/src/core/src/op/mod.cpp index e8aa1a8a009cc2..69ac9493052d20 100644 --- a/src/core/src/op/mod.cpp +++ 
b/src/core/src/op/mod.cpp @@ -4,13 +4,30 @@ #include "openvino/op/mod.hpp" +#include "bound_evaluate.hpp" #include "element_visitor.hpp" #include "itt.hpp" -#include "openvino/core/shape_util.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/logical_or.hpp" +#include "openvino/op/select.hpp" #include "openvino/reference/mod.hpp" #include "utils.hpp" +#include "validation_util.hpp" namespace ov { +namespace util { +namespace { +Tensor make_tensor_of_value(const element::Type_t et, const int64_t value) { + auto c = op::v0::Constant(et, Shape{}, value); + auto t = Tensor(et, Shape{}); + std::memcpy(t.data(), c.get_data_ptr(), t.get_byte_size()); + return t; +} +} // namespace +} // namespace util + namespace op { namespace mod { struct Evaluate : ov::element::NoAction { @@ -20,17 +37,193 @@ struct Evaluate : ov::element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::mod(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::mod(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; + +struct EvaluateBound : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& v_lb, + const Tensor& v_ub, + const Tensor& m_lb, + const Tensor& m_ub, + Tensor& out, + const bool is_lower) { + auto v_lb_first = v_lb.data(); + auto v_lb_last = std::next(v_lb_first, v_lb.get_size()); + auto v_ub_first = v_ub.data(); + auto m_lb_first = m_lb.data(); + auto m_ub_first = m_ub.data(); + auto out_first = out.data(); + + if (is_lower) { + while (v_lb_first != v_lb_last) { + *out_first++ = + reference::func::mod_interval(*v_lb_first++, *v_ub_first++, *m_lb_first++, *m_ub_first++).first; + } + } else { + while (v_lb_first != v_lb_last) { + *out_first++ = + reference::func::mod_interval(*v_lb_first++, *v_ub_first++, *m_lb_first++, *m_ub_first++).second; + } + } + return true; + } +}; + +namespace { + +/** + * @brief Get the node input bounds as a TensorVector. + * + * The input bounds are stored as [lower0, upper0, lower1, upper1]. + * + * @param op Pointer to the node. + * @return Vector with input bounds tensors. + */ +TensorVector get_bounds(const Node* const op) { + auto&& v_bounds = ov::evaluate_both_bounds(op->input_value(0)); + auto&& m_bounds = ov::evaluate_both_bounds(op->input_value(1)); + return {std::move(v_bounds.first), + std::move(v_bounds.second), + std::move(m_bounds.first), + std::move(m_bounds.second)}; +} + +/** + * @brief Check if all bounds in the vector are valid (allocated). + * + * @param bounds TensorVector of bounds to check. + * @return True if the bounds are valid, otherwise false. + */ +bool are_bounds_valid(const TensorVector& bounds) { + return std::all_of(bounds.begin(), bounds.end(), [](const Tensor& t) { + return static_cast(t); + }); +} + +/** + * @brief Evaluate a binary mask of the values for which modulo cannot be calculated. + * + * @param bounds Modulo input bounds. + * @return Tensor with binary mask or empty tensor if evaluate failed. 
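+ * + * @note The mask is set where the divisor lower or upper bound equals zero, the divisor + * lower bound equals the type maximum, or the value upper bound equals the type maximum. 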
+ */ +Tensor evaluate_undefined_result_mask(const TensorVector& bounds) { + const auto eq_op = v1::Equal(); + const auto or_op = v1::LogicalOr(); + + const auto& in_et = bounds.front().get_element_type(); + + auto zero_t = ov::util::make_tensor_of_value(in_et, 0); + auto max_t = ov::util::make_tensor_of_max_value(in_et); + + const auto& v_ub = bounds[1]; + const auto& m_lb = bounds[2]; + const auto& m_ub = bounds[3]; + + auto m_mask = TensorVector{{element::boolean, m_ub.get_shape()}}; + if (!eq_op.evaluate(m_mask, {m_lb, zero_t})) { + return {}; + } + + auto out_masks = TensorVector{{element::boolean, m_lb.get_shape()}}; + if (!eq_op.evaluate(out_masks, {m_ub, zero_t})) { + return {}; + } + + auto m_or_inputs = TensorVector{out_masks[0], m_mask[0]}; + or_op.evaluate(m_mask, m_or_inputs); + if (!eq_op.evaluate(out_masks, {m_lb, max_t})) { + return {}; + } + + or_op.evaluate(m_mask, m_or_inputs); + auto v_mask = TensorVector{{element::boolean, v_ub.get_shape()}}; + if (!eq_op.evaluate(v_mask, {v_ub, max_t})) { + return {}; + } + + out_masks[0].set_shape(ov::op::infer_broadcast_shape(&or_op, v_mask[0].get_shape(), m_mask[0].get_shape())); + return or_op.evaluate(out_masks, {v_mask[0], m_mask[0]}) ? out_masks[0] : Tensor{}; +} + +/** + * @brief Get the input bounds with valid values only. + * + * Values for which modulo would give an undefined result are replaced by one. + * Auto broadcast is applied so that the inputs have the same shape. + * + * @param bounds Modulo operator inputs bounds. + * @param mask Mask of undefined result values. + * @return Vector of bounds tensors. + */ +TensorVector get_bounds_with_valid_values(const TensorVector& bounds, const Tensor& mask) { + const auto select_op = v1::Select(); + const auto one_t = ov::util::make_tensor_of_value(bounds.front().get_element_type(), 1); + + auto m_bounds = TensorVector(); + m_bounds.reserve(bounds.size()); + std::transform(bounds.cbegin(), bounds.cend(), std::back_inserter(m_bounds), [&](const Tensor& b) { + auto tmp = TensorVector{{b.get_element_type(), mask.get_shape()}}; + return select_op.evaluate(tmp, {mask, one_t, b}) ? tmp.front() : Tensor{}; + }); + return m_bounds; +} + +/** + * @brief Evaluate modulo upper or lower bound. + * + * @param op Pointer to the modulo node. + * @param outputs Tensor vector with one tensor to store the bound result. + * @param is_lower True to evaluate the lower bound, false for the upper. + * @return True if outputs hold valid data, otherwise false. + */ +bool evaluate_bound(const Node* const op, TensorVector& outputs, bool is_lower) { + const auto bounds = mod::get_bounds(op); + + if (mod::are_bounds_valid(bounds)) { + const auto& in_et = bounds[0].get_element_type(); + + const auto undefined_result_mask = mod::evaluate_undefined_result_mask(bounds); + if (!undefined_result_mask) { + return false; + } + + // Set input values to 1 where the mask marks undefined results (0, inf, etc.) + const auto m_bounds = mod::get_bounds_with_valid_values(bounds, undefined_result_mask); + if (!mod::are_bounds_valid(m_bounds)) { + return false; + } + + // Evaluate bound. + outputs[0].set_shape(undefined_result_mask.get_shape()); + using namespace ov::element; + if (!IfTypeOf::apply(in_et, + m_bounds[0], + m_bounds[1], + m_bounds[2], + m_bounds[3], + outputs[0], + is_lower)) { + return false; + } + // Set undefined bound value for results which cannot be calculated. + const auto select_op = v1::Select(); + const auto undefined_bound = + is_lower ? 
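For intuition about the EvaluateBound visitor above: reference::func::mod_interval(v_lb, v_ub, m_lb, m_ub) is expected to return the tightest pair {lo, hi} enclosing every v % m with v in [v_lb, v_ub] and m in [m_lb, m_ub]. That contract can be cross-checked by exhaustive enumeration; the following is a sketch assuming non-empty intervals and truncated (C++-style) modulo, not the shipped implementation:

#include <algorithm>
#include <cstdint>
#include <limits>
#include <utility>

std::pair<int64_t, int64_t> mod_interval_bruteforce(int64_t v_lb, int64_t v_ub,
                                                    int64_t m_lb, int64_t m_ub) {
    auto lo = std::numeric_limits<int64_t>::max();
    auto hi = std::numeric_limits<int64_t>::min();
    for (int64_t v = v_lb; v <= v_ub; ++v) {
        for (int64_t m = m_lb; m <= m_ub; ++m) {
            if (m == 0)
                continue;  // masked out as "undefined" by the code above
            lo = std::min(lo, v % m);
            hi = std::max(hi, v % m);
        }
    }
    return {lo, hi};
}

// For example v in [10, 10] and m in [6, 9] gives {1, 4}, which is exactly the
// v_static_m_interval case added to the type_prop tests later in this patch.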
ov::util::make_tensor_of_value(in_et, 0) : ov::util::make_tensor_of_max_value(in_et); + return select_op.evaluate(outputs, {undefined_result_mask, undefined_bound, outputs.front()}); + } else { + return false; + } +} +} // namespace } // namespace mod namespace v1 { @@ -48,17 +241,28 @@ std::shared_ptr Mod::clone_with_new_inputs(const OutputVector& new_args) c bool Mod::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { OV_OP_SCOPE(v1_Mod_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } +bool Mod::evaluate_lower(TensorVector& outputs) const { + OV_OP_SCOPE(v1_Mod_evaluate_lower); + return mod::evaluate_bound(this, outputs, true); +} + +bool Mod::evaluate_upper(TensorVector& outputs) const { + OV_OP_SCOPE(v1_Mod_evaluate_upper); + return mod::evaluate_bound(this, outputs, false); +} + bool Mod::has_evaluate() const { OV_OP_SCOPE(v1_Mod_has_evaluate); diff --git a/src/core/src/op/range.cpp b/src/core/src/op/range.cpp index 6285391ae56e06..204161ce10ac7b 100644 --- a/src/core/src/op/range.cpp +++ b/src/core/src/op/range.cpp @@ -144,37 +144,46 @@ bool evaluate(const HostTensorPtr& out, const HostTensorPtr& step, int version) { using T = typename element_type_traits::value_type; - T start_val; - T stop_val; - T step_val; + double start_val; + double stop_val; + double step_val; if (version < 4) { - start_val = *start->get_data_ptr(); - stop_val = *stop->get_data_ptr(); - step_val = *step->get_data_ptr(); + start_val = static_cast(*start->get_data_ptr()); + stop_val = static_cast(*stop->get_data_ptr()); + step_val = static_cast(*step->get_data_ptr()); if (!(check_value(start_val) && check_value(stop_val) && check_value(step_val) && (step_val != static_cast(0)))) { return false; } } else { - if (!(get_casted_value(start, &start_val) && get_casted_value(stop, &stop_val) && - get_casted_value(step, &step_val))) { + if (!(get_casted_value(start, &start_val) && get_casted_value(stop, &stop_val) && + get_casted_value(step, &step_val))) { return false; } } int64_t out_size = 0; + if (ov::element::Type(ET).is_integral_number()) { + start_val = std::trunc(start_val); + stop_val = std::trunc(stop_val); + step_val = std::trunc(step_val); + } + int64_t steps = static_cast(std::ceil(double(stop_val - start_val) / step_val)); if (steps > 0) { out_size = steps; } ov::Shape out_shape = ov::Shape({static_cast(out_size)}); out->set_shape(out_shape); - ov::reference::range(&start_val, &step_val, shape_size(out_shape), out->get_data_ptr()); + + T start_val_casted = static_cast(start_val); + T step_val_casted = static_cast(step_val); + ov::reference::range(&start_val_casted, &step_val_casted, shape_size(out_shape), out->get_data_ptr()); return true; } -bool evaluate_power(const HostTensorPtr& out, +bool evaluate_range(const HostTensorPtr& out, const HostTensorPtr& start, const HostTensorPtr& stop, const HostTensorPtr& step, @@ -209,7 +218,7 @@ bool op::v4::Range::evaluate(const HostTensorVector& outputs, const HostTensorVe HostTensorPtr start = inputs[0]; HostTensorPtr stop = inputs[1]; HostTensorPtr step = inputs[2]; - return rangeop::evaluate_power(out, start, stop, step, m_output_type, 4); + return 
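The range.cpp hunk above now reads start/stop/step into double before any casting, so the element count is computed at full precision even when the output type is narrow (f16 in the new eval test). The count rule in isolation, as a sketch that assumes step != 0 (already validated by the callers above):

#include <cmath>
#include <cstdint>

int64_t range_size(double start, double stop, double step, bool integral_output) {
    if (integral_output) {  // integral outputs truncate the scalars first
        start = std::trunc(start);
        stop = std::trunc(stop);
        step = std::trunc(step);
    }
    const auto steps = static_cast<int64_t>(std::ceil((stop - start) / step));
    return steps > 0 ? steps : 0;  // a non-positive count yields an empty range
}

// range_size(0, 3087, 1, false) == 3087, the shape expected by the
// evaluate_dynamic_range_fp16_out test added further down in this patch.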
rangeop::evaluate_range(out, start, stop, step, m_output_type, 4); } bool op::v4::Range::has_evaluate() const { @@ -381,7 +390,7 @@ bool op::v0::Range::evaluate(const HostTensorVector& outputs, const HostTensorVe HostTensorPtr start = inputs[0]; HostTensorPtr stop = inputs[1]; HostTensorPtr step = inputs[2]; - return rangeop::evaluate_power(out, start, stop, step, start->get_element_type(), 0); + return rangeop::evaluate_range(out, start, stop, step, start->get_element_type(), 0); } bool op::v0::Range::has_evaluate() const { diff --git a/src/core/src/op/subtract.cpp b/src/core/src/op/subtract.cpp index 6538918f9f14e2..6b21fa00483b78 100644 --- a/src/core/src/op/subtract.cpp +++ b/src/core/src/op/subtract.cpp @@ -19,14 +19,11 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::subtract(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::subtract(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; @@ -48,14 +45,15 @@ std::shared_ptr Subtract::clone_with_new_inputs(const OutputVector& new_ar bool Subtract::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Subtract_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } diff --git a/src/core/src/op/xor.cpp b/src/core/src/op/xor.cpp index eafe1fe465e315..c96599d9de3cef 100644 --- a/src/core/src/op/xor.cpp +++ b/src/core/src/op/xor.cpp @@ -21,13 +21,15 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& arg0, const Tensor& arg1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; reference::logical_xor(arg0.data(), arg1.data(), out.data(), - arg0.get_shape(), - arg1.get_shape(), + shape0, + shape1, broadcast_spec); return true; } @@ -40,14 +42,15 @@ bool input_supported_type(const element::Type& et) { bool evaluate(const Node* const op, TensorVector& outputs, const TensorVector& inputs) { OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(op, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(op, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), op->get_autob()); } } // namespace diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index dc1263d7991deb..c879e8780f1370 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -21,6 +21,7 @@ #include "openvino/opsets/opset1.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/reference/convert.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/file_util.hpp" #include "pugixml.hpp" #include "transformations/hash.hpp" @@ -532,6 +533,19 
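The subtract.cpp and xor.cpp hunks above move to infer_broadcast_shape(this, inputs), so the output shape is merged from both input shapes under the operator's autobroadcast spec before the reference kernel runs. For NUMPY broadcasting the merge is the usual right-aligned rule; a simplified standalone sketch (static shapes only):

#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

std::vector<std::size_t> numpy_broadcast(std::vector<std::size_t> a, std::vector<std::size_t> b) {
    if (a.size() < b.size())
        std::swap(a, b);
    b.insert(b.begin(), a.size() - b.size(), 1);  // left-pad the shorter rank with 1s
    std::vector<std::size_t> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i) {
        if (a[i] != b[i] && a[i] != 1 && b[i] != 1)
            throw std::invalid_argument("shapes are not broadcastable");
        out[i] = std::max(a[i], b[i]);
    }
    return out;
}

// numpy_broadcast({2, 3, 4}, {3, 1}) == {2, 3, 4}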
@@ class XmlSerializer : public ov::AttributeVisitor { m_xml_node.append_attribute("offset").set_value(static_cast(offset)); m_xml_node.append_attribute("size").set_value(static_cast(new_size)); } + } else if (const auto& a = ov::as_type>>(&adapter)) { + if (name == "value" && translate_type_name(m_node_type_name) == "Const") { + const int64_t size = a->get()->size(); + size_t new_size; + int64_t offset = m_constant_write_handler.write(static_cast(a->get()->get_ptr()), + size, + &new_size, + m_compress_to_fp16, + m_output_element_type); + + m_xml_node.append_attribute("offset").set_value(static_cast(offset)); + m_xml_node.append_attribute("size").set_value(static_cast(new_size)); + } } else if (const auto& a = ov::as_type>(&adapter)) { const auto& attrs = a->get(); diff --git a/src/core/src/runtime/aligned_buffer.cpp b/src/core/src/runtime/aligned_buffer.cpp index d7c5229fcc0efa..4207eefe5db9b7 100644 --- a/src/core/src/runtime/aligned_buffer.cpp +++ b/src/core/src/runtime/aligned_buffer.cpp @@ -8,6 +8,7 @@ #include #include "ngraph/util.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/log.hpp" NGRAPH_SUPPRESS_DEPRECATED_START @@ -64,3 +65,53 @@ AttributeAdapter>::AttributeAdap std::shared_ptr& value) : DirectValueAccessor>(value) {} } // namespace ov +NGRAPH_SUPPRESS_DEPRECATED_END + +namespace ov { +AlignedBuffer::AlignedBuffer() : m_allocated_buffer(nullptr), m_aligned_buffer(nullptr), m_byte_size(0) {} + +AlignedBuffer::AlignedBuffer(size_t byte_size, size_t alignment) : m_byte_size(byte_size) { + m_byte_size = std::max(1, byte_size); + size_t allocation_size = m_byte_size + alignment; + m_allocated_buffer = new char[allocation_size]; + m_aligned_buffer = m_allocated_buffer; + size_t mod = (alignment != 0) ? reinterpret_cast(m_aligned_buffer) % alignment : 0; + + if (mod != 0) { + m_aligned_buffer += (alignment - mod); + } +} + +AlignedBuffer::AlignedBuffer(AlignedBuffer&& other) + : m_allocated_buffer(other.m_allocated_buffer), + m_aligned_buffer(other.m_aligned_buffer), + m_byte_size(other.m_byte_size) { + other.m_allocated_buffer = nullptr; + other.m_aligned_buffer = nullptr; + other.m_byte_size = 0; +} + +AlignedBuffer::~AlignedBuffer() { + if (m_allocated_buffer != nullptr) { + delete[] m_allocated_buffer; + } +} + +AlignedBuffer& AlignedBuffer::operator=(AlignedBuffer&& other) { + if (this != &other) { + if (m_allocated_buffer != nullptr) { + delete[] m_allocated_buffer; + } + m_allocated_buffer = other.m_allocated_buffer; + m_aligned_buffer = other.m_aligned_buffer; + m_byte_size = other.m_byte_size; + other.m_allocated_buffer = nullptr; + other.m_aligned_buffer = nullptr; + other.m_byte_size = 0; + } + return *this; +} + +AttributeAdapter>::AttributeAdapter(std::shared_ptr& value) + : DirectValueAccessor>(value) {} +} // namespace ov diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 803364b289008d..4a7bd1958f1c53 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -910,32 +910,8 @@ void evaluate_nodes(std::map& value_map, } std::shared_ptr get_constant_max_of_type(element::Type_t t) { -#define OPENVINO_TYPE_TO_MAX_CONST(t) \ - case t: \ - return ov::op::v0::Constant::create( \ - t, \ - {}, \ - {std::numeric_limits::value_type>::max()}); \ - break - - switch (t) { - OPENVINO_TYPE_TO_MAX_CONST(element::boolean); - OPENVINO_TYPE_TO_MAX_CONST(element::bf16); - OPENVINO_TYPE_TO_MAX_CONST(element::f16); - OPENVINO_TYPE_TO_MAX_CONST(element::f32); - 
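The AlignedBuffer implementation relocated above keeps the classic over-allocation scheme: reserve byte_size + alignment bytes, then advance the returned pointer past the remainder of the raw address modulo the alignment. The pointer arithmetic in isolation, as a sketch:

#include <cstddef>
#include <cstdint>

char* align_ptr(char* raw, std::size_t alignment) {
    if (alignment == 0)
        return raw;  // mirrors the mod == 0 shortcut in the constructor above
    const std::size_t mod = reinterpret_cast<std::uintptr_t>(raw) % alignment;
    return mod == 0 ? raw : raw + (alignment - mod);
}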
OPENVINO_TYPE_TO_MAX_CONST(element::f64); - OPENVINO_TYPE_TO_MAX_CONST(element::i8); - OPENVINO_TYPE_TO_MAX_CONST(element::i16); - OPENVINO_TYPE_TO_MAX_CONST(element::i32); - OPENVINO_TYPE_TO_MAX_CONST(element::i64); - OPENVINO_TYPE_TO_MAX_CONST(element::u1); - OPENVINO_TYPE_TO_MAX_CONST(element::u8); - OPENVINO_TYPE_TO_MAX_CONST(element::u16); - OPENVINO_TYPE_TO_MAX_CONST(element::u32); - OPENVINO_TYPE_TO_MAX_CONST(element::u64); - default: - return nullptr; - } + auto tensor = ov::util::make_tensor_of_max_value(t); + return tensor ? std::make_shared(tensor) : nullptr; } std::shared_ptr get_constant_min_of_type(element::Type_t t) { @@ -1385,6 +1361,48 @@ std::shared_ptr get_constant_from_source(const Output& source) { } } +template +Tensor make_tensor_of_max_value(const element::Type_t et) { + Tensor t{et, Shape{}}; + *t.data() = std::numeric_limits::max(); + return t; +} + +Tensor make_tensor_of_max_value(const element::Type_t et) { + switch (et) { + case element::boolean: + return make_tensor_of_max_value>(et); + case element::bf16: + return make_tensor_of_max_value>(et); + case element::f16: + return make_tensor_of_max_value>(et); + case element::f32: + return make_tensor_of_max_value>(et); + case element::f64: + return make_tensor_of_max_value>(et); + case element::i8: + return make_tensor_of_max_value>(et); + case element::i16: + return make_tensor_of_max_value>(et); + case element::i32: + return make_tensor_of_max_value>(et); + case element::i64: + return make_tensor_of_max_value>(et); + case element::u1: + return make_tensor_of_max_value>(et); + case element::u8: + return make_tensor_of_max_value>(et); + case element::u16: + return make_tensor_of_max_value>(et); + case element::u32: + return make_tensor_of_max_value>(et); + case element::u64: + return make_tensor_of_max_value>(et); + default: + return {}; + } +} + std::vector get_tensors_partial_shapes(const TensorVector& tensors) { std::vector shapes; shapes.reserve(tensors.size()); diff --git a/src/core/tests/aligned_buffer.cpp b/src/core/tests/aligned_buffer.cpp index fae5929ba3db2a..604d153eeb5c36 100644 --- a/src/core/tests/aligned_buffer.cpp +++ b/src/core/tests/aligned_buffer.cpp @@ -2,31 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/runtime/aligned_buffer.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "gtest/gtest.h" -using namespace ngraph; +using namespace ov; -OPENVINO_SUPPRESS_DEPRECATED_START TEST(aligned_buffer, alignment) { - runtime::AlignedBuffer buffer(100, 64); + AlignedBuffer buffer(100, 64); size_t addr = reinterpret_cast(buffer.get_ptr()) % 64; EXPECT_EQ(addr, 0); } TEST(aligned_buffer, move) { { - runtime::AlignedBuffer buffer1(100, 64); - runtime::AlignedBuffer buffer2(std::move(buffer1)); + AlignedBuffer buffer1(100, 64); + AlignedBuffer buffer2(std::move(buffer1)); EXPECT_EQ(buffer1.size(), 0); EXPECT_EQ(buffer1.get_ptr(), nullptr); EXPECT_EQ(buffer2.size(), 100); EXPECT_NE(buffer2.get_ptr(), nullptr); } { - runtime::AlignedBuffer buffer1(100, 64); - runtime::AlignedBuffer buffer2; + AlignedBuffer buffer1(100, 64); + AlignedBuffer buffer2; buffer2 = std::move(buffer1); EXPECT_EQ(buffer1.size(), 0); EXPECT_EQ(buffer1.get_ptr(), nullptr); diff --git a/src/core/tests/bfloat16.cpp b/src/core/tests/bfloat16.cpp index bb3a35dc9953a3..f031d01226cf41 100644 --- a/src/core/tests/bfloat16.cpp +++ b/src/core/tests/bfloat16.cpp @@ -10,7 +10,7 @@ #include #include "common_test_utils/float_util.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" +#include 
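get_constant_max_of_type now delegates to the new make_tensor_of_max_value, replacing the macro table with one templated helper plus a plain switch. The pattern reduced to a standalone form, with illustrative names and a trimmed type list:

#include <cstdint>
#include <cstring>
#include <limits>
#include <vector>

enum class Et { i32, i64, f32 };

template <class T>
std::vector<char> scalar_max() {
    std::vector<char> buf(sizeof(T));
    const T v = std::numeric_limits<T>::max();
    std::memcpy(buf.data(), &v, sizeof(T));
    return buf;
}

std::vector<char> make_buffer_of_max_value(Et et) {
    switch (et) {
    case Et::i32: return scalar_max<int32_t>();
    case Et::i64: return scalar_max<int64_t>();
    case Et::f32: return scalar_max<float>();
    }
    return {};  // unknown type: empty buffer, like the empty Tensor{} above
}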
"openvino/runtime/aligned_buffer.hpp" #include "openvino/util/log.hpp" using namespace std; @@ -140,9 +140,8 @@ TEST(bfloat16, numeric_limits) { } TEST(benchmark, bfloat16) { - OPENVINO_SUPPRESS_DEPRECATED_START size_t buffer_size = 128 * 3 * 224 * 224; - ngraph::runtime::AlignedBuffer data(buffer_size * sizeof(float), 4096); + ov::AlignedBuffer data(buffer_size * sizeof(float), 4096); float* f = static_cast(data.get_ptr()); // vector data(buffer_size); std::mt19937 rng(2112); @@ -153,53 +152,36 @@ TEST(benchmark, bfloat16) { OPENVINO_INFO << "buffer size " << buffer_size << " floats or " << data.size() << " bytes"; { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 ctor " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::truncate(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 truncate " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::round_to_nearest(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 round to nearest " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::round_to_nearest_even(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 round to nearest even " << timer.get_milliseconds() << "ms"; } - OPENVINO_SUPPRESS_DEPRECATED_END } TEST(bfloat16, assigns) { diff --git a/src/core/tests/constant.cpp b/src/core/tests/constant.cpp index 45ad60d153627a..010bb83b3e75d9 100644 --- a/src/core/tests/constant.cpp +++ b/src/core/tests/constant.cpp @@ -10,6 +10,8 @@ #include "common_test_utils/type_prop.hpp" #include "openvino/core/except.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" using namespace ov; using namespace std; @@ -266,8 +268,8 @@ TEST(constant, int4_string) { EXPECT_EQ(v[2], -1); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x10, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0x01, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); EXPECT_EQ(input, c.get_value_strings()); @@ -318,8 +320,8 @@ TEST(constant, int4_vector_negative_number) { EXPECT_EQ(v[2], int8_t(-1)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0xFE, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0xEF, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); } TEST(constant, int4_vector_positive_number) { @@ -332,8 +334,8 @@ TEST(constant, int4_vector_positive_number) { EXPECT_EQ(v[2], int8_t(5)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x12, p[0]); - EXPECT_EQ(0x50, p[1] & 0xF0); + 
EXPECT_EQ(0x21, p[0]); + EXPECT_EQ(0x05, p[1] & 0x0F); } TEST(constant, int4_vector_broadcast_negative_number) { @@ -795,8 +797,8 @@ TEST(constant, uint4_string) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); EXPECT_EQ(input, c.get_value_strings()); @@ -831,8 +833,8 @@ TEST(constant, uint4_vector) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); } TEST(constant, uint4_vector_broadcast) { @@ -1726,14 +1728,12 @@ TEST(constant, lazy_bitwise_identical) { auto shape = Shape{10, 1000, 1000}; auto type = element::i32; auto byte_size = shape_size(shape) * sizeof(int32_t); - OPENVINO_SUPPRESS_DEPRECATED_START - auto aligned_weights_buffer = std::make_shared(byte_size); + auto aligned_weights_buffer = std::make_shared(byte_size); std::memset(aligned_weights_buffer->get_ptr(), 1, byte_size); - auto weights = std::make_shared>>( - aligned_weights_buffer->get_ptr(), - aligned_weights_buffer->size(), - aligned_weights_buffer); - OPENVINO_SUPPRESS_DEPRECATED_END + auto weights = + std::make_shared>>(aligned_weights_buffer->get_ptr(), + aligned_weights_buffer->size(), + aligned_weights_buffer); using namespace std::chrono; auto create_constant = [&]() { diff --git a/src/core/tests/eval.cpp b/src/core/tests/eval.cpp index 86b3cc2ecf82ce..fabf47f0f2f248 100644 --- a/src/core/tests/eval.cpp +++ b/src/core/tests/eval.cpp @@ -178,6 +178,27 @@ TEST(eval, evaluate_dynamic_range_sum) { ASSERT_EQ(cval, seq); } +TEST(eval, evaluate_dynamic_range_fp16_out) { + auto p_start = make_shared(element::i32, PartialShape{}); + auto p_stop = make_shared(element::i32, PartialShape{}); + auto p_step = make_shared(element::i32, PartialShape{}); + auto range = make_shared(p_start, p_stop, p_step, ov::element::f16); + auto model = make_shared(OutputVector{range}, ParameterVector{p_start, p_stop, p_step}); + auto result_tensor = ov::Tensor(); + auto out_vector = ov::TensorVector{result_tensor}; + auto in_vector = ov::TensorVector{make_tensor({}, {0}), + make_tensor({}, {3087}), + make_tensor({}, {1})}; + ASSERT_TRUE(model->evaluate(out_vector, in_vector)); + result_tensor = out_vector.at(0); + EXPECT_EQ(result_tensor.get_element_type(), element::f16); + EXPECT_EQ(result_tensor.get_shape(), (Shape{3087})); + auto cval = read_vector(result_tensor); + for (size_t i = 0; i < 3087; i++) { + ASSERT_EQ(cval[i], ov::float16(i)); + } +} + TEST(eval, evaluate_broadcast_v3_bidirectional) { Shape shape_a{4, 1}; auto A = make_shared(element::f32, shape_a); diff --git a/src/core/tests/int4.cpp b/src/core/tests/int4.cpp index 2edb82dda0183c..d9a20fbf3649b2 100644 --- a/src/core/tests/int4.cpp +++ b/src/core/tests/int4.cpp @@ -15,9 +15,9 @@ TEST(int4, convert_i4_to_string) { vector values{171, 16}; auto constant = make_shared(element::i4, Shape{3}, &values[0]); - vector ref{"-6", "-5", "1"}; + vector ref{"-5", "-6", "0"}; for (size_t i = 0; i < 3; ++i) { - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/core/tests/type_prop/mod.cpp b/src/core/tests/type_prop/mod.cpp index b1dbab11eea61f..0e5af52401b412 100644 --- a/src/core/tests/type_prop/mod.cpp +++ b/src/core/tests/type_prop/mod.cpp @@ -5,7 +5,171 @@ #include "openvino/op/mod.hpp" #include "arithmetic_ops.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/broadcast.hpp" 
+#include "openvino/op/constant.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" using Type = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_mod, ArithmeticOperator, Type); + +using ov::op::v0::Constant; +using ov::op::v0::Parameter; +using ov::op::v0::Squeeze; +using ov::op::v3::Broadcast; +using ov::op::v3::ShapeOf; + +class TypePropModV1Test : public TypePropOpTest {}; + +TEST_F(TypePropModV1Test, preserve_constant_data_on_inputs) { + const auto a = Constant::create(ov::element::i32, ov::Shape{4}, {4, 10, 22, 5}); + const auto b = Constant::create(ov::element::i32, ov::Shape{4}, {3, 4, 8, 3}); + const auto op = make_op(a, b); + + const auto param = std::make_shared(ov::element::i32, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({1, 2, 6, 2})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_on_inputs) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{5, 6}, 22, {3, 7}, -1, {7, 9}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{3, {12, 18}, {4, 6}, -1, {0, 4}}); + const auto op = make_op(std::make_shared(a), std::make_shared(b)); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{0, 2}, {4, 10}, {0, 5}, -1, -1})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_when_m_is_interval_scalar) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{5, 6}, 22, {3, 7}, -1, {7, 9}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{{12, 18}}); + const auto b_scalar = std::make_shared(std::make_shared(b)); + const auto op = make_op(std::make_shared(a), b_scalar); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{5, 6}, {4, 10}, {3, 7}, -1, {7, 9}})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_when_value_is_interval_scalar) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{3, 7}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{3, {12, 18}, {4, 6}, -1, {0, 4}}); + const auto a_scalar = std::make_shared(std::make_shared(a)); + const auto op = make_op(a_scalar, std::make_shared(b)); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{0, 2}, {3, 7}, {0, 5}, -1, -1})); +} + +// test params as {a, b, exp_result} +using IntervalModuloParams = std::tuple; + +class SingleDimModV1Test : public TypePropModV1Test, public testing::WithParamInterface { +protected: + void SetUp() override { + std::tie(a_dim, b_dim, exp_dim) = GetParam(); + } + + ov::Dimension a_dim, b_dim, exp_dim; +}; + +const auto v_and_m_static = testing::Values(IntervalModuloParams{{0, 0}, {1, 1}, {0, 0}}, + IntervalModuloParams{{0, 0}, {9, 9}, {0, 0}}, + IntervalModuloParams{{0, 0}, {1000, 1000}, {0, 0}}, + IntervalModuloParams{{10, 10}, {3, 3}, {1, 1}}, + 
IntervalModuloParams{{10, 10}, {6, 6}, {4, 4}}, + IntervalModuloParams{{10, 10}, {5, 5}, {0, 0}}, + IntervalModuloParams{{10, 10}, {15, 15}, {10, 10}}); + +const auto v_interval_m_static = testing::Values(IntervalModuloParams{{6, 7}, {4, 4}, {2, 3}}, + IntervalModuloParams{{6, 8}, {4, 4}, {0, 3}}, // Result [0,2,3] + IntervalModuloParams{{6, 8}, {10, 10}, {6, 8}}, + IntervalModuloParams{{6, 8}, {7, 7}, {0, 6}}, + IntervalModuloParams{{4, 8}, {7, 7}, {0, 6}}, + IntervalModuloParams{{15, 16}, {7, 7}, {1, 2}}, + IntervalModuloParams{{5, 20}, {5, 5}, {0, 4}}, + + IntervalModuloParams{{5, 10}, {7, 7}, {0, 6}}); + +const auto v_static_m_interval = testing::Values(IntervalModuloParams{{0, 0}, {3, 13}, {0, 0}}, + IntervalModuloParams{{10, 10}, {2, 4}, {0, 3}}, + IntervalModuloParams{{10, 10}, {2, 6}, {0, 4}}, + IntervalModuloParams{{10, 10}, {6, 9}, {1, 4}}, + IntervalModuloParams{{10, 10}, {9, 11}, {0, 10}}, + IntervalModuloParams{{10, 10}, {3, 11}, {0, 10}}, + IntervalModuloParams{{10, 10}, {3, 10}, {0, 9}}, + IntervalModuloParams{{10, 10}, {7, 8}, {2, 3}}, + IntervalModuloParams{{100, 100}, {2, 20}, {0, 19}}, + // can be estimated accurate as only two results are possible + IntervalModuloParams{{100, 100}, {15, 16}, {4, 10}}, + // can not be estimated accurate as there are three results [10,4,15] + // Requires to calculate all possibilities and pick min, max + IntervalModuloParams{{100, 100}, {15, 17}, {0, 16}}); + +const auto v_and_m_intervals = testing::Values(IntervalModuloParams{{1, 10}, {2, 9}, {0, 8}}, + IntervalModuloParams{{1, 10}, {6, 9}, {0, 8}}, + IntervalModuloParams{{1, 10}, {2, 12}, {0, 10}}, + IntervalModuloParams{{1, 10}, {6, 12}, {0, 10}}, + IntervalModuloParams{{1, 10}, {11, 12}, {1, 10}}, + IntervalModuloParams{{1, 10}, {11, 15}, {1, 10}}, + IntervalModuloParams{{4, 10}, {10, 13}, {0, 10}}, + IntervalModuloParams{{10, 20}, {3, 5}, {0, 4}}, + IntervalModuloParams{{10, 10}, {3, 10}, {0, 9}}, + IntervalModuloParams{{5, 20}, {5, 10}, {0, 9}}, + IntervalModuloParams{{10, 100}, {3, 20}, {0, 19}}, + IntervalModuloParams{{10, 100}, {2, 20}, {0, 19}}, + IntervalModuloParams{{10, 100}, {51, 60}, {0, 59}}); + +// If input is infinite or m has 0 then output is undefined. 
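Making that comment concrete: in the mask built by evaluate_undefined_result_mask, a result counts as undefined when the modulus interval touches zero or when either interval is unbounded, and such elements are reported as the widest possible interval, written {0, -1} in the dimension notation of these tests. An elementwise restatement (sketch only):

#include <cstdint>
#include <limits>

bool mod_result_is_undefined(int64_t v_ub, int64_t m_lb, int64_t m_ub) {
    const auto int_max = std::numeric_limits<int64_t>::max();
    return m_lb == 0 || m_ub == 0  // modulus bounds touch zero
           || m_lb == int_max      // modulus is unbounded ("infinite")
           || v_ub == int_max;     // value is unbounded
}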
+const auto v_and_m_special_values = testing::Values(IntervalModuloParams{{0, -1}, {5, 5}, {0, -1}}, + IntervalModuloParams{{10, -1}, {4, 4}, {0, -1}}, + // Evaluate low/up return [0, max] + // but evaluate both bounds return [0] as `m` has same bounds + IntervalModuloParams{{11, 11}, {0, 0}, {0, 0}}, + IntervalModuloParams{{11, 11}, {0, 5}, {0, -1}}, + IntervalModuloParams{{11, 20}, {0, 5}, {0, -1}}, + IntervalModuloParams{{11, 20}, {0, -1}, {0, -1}}, + IntervalModuloParams{{0, -1}, {0, -1}, {0, -1}}); + +INSTANTIATE_TEST_SUITE_P(v_and_m_static, SingleDimModV1Test, v_and_m_static); +INSTANTIATE_TEST_SUITE_P(value_interval_m_static, SingleDimModV1Test, v_interval_m_static); +INSTANTIATE_TEST_SUITE_P(value_static_m_interval, SingleDimModV1Test, v_static_m_interval); +INSTANTIATE_TEST_SUITE_P(value_and_m_as_intervals, SingleDimModV1Test, v_and_m_intervals); +INSTANTIATE_TEST_SUITE_P(value_and_m_special_values, SingleDimModV1Test, v_and_m_special_values); + +TEST_P(SingleDimModV1Test, preserve_value_on_inputs_i64) { + constexpr auto et = ov::element::i64; + const auto a = std::make_shared(et, ov::PartialShape{a_dim}); + const auto b = std::make_shared(et, ov::PartialShape{b_dim}); + const auto op = make_op(std::make_shared(a), std::make_shared(b)); + + const auto param = std::make_shared(et, ov::Shape{1}); + const auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + + EXPECT_EQ(output_shape, ov::PartialShape({exp_dim})); +} + +TEST_P(SingleDimModV1Test, preserve_value_on_inputs_i32) { + constexpr auto et = ov::element::i32; + const auto a = std::make_shared(et, ov::PartialShape{a_dim}); + const auto b = std::make_shared(et, ov::PartialShape{b_dim}); + const auto op = make_op(std::make_shared(a, et), std::make_shared(b, et)); + + const auto param = std::make_shared(et, ov::Shape{1}); + const auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + + EXPECT_EQ(output_shape, ov::PartialShape({exp_dim})); +} diff --git a/src/core/tests/uint4.cpp b/src/core/tests/uint4.cpp index 5c3b0a5e06af20..8285fdb3cd5e1c 100644 --- a/src/core/tests/uint4.cpp +++ b/src/core/tests/uint4.cpp @@ -13,9 +13,9 @@ TEST(uint4, convert_u4_to_string) { vector values{171, 16}; auto constant = make_shared(element::u4, Shape{3}, &values[0]); - vector ref{"10", "11", "1"}; + vector ref{"11", "10", "0"}; for (size_t i = 0; i < 3; ++i) { - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/core/tests/visitors/visitors.hpp b/src/core/tests/visitors/visitors.hpp index 838eade854181b..893d982a59b3eb 100644 --- a/src/core/tests/visitors/visitors.hpp +++ b/src/core/tests/visitors/visitors.hpp @@ -10,12 +10,13 @@ #include #include "ngraph/factory.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/core/attribute_visitor.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/util/variable.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/tensor.hpp" namespace ov { namespace test { @@ -217,10 +218,9 @@ class DeserializeAttributeVisitor : public AttributeVisitor { } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto a = 
::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { - auto& data = m_values.get(name); - data->read(a->get()->get_ptr(), a->get()->size()); + if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { + auto& data = m_values.get(name); + std::memcpy(a->get()->get_ptr(), data.data(), a->get()->size()); } else if (auto a = ov::as_type< ov::AttributeAdapter>>>( &adapter)) { @@ -240,7 +240,6 @@ class DeserializeAttributeVisitor : public AttributeVisitor { } else { OPENVINO_THROW("Attribute \"", name, "\" cannot be unmarshalled"); } - OPENVINO_SUPPRESS_DEPRECATED_END } // The remaining adapter methods fall back on the void adapter if not implemented void on_adapter(const std::string& name, ValueAccessor& adapter) override { @@ -309,10 +308,9 @@ class SerializeAttributeVisitor : public AttributeVisitor { } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { - ngraph::HostTensorPtr data = std::make_shared(element::u8, Shape{a->get()->size()}); - data->write(a->get()->get_ptr(), a->get()->size()); + if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { + ov::Tensor data(element::u8, Shape{a->get()->size()}); + std::memcpy(data.data(), a->get()->get_ptr(), a->get()->size()); m_values.insert(name, data); } else if (auto a = ov::as_type< ov::AttributeAdapter>>>( @@ -333,7 +331,6 @@ class SerializeAttributeVisitor : public AttributeVisitor { } else { OPENVINO_THROW("Attribute \"", name, "\" cannot be marshalled"); } - OPENVINO_SUPPRESS_DEPRECATED_END } // The remaining adapter methods fall back on the void adapter if not implemented void on_adapter(const std::string& name, ValueAccessor& adapter) override { diff --git a/src/frontends/common/src/manager.cpp b/src/frontends/common/src/manager.cpp index 35df484c2cab26..6194fca7583937 100644 --- a/src/frontends/common/src/manager.cpp +++ b/src/frontends/common/src/manager.cpp @@ -20,21 +20,6 @@ class FrontEndManager::Impl { std::mutex m_loading_mutex; std::vector m_plugins; - // Note, static methods below are required to create an order of initialization of static variables - // e.g. 
if users (not encouraged) created ov::Model globally, we need to ensure proper order of initialization - - /// \return map of shared object per frontend - static std::unordered_map>& get_shared_objects_map() { - static std::unordered_map> shared_objects_map; - return shared_objects_map; - } - - /// \return Mutex to guard access the shared object map - static std::mutex& get_shared_objects_mutex() { - static std::mutex shared_objects_map_mutex; - return shared_objects_map_mutex; - } - public: Impl() { search_all_plugins(); @@ -46,10 +31,6 @@ class FrontEndManager::Impl { auto fe_obj = std::make_shared(); fe_obj->m_shared_object = std::make_shared(plugin.get_so_pointer()); fe_obj->m_actual = plugin.get_creator().m_creator(); - - std::lock_guard guard(get_shared_objects_mutex()); - get_shared_objects_map().emplace(plugin.get_creator().m_name, fe_obj->m_shared_object); - return fe_obj; } @@ -164,6 +145,7 @@ class FrontEndManager::Impl { {".xml", {"ir", "ir"}}, {".onnx", {"onnx", "onnx"}}, {".pb", {"tf", "tensorflow"}}, + {".pbtxt", {"tf", "tensorflow"}}, {".tflite", {"tflite", "tensorflow_lite"}}, {".pdmodel", {"paddle", "paddle"}}, // {".ts", {"pytorch", "pytorch"}}, diff --git a/src/frontends/common/src/plugin_loader.cpp b/src/frontends/common/src/plugin_loader.cpp index a044152d8d590d..a98eff766bbc0d 100644 --- a/src/frontends/common/src/plugin_loader.cpp +++ b/src/frontends/common/src/plugin_loader.cpp @@ -16,17 +16,32 @@ #include -#include #include #include #include "openvino/util/file_util.hpp" +#include "openvino/util/log.hpp" #include "openvino/util/shared_object.hpp" #include "plugin_loader.hpp" using namespace ov; using namespace ov::frontend; +// Note, static methods below are required to create an order of initialization of static variables +// e.g. if users (not encouraged) created ov::Model globally, we need to ensure proper order of initialization + +/// \return map of shared object per frontend +std::unordered_map>& ov::frontend::get_shared_objects_map() { + static std::unordered_map> shared_objects_map; + return shared_objects_map; +} + +/// \return Mutex to guard access the shared object map +std::mutex& ov::frontend::get_shared_objects_mutex() { + static std::mutex shared_objects_map_mutex; + return shared_objects_map_mutex; +} + #ifdef OPENVINO_STATIC_LIBRARY # include "ov_frontends.hpp" @@ -131,6 +146,10 @@ bool PluginInfo::load() { m_load_failed = true; return false; } + + std::lock_guard guard(get_shared_objects_mutex()); + get_shared_objects_map().emplace(get_creator().m_name, get_so_pointer()); + return true; } diff --git a/src/frontends/common/src/plugin_loader.hpp b/src/frontends/common/src/plugin_loader.hpp index 93e6a5cc2eb5a3..dccf8ddf7a39f3 100644 --- a/src/frontends/common/src/plugin_loader.hpp +++ b/src/frontends/common/src/plugin_loader.hpp @@ -4,7 +4,12 @@ #pragma once -#include +#include +#include +#include +#include + +#include "openvino/frontend/manager.hpp" #ifdef _WIN32 static const char PathSeparator[] = ";"; @@ -15,6 +20,9 @@ static const char PathSeparator[] = ":"; namespace ov { namespace frontend { +std::unordered_map>& get_shared_objects_map(); +std::mutex& get_shared_objects_mutex(); + /// \brief Internal data structure holding by each frontend. Includes library handle and extensions. 
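The registry that used to live in FrontEndManager::Impl is now owned by plugin_loader and populated in PluginInfo::load, turning it into a process-wide keep-alive: the shared object of every successfully loaded frontend stays referenced, so the library cannot be unloaded while objects created from it are still alive. The shape of that pattern, standalone:

#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

// Function-local statics give a well-defined initialization order on first use.
std::unordered_map<std::string, std::shared_ptr<void>>& shared_objects() {
    static std::unordered_map<std::string, std::shared_ptr<void>> map;
    return map;
}

std::mutex& shared_objects_mutex() {
    static std::mutex m;
    return m;
}

void register_shared_object(const std::string& name, std::shared_ptr<void> so) {
    std::lock_guard<std::mutex> guard(shared_objects_mutex());
    shared_objects().emplace(name, std::move(so));  // keeps the library mapped
}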
class FrontEndSharedData { friend inline void add_extension_to_shared_data(std::shared_ptr& obj, diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index 8b8dca4d995ffb..ba515b5560641f 100644 --- a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -9,10 +9,10 @@ #include #include "input_model.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" -#include "ngraph/runtime/shared_buffer.hpp" #include "openvino/core/any.hpp" #include "openvino/core/so_extension.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/mmap_object.hpp" #include "transformations/resolve_names_collisions.hpp" @@ -116,8 +116,7 @@ void FrontEnd::add_extension(const ov::Extension::Ptr& ext) { InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const { std::ifstream local_model_stream; std::istream* provided_model_stream = nullptr; - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr weights; + std::shared_ptr weights; auto create_extensions_map = [&]() -> std::unordered_map { std::unordered_map exts; @@ -180,8 +179,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const } else if (variant.is()) { weights_path = variant.as(); #endif - } else if (variant.is>()) { - weights = variant.as>(); + } else if (variant.is>()) { + weights = variant.as>(); } } bool enable_mmap = variants[variants.size() - 1].is() ? variants[variants.size() - 1].as() : false; @@ -204,10 +203,9 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const if (!weights_path.empty()) { if (enable_mmap) { auto mapped_memory = ov::load_mmap_object(weights_path); - weights = - std::make_shared>>(mapped_memory->data(), - mapped_memory->size(), - mapped_memory); + weights = std::make_shared>>(mapped_memory->data(), + mapped_memory->size(), + mapped_memory); } else { std::ifstream bin_stream; bin_stream.open(weights_path.c_str(), std::ios::binary); @@ -222,17 +220,16 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const size_t file_size = bin_stream.tellg(); bin_stream.seekg(0, std::ios::beg); - auto aligned_weights_buffer = std::make_shared(file_size); + auto aligned_weights_buffer = std::make_shared(file_size); bin_stream.read(aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size()); bin_stream.close(); - weights = std::make_shared>>( + weights = std::make_shared>>( aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size(), aligned_weights_buffer); } } - OPENVINO_SUPPRESS_DEPRECATED_END return create_input_model(); } diff --git a/src/frontends/ir/src/input_model.cpp b/src/frontends/ir/src/input_model.cpp index 2f58a68c94f89b..6a32b22f786b52 100644 --- a/src/frontends/ir/src/input_model.cpp +++ b/src/frontends/ir/src/input_model.cpp @@ -18,10 +18,9 @@ #include "openvino/util/common_util.hpp" #include "utils.hpp" -OPENVINO_SUPPRESS_DEPRECATED_START namespace { void parse_pre_process(pugi::xml_node& root, - std::shared_ptr weights, + std::shared_ptr weights, std::shared_ptr model) { /* Preprocessing block can have two preprocessing types: * @@ -183,7 +182,9 @@ void parse_pre_process(pugi::xml_node& root, const char* data = weights->get_ptr() + offset; per_channel_values[item.first] = ov::op::v0::Constant::create(input_type, mean_shape, data); } + OPENVINO_SUPPRESS_DEPRECATED_START auto const_node = get_constant_from_source(std::make_shared(per_channel_values, 0)); + OPENVINO_SUPPRESS_DEPRECATED_END 
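Both weight-loading paths in the IR frontend above end in an ov::SharedBuffer: a raw (pointer, size) view bundled with an owner object (the mmap handle, or the AlignedBuffer just read from disk) whose lifetime it pins. Its essence, with simplified types rather than the real class:

#include <cstddef>
#include <utility>

template <class Owner>
class SharedView {
public:
    SharedView(char* data, std::size_t size, Owner owner)
        : m_data(data),
          m_size(size),
          m_owner(std::move(owner)) {}

    char* data() const { return m_data; }
    std::size_t size() const { return m_size; }

private:
    char* m_data;
    std::size_t m_size;
    Owner m_owner;  // keeps the mapping/allocation alive as long as the view exists
};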
OPENVINO_ASSERT(const_node); const auto& consumers = input_node->output(0).get_target_inputs(); auto add = std::make_shared(input_node, const_node); @@ -193,15 +194,13 @@ void parse_pre_process(pugi::xml_node& root, } } } // namespace -OPENVINO_SUPPRESS_DEPRECATED_END namespace ov { namespace frontend { namespace ir { -OPENVINO_SUPPRESS_DEPRECATED_START class InputModel::InputModelIRImpl { - std::shared_ptr m_weights; + std::shared_ptr m_weights; std::unordered_map m_extensions; std::unordered_map m_opsets; pugi::xml_node m_root; @@ -209,7 +208,7 @@ class InputModel::InputModelIRImpl { public: InputModelIRImpl(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions) : m_weights(weights), m_extensions(extensions) { @@ -227,11 +226,10 @@ class InputModel::InputModelIRImpl { }; InputModel::InputModel(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions) { _impl = std::make_shared(stream, weights, extensions); } -OPENVINO_SUPPRESS_DEPRECATED_END std::shared_ptr InputModel::convert() { return _impl->convert(); diff --git a/src/frontends/ir/src/input_model.hpp b/src/frontends/ir/src/input_model.hpp index 1b4da95f098b64..d5a9b64abaf0f8 100644 --- a/src/frontends/ir/src/input_model.hpp +++ b/src/frontends/ir/src/input_model.hpp @@ -7,9 +7,9 @@ #include #include -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/frontend/manager.hpp" #include "openvino/frontend/visibility.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { namespace frontend { @@ -20,11 +20,9 @@ class InputModel : public ov::frontend::InputModel { std::shared_ptr _impl; public: - OPENVINO_SUPPRESS_DEPRECATED_START InputModel(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions); - OPENVINO_SUPPRESS_DEPRECATED_END std::shared_ptr convert(); }; diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 42be66281d5d24..d245301633e4e3 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -20,6 +20,8 @@ #include "openvino/op/util/read_value_base.hpp" #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/util/variable.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "rt_info_deserializer.hpp" #include "transformations/rt_info/attributes.hpp" #include "utils.hpp" @@ -258,7 +260,6 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< if (skip_names.count(name) && !getStrAttribute(m_node.child("data"), name, val)) return; - OPENVINO_SUPPRESS_DEPRECATED_START if (auto a = ov::as_type>(&adapter)) { static_cast(*a) = ov::element::Type(val); } else if (auto a = ov::as_type>(&adapter)) { @@ -322,7 +323,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_id}); } a->set(m_variables[variable_id]); - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { std::string value; pugi::xml_node dn = m_node.child("data"); auto type = pugixml::utils::get_str_attr(m_node, "type"); @@ -331,7 +332,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< OPENVINO_THROW("No attrtibutes defined for ", type, " op!"); if 
(getStrAttribute(dn, name, value)) { - auto buffer = std::make_shared(value.size()); + auto buffer = std::make_shared(value.size()); auto data = static_cast(buffer->get_ptr()); value.copy(data, value.size()); a->set(buffer); @@ -356,11 +357,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< OPENVINO_THROW("Attribute and shape size are inconsistent for ", type, " op!"); char* data = m_weights->get_ptr() + offset; - auto buffer = - std::make_shared>>( - data, - size, - m_weights); + auto buffer = std::make_shared>>(data, size, m_weights); a->set(buffer); } } else if (auto a = ov::as_type>(&adapter)) { @@ -388,7 +385,6 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< } else { OPENVINO_THROW("Error IR reading. Attribute adapter can not be found for ", name, " parameter"); } - OPENVINO_SUPPRESS_DEPRECATED_END } void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor>& adapter) { @@ -409,10 +405,8 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< adapter.set(model); } -OPENVINO_SUPPRESS_DEPRECATED_START -std::shared_ptr ov::XmlDeserializer::parse_function( - const pugi::xml_node& root, - const std::shared_ptr& weights) { +std::shared_ptr ov::XmlDeserializer::parse_function(const pugi::xml_node& root, + const std::shared_ptr& weights) { // OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse"); struct FunctionNodes { @@ -553,7 +547,6 @@ std::shared_ptr ov::XmlDeserializer::parse_function( return function; } -OPENVINO_SUPPRESS_DEPRECATED_END class MetaDataParser : public ov::Meta { public: @@ -751,12 +744,10 @@ static const std::string& translate_type_name(const std::string& name) { return name; } -OPENVINO_SUPPRESS_DEPRECATED_START -std::shared_ptr ov::XmlDeserializer::create_node( - const std::vector>& inputs, - const pugi::xml_node& node, - const std::shared_ptr& weights, - const GenericLayerParams& params) { +std::shared_ptr ov::XmlDeserializer::create_node(const std::vector>& inputs, + const pugi::xml_node& node, + const std::shared_ptr& weights, + const GenericLayerParams& params) { // Check that inputs are correctly defined for (size_t i = 0; i < inputs.size(); i++) { if (!inputs[i].get_node()) @@ -959,4 +950,3 @@ std::shared_ptr ov::XmlDeserializer::create_node( return ovNode; } -OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/frontends/ir/src/ir_deserializer.hpp b/src/frontends/ir/src/ir_deserializer.hpp index f2062393f2986a..0b0d606ea4170b 100644 --- a/src/frontends/ir/src/ir_deserializer.hpp +++ b/src/frontends/ir/src/ir_deserializer.hpp @@ -10,11 +10,11 @@ #include #include "input_model.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/core/attribute_visitor.hpp" #include "openvino/core/op_extension.hpp" #include "openvino/op/loop.hpp" #include "openvino/op/util/sub_graph_base.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "utils.hpp" namespace ov { @@ -58,9 +58,8 @@ struct GenericLayerParams { class XmlDeserializer : public ov::AttributeVisitor { public: - OPENVINO_SUPPRESS_DEPRECATED_START explicit XmlDeserializer(const pugi::xml_node& node, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& opsets, const std::unordered_map& extensions, std::unordered_map>& variables, @@ -71,7 +70,6 @@ class XmlDeserializer : public ov::AttributeVisitor { m_extensions(extensions), m_variables(variables), m_version(version) {} - 
OPENVINO_SUPPRESS_DEPRECATED_END void on_adapter(const std::string& name, ov::ValueAccessor& value) override { std::string val; @@ -164,14 +162,12 @@ class XmlDeserializer : public ov::AttributeVisitor { // TODO consider to call only once per layer/TI-Loop node IoMap updated_io_map(const pugi::xml_node& node, const pugi::xml_node& body_node); - OPENVINO_SUPPRESS_DEPRECATED_START /// \brief Traverses xml node representation in order to create ov function for it. /// \param node xml node representation /// \param weights weights attached to current node /// \return shared pointer to function representing input node std::shared_ptr parse_function(const pugi::xml_node& root, - const std::shared_ptr& weights); - OPENVINO_SUPPRESS_DEPRECATED_END + const std::shared_ptr& weights); /// \brief Traverses xml node representation in order to get the purpose attribute of /// inputs/outputs in the body of Loop op. \param node xml node representation \return struct /// with value of purpuse attribute @@ -179,12 +175,10 @@ class XmlDeserializer : public ov::AttributeVisitor { GenericLayerParams parse_generic_params(const pugi::xml_node& node); - OPENVINO_SUPPRESS_DEPRECATED_START std::shared_ptr create_node(const ov::OutputVector& inputs, const pugi::xml_node& node, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const GenericLayerParams& params); - OPENVINO_SUPPRESS_DEPRECATED_END void read_meta_data(const std::shared_ptr& model, const pugi::xml_node& meta_section); @@ -194,9 +188,7 @@ class XmlDeserializer : public ov::AttributeVisitor { // -- DATA -- const pugi::xml_node m_node; - OPENVINO_SUPPRESS_DEPRECATED_START - const std::shared_ptr& m_weights; - OPENVINO_SUPPRESS_DEPRECATED_END + const std::shared_ptr& m_weights; const std::unordered_map& m_opsets; const std::unordered_map& m_extensions; std::unordered_map>& m_variables; diff --git a/src/frontends/onnx/frontend/src/core/tensor.hpp b/src/frontends/onnx/frontend/src/core/tensor.hpp index cb54edf8e95e22..76a97b057f2a61 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.hpp +++ b/src/frontends/onnx/frontend/src/core/tensor.hpp @@ -15,6 +15,7 @@ #include "ngraph/shape.hpp" #include "ngraph/type/element_type.hpp" #include "onnx_common/utils.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "utils/common.hpp" #include "utils/tensor_external_data.hpp" @@ -302,15 +303,13 @@ class Tensor { template std::vector get_external_data() const { const auto ext_data = detail::TensorExternalData(*m_tensor_proto); - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr buffer = nullptr; + std::shared_ptr buffer = nullptr; if (m_mmap_cache) { buffer = ext_data.load_external_mmap_data(m_model_dir, m_mmap_cache); } else { buffer = ext_data.load_external_data(m_model_dir); } return std::vector(buffer->get_ptr(), buffer->get_ptr() + buffer->size()); - OPENVINO_SUPPRESS_DEPRECATED_END } const void* get_data_ptr() const { diff --git a/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp b/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp new file mode 100644 index 00000000000000..8ebca88b32f4cf --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp @@ -0,0 +1,86 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/blackmanwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { 
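The graph assembled by blackmanwindow below computes, for each n in [0, size), w[n] = 0.42 - 0.50*cos(2*pi*n/N) + 0.08*cos(4*pi*n/N), where N = size for a periodic window and N = size - 1 otherwise. The same computation in scalar form, a sketch for cross-checking (assumes size > 1 in the symmetric case):

#define _USE_MATH_DEFINES
#include <cmath>
#include <vector>

std::vector<float> blackman_window(int size, bool periodic) {
    const double n = periodic ? size : size - 1;  // the divisor N
    std::vector<float> w(size);
    for (int i = 0; i < size; ++i) {
        w[i] = static_cast<float>(0.42 - 0.50 * std::cos(2.0 * M_PI * i / n) +
                                  0.08 * std::cos(4.0 * M_PI * i / n));
    }
    return w;
}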
+OutputVector blackmanwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX BlackmanWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#blackmanwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.42f}); + const auto a_1 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{-0.50f}); + const auto a_2 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.08f}); + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto four_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{4.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor_1, factor_2; + if (periodic) { + factor_1 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + factor_2 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, four_const), + float_size)); + } else { + factor_1 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + factor_2 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, four_const), + std::make_shared(float_size, one_const))); + } + + const auto cos_1 = std::make_shared(factor_1); + const auto cos_2 = std::make_shared(factor_2); + const auto scaled_cos_1 = std::make_shared(cos_1, a_1); + const auto scaled_cos_2 = std::make_shared(cos_2, a_2); + const auto y_values = + std::make_shared(std::make_shared(a_0, scaled_cos_1), scaled_cos_2); + + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp b/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp new file mode 100644 index 00000000000000..ccff09c84817af --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector blackmanwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hammingwindow.cpp b/src/frontends/onnx/frontend/src/op/hammingwindow.cpp new file mode 
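hammingwindow and hannwindow (the next two files) build the same cosine graph with different coefficients: Hamming uses a0 = 25/46 and a1 = 1 - 25/46, Hann uses a0 = a1 = 0.5, and both compute w[n] = a0 - a1*cos(2*pi*n/N). One parameterized scalar sketch covers both (same size > 1 caveat as above):

#define _USE_MATH_DEFINES
#include <cmath>
#include <vector>

std::vector<float> cosine_window(int size, bool periodic, float a0, float a1) {
    const double n = periodic ? size : size - 1;  // the divisor N
    std::vector<float> w(size);
    for (int i = 0; i < size; ++i)
        w[i] = static_cast<float>(a0 - a1 * std::cos(2.0 * M_PI * i / n));
    return w;
}

// cosine_window(N, p, 25.0f / 46.0f, 1.0f - 25.0f / 46.0f)  -> Hamming
// cosine_window(N, p, 0.5f, 0.5f)                           -> Hann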
100644 index 00000000000000..25d557f7de6bdc --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hammingwindow.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/hammingwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector hammingwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX HammingWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#hammingwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = std::make_shared( + std::make_shared(ov::element::f32, ov::Shape(), std::vector{25.0f}), + std::make_shared(ov::element::f32, ov::Shape(), std::vector{46.0f})); + const auto a_1 = std::make_shared( + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}), + a_0); + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor; + if (periodic) { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + } else { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + } + + const auto cos = std::make_shared(factor); + const auto scaled_cos = std::make_shared(cos, a_1); + const auto y_values = std::make_shared(a_0, scaled_cos); + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hammingwindow.hpp b/src/frontends/onnx/frontend/src/op/hammingwindow.hpp new file mode 100644 index 00000000000000..d088b4105abc3a --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hammingwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector hammingwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hannwindow.cpp b/src/frontends/onnx/frontend/src/op/hannwindow.cpp new file mode 100644 index 
00000000000000..b0e28afd2e5570 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hannwindow.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/hannwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector hannwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX HannWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#hannwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.5f}); + const auto a_1 = a_0; + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor; + if (periodic) { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + } else { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + } + + const auto cos = std::make_shared(factor); + const auto scaled_cos = std::make_shared(cos, a_1); + const auto y_values = std::make_shared(a_0, scaled_cos); + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hannwindow.hpp b/src/frontends/onnx/frontend/src/op/hannwindow.hpp new file mode 100644 index 00000000000000..0c9e6993048ef3 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hannwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector hannwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index e6707335afd0b8..c4d9a50c4ca637 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -29,6 +29,7 @@ #include "op/average_pool.hpp" #include "op/batch_norm.hpp" #include "op/bitshift.hpp" 
+#include "op/blackmanwindow.hpp" #include "op/cast.hpp" #include "op/cast_like.hpp" #include "op/ceil.hpp" @@ -75,6 +76,8 @@ #include "op/greater.hpp" #include "op/grid_sample.hpp" #include "op/gru.hpp" +#include "op/hammingwindow.hpp" +#include "op/hannwindow.hpp" #include "op/hard_sigmoid.hpp" #include "op/hard_swish.hpp" #include "op/hardmax.hpp" @@ -345,6 +348,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("BatchNormalization", 1, batch_norm); REGISTER_OPERATOR("BatchNormalization", 7, batch_norm); REGISTER_OPERATOR("BitShift", 1, bitshift); + REGISTER_OPERATOR("BlackmanWindow", 1, blackmanwindow); REGISTER_OPERATOR("Cast", 1, cast); REGISTER_OPERATOR("CastLike", 1, cast_like); REGISTER_OPERATOR("Ceil", 1, ceil); @@ -392,6 +396,8 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("Greater", 1, greater); REGISTER_OPERATOR("GridSample", 1, grid_sample); REGISTER_OPERATOR("GRU", 1, gru); + REGISTER_OPERATOR("HannWindow", 1, hannwindow); + REGISTER_OPERATOR("HammingWindow", 1, hammingwindow); REGISTER_OPERATOR("Hardmax", 1, hardmax); REGISTER_OPERATOR("Hardmax", 13, hardmax); REGISTER_OPERATOR("HardSigmoid", 1, hard_sigmoid); diff --git a/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp b/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp index 53e83e5d714101..9a40d1fc6d7595 100644 --- a/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp +++ b/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp @@ -12,7 +12,6 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/log.hpp" -OPENVINO_SUPPRESS_DEPRECATED_START namespace ngraph { namespace onnx_import { namespace detail { @@ -51,13 +50,13 @@ Buffer TensorExternalData::load_external_mmap_data(const std:: if (m_data_length > mapped_memory->size() || mapped_memory->size() == 0) { throw error::invalid_external_data{*this}; } - return std::make_shared>>( + return std::make_shared>>( mapped_memory->data() + m_offset, m_data_length > 0 ? 
m_data_length : static_cast(file_size) - m_offset, mapped_memory); } -Buffer TensorExternalData::load_external_data(const std::string& model_dir) const { +Buffer TensorExternalData::load_external_data(const std::string& model_dir) const { auto full_path = ov::util::path_join({model_dir, m_data_location}); #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) NGRAPH_SUPPRESS_DEPRECATED_START @@ -82,14 +81,13 @@ Buffer TensorExternalData::load_external_data(co // default value of m_offset is 0 external_data_stream.seekg(m_offset, std::ios::beg); - auto read_data = std::make_shared(read_data_length); + auto read_data = std::make_shared(read_data_length); external_data_stream.read(read_data->get_ptr(), read_data_length); external_data_stream.close(); - auto buffer = std::make_shared>>( - read_data->get_ptr(), - read_data->size(), - read_data); + auto buffer = std::make_shared>>(read_data->get_ptr(), + read_data->size(), + read_data); return buffer; } diff --git a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp index a13ccd457f485c..eb04e001e7ed4c 100644 --- a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp +++ b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp @@ -6,15 +6,15 @@ #include -#include "ngraph/runtime/shared_buffer.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" namespace ngraph { namespace onnx_import { namespace detail { -OPENVINO_SUPPRESS_DEPRECATED_START template -using Buffer = std::shared_ptr>>; +using Buffer = std::shared_ptr>>; using MappedMemoryHandles = std::shared_ptr>>; /// \brief Helper class used to load tensor data from external files class TensorExternalData { @@ -28,7 +28,7 @@ class TensorExternalData { /// the invalid_external_data exception is thrown. 
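As background on the buffer types used in this hunk: the deprecated `ngraph` shared buffer is replaced with `ov::AlignedBuffer` owned through `ov::SharedBuffer`. Below is a minimal sketch of that ownership pattern, using only the constructor and accessor calls visible in this diff; `load_file` is a hypothetical helper, not part of the frontend:

```cpp
#include <fstream>
#include <memory>
#include <string>

#include "openvino/runtime/aligned_buffer.hpp"
#include "openvino/runtime/shared_buffer.hpp"

// Hypothetical helper: read `length` bytes from `path` into an owning
// AlignedBuffer, then expose it as a SharedBuffer view. The third
// constructor argument of SharedBuffer keeps the owner alive for as
// long as the returned view exists.
std::shared_ptr<ov::AlignedBuffer> load_file(const std::string& path, size_t length) {
    auto owner = std::make_shared<ov::AlignedBuffer>(length);  // allocates `length` bytes
    std::ifstream stream(path, std::ios::binary);
    stream.read(owner->get_ptr<char>(), length);  // fill the owning buffer
    return std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(owner->get_ptr<char>(),
                                                                                  owner->size(),
                                                                                  owner);
}
```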
/// /// \return External binary data loaded into the SharedBuffer - Buffer load_external_data(const std::string& model_dir) const; + Buffer load_external_data(const std::string& model_dir) const; /// \brief Map (mmap for lin, MapViewOfFile for win) external data from tensor passed to constructor /// @@ -50,7 +50,6 @@ class TensorExternalData { uint64_t m_data_length = 0; std::string m_sha1_digest{}; }; -OPENVINO_SUPPRESS_DEPRECATED_END } // namespace detail } // namespace onnx_import } // namespace ngraph diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index 857c3853cf8fd2..87220792d2d349 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -127,6 +127,7 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): "Not equal to tolerance") xfail_issue_58033 = xfail_test(reason="Einsum operation misses support for complex ellipsis equations") xfail_issue_58676 = xfail_test(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07") +skip_issue_58676 = pytest.mark.skip(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07") xfail_issue_onnx_models_140 = xfail_test(reason="https://github.com/onnx/models/issues/140") xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") diff --git a/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt b/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt new file mode 100644 index 00000000000000..f8759ce921028a --- /dev/null +++ b/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "BlackmanWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_blackmanwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt new file mode 100644 index 00000000000000..1d60e783ead99a --- /dev/null +++ b/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "BlackmanWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_blackmanwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt b/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt new file mode 100644 index 00000000000000..2bf75ed29fe7f6 --- /dev/null +++ 
b/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HammingWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_hammingwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt new file mode 100644 index 00000000000000..1c9a9019829383 --- /dev/null +++ b/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HammingWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 0 for symmetric, 1 for periodic + type: INT + } + } + name: "test_hammingwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt b/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt new file mode 100644 index 00000000000000..2895bf5ad9b4d9 --- /dev/null +++ b/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HannWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_hannwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt new file mode 100644 index 00000000000000..ec2bc2b8e42bef --- /dev/null +++ b/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HannWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 0 for symmetric, 1 for periodic + type: INT + } + } + name: "test_hannwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + 
} + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index a442160ed2379c..361805e45cf0d4 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -6716,3 +6716,171 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unique_3d_with_duplicates_and_axis_2) test_case.run(); } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_blackmanwindow_periodic) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/blackmanwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {-0.000000014901161f, + 0.040212844f, + 0.20077012f, + 0.50978714f, + 0.8492299f, + 0.99999994f, + 0.84922975f, + 0.5097869f, + 0.20077008f, + 0.040212862f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_blackmanwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/blackmanwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {-0.00000001f, + 0.05086961f, + 0.25800052f, + 0.63000000f, + 0.95112991f, + 0.95112979f, + 0.62999994f, + 0.25800028f, + 0.05086958f, + -0.00000001f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hammingwindow_periodic) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hammingwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.08695650f, + 0.17414439f, + 0.40240526f, + 0.68455124f, + 0.91281211f, + 1.00000000f, + 0.91281211f, + 0.68455112f, + 0.40240520f, + 0.17414442f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hammingwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hammingwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.08695650f, + 0.19376230f, + 0.46420413f, + 0.77173913f, + 0.97246838f, + 0.97246838f, + 0.77173907f, + 0.46420389f, + 0.19376221f, + 0.08695650f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hannwindow_periodic) { 
+ auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hannwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.00000000f, + 0.09549150f, + 0.34549153f, + 0.65450853f, + 0.90450847f, + 1.00000000f, + 0.90450847f, + 0.65450835f, + 0.34549144f, + 0.09549153f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hannwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hannwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.00000000f, + 0.11697778f, + 0.41317594f, + 0.75000000f, + 0.96984637f, + 0.96984625f, + 0.74999994f, + 0.41317570f, + 0.11697769f, + 0.00000000f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} diff --git a/src/frontends/onnx/tests/skip_tests_config.cpp b/src/frontends/onnx/tests/skip_tests_config.cpp index 234cb99dfe9257..99d6bc297a1110 100644 --- a/src/frontends/onnx/tests/skip_tests_config.cpp +++ b/src/frontends/onnx/tests/skip_tests_config.cpp @@ -9,9 +9,11 @@ std::vector disabledTestPatterns() { return { -#ifndef BUILD_SHARED_LIBS +#ifdef OPENVINO_STATIC_LIBRARY // Disable tests for static libraries - ".*FrontendLibCloseTest.*" + ".*FrontendLibCloseTest.*", #endif + // CVS-123201 + ".*testUnloadLibBeforeDeletingDependentObject.*", }; } diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index d1ef686bdd4124..779444658d1e28 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -2,6 +2,7 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform import logging import onnx.backend.test @@ -24,6 +25,7 @@ xfail_issue_38735, skip_issue_39658, skip_issue_39658, + skip_issue_58676, xfail_issue_44858, xfail_issue_44965, xfail_issue_45180, @@ -376,12 +378,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ( xfail_issue_90649, - "OnnxBackendNodeModelTest.test_blackmanwindow_cpu", - "OnnxBackendNodeModelTest.test_blackmanwindow_symmetric_cpu", - "OnnxBackendNodeModelTest.test_hammingwindow_cpu", - "OnnxBackendNodeModelTest.test_hammingwindow_symmetric_cpu", - "OnnxBackendNodeModelTest.test_hannwindow_cpu", - "OnnxBackendNodeModelTest.test_hannwindow_symmetric_cpu", "OnnxBackendNodeModelTest.test_melweightmatrix_cpu", "OnnxBackendNodeModelTest.test_sequence_map_add_1_sequence_1_tensor_cpu", "OnnxBackendNodeModelTest.test_sequence_map_add_2_sequences_cpu", @@ -683,6 +679,22 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ] +if platform.system() == 'Darwin': + tests_expected_to_fail.extend([ + ( + skip_issue_58676, + "OnnxBackendNodeModelTest.test_mish_expanded_cpu" + ), + ( + skip_issue_58676, + 
"OnnxBackendNodeModelTest.test_resize_downsample_scales_linear_cpu" + ), + ( + skip_issue_58676, + "OnnxBackendNodeModelTest.test_div_uint8_cpu" + )] + ) + for test_group in tests_expected_to_fail: for test_case in test_group[1:]: expect_fail(f"{test_case}", test_group[0]) diff --git a/src/frontends/paddle/tests/skip_tests_config.cpp b/src/frontends/paddle/tests/skip_tests_config.cpp index 234cb99dfe9257..144e9d001ae276 100644 --- a/src/frontends/paddle/tests/skip_tests_config.cpp +++ b/src/frontends/paddle/tests/skip_tests_config.cpp @@ -9,7 +9,7 @@ std::vector disabledTestPatterns() { return { -#ifndef BUILD_SHARED_LIBS +#ifdef OPENVINO_STATIC_LIBRARY // Disable tests for static libraries ".*FrontendLibCloseTest.*" #endif diff --git a/src/frontends/pytorch/README.md b/src/frontends/pytorch/README.md new file mode 100644 index 00000000000000..92a38d693d9b21 --- /dev/null +++ b/src/frontends/pytorch/README.md @@ -0,0 +1,141 @@ +# OpenVINO PyTorch Frontend + +The PyTorch Frontend (PT FE) is a C++ based OpenVINO Frontend component that is +responsible for reading and converting a PyTorch model to an `ov::Model` object +that can be further serialized into the Intermediate Representation (IR) format. + +## Key Contacts + +People from the [openvino-pytorch-frontend-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-pytorch-frontend-maintainers) +have the rights to approve and merge PRs to the PyTorch Frontend component. +They can assist with any questions about the component. + +## Components + +The structure of OpenVINO PyTorch Frontend sources includes the following +directories: + +* [include](./include) is a public frontend API. +* [src](./src/) folder contains the sources of the component. + +## Architecture + +OpenVINO PyTorch Frontend is a C++ component that uses [TorchScriptPythonDecoder](../../bindings/python/src/openvino/frontend/pytorch/ts_decoder.py) +in Python code to parse a PyTorch model from a Python object. Usually, the frontend is +used inside [openvino.convert_model](../../../tools/ovc) in Python code or inside +openvino backend in `torch.compile_model`, in which case `TorchFXPythonDecoder` +is used to decode `torch.fx.graph`. The entire model conversion workflow can be +represented by the following diagram. + +```mermaid +flowchart TD + A[(torch.nn.Module)] --> torch.compile + subgraph torch.compile + subgraph TorchFXPythonDecoder + torch.fx.graph_module.GraphModule + end + TorchFXPythonDecoder --> E("pytorch::FrontEnd::load()") + E -->|ov::InputModel| F("pytorch::FrontEnd::convert()") + F --> G[(ov::Model)] + end + A[(torch.nn.Module)] --> openvino.convert_model + subgraph openvino.convert_model + subgraph TorchScriptPythonDecoder + torch.jit.trace ~~~ torch.jit.script + end + TorchScriptPythonDecoder --> B("pytorch::FrontEnd::load()") + B -->|ov::InputModel| C("pytorch::FrontEnd::convert()") + end + openvino.convert_model --> D[(ov::Model)] +``` + +OpenVINO PyTorch Frontend supports extensions. To add an extension, use +`ov::frontend::pytorch::Frontend::add_extension()` API. +The following extension types are supported: + +* `ov::frontend::tensorflow::ConversionExtension` or `ov::frontend::ConversionExtension` - add a new Loader into the conversion pipeline. +* `ov::TelemetryExtension` - enable telemetry for the frontend. +* `ov::BaseOpExtension` - enable support for a custom operation. +* `ov::detail::SOExtension` - allow support for `ov::BaseOpExtension` extensions loaded from an external library. 
+ +## How to Implement Support for a New PyTorch Operation + +PyTorch conversion into the OpenVINO opset operations consists of two stages: +1. Conversion of PyTorch operations to OpenVINO opset using [translators](./src/op/), + which directly transform a PyTorch operation into a sub-graph of the OpenVINO + opset. This is a 1->N conversion. +2. [Internal Transformations](./src/transforms) that transform a sub-graph of + operations into a sub-graph of the OpenVINO opset. This is an N->N conversion. + +### Operation Translation + +Most PyTorch operations can be converted by a single `translator`. The +dictionary of `translators` is placed in the [op_table.cpp](./src/op_table.cpp) +file and each translator is located in the [op](./src/op/) +directory: + +https://github.com/openvinotoolkit/openvino/blob/491454103ea2f29b242587c6084c19868a879a82/src/frontends/pytorch/src/op_table.cpp#L222-L227 + +The main rules for translator implementation: +1. Support dynamic shapes and ranks, undefined types, including future support of new types, such as strings and complex numbers. +2. Try to maintain the same algorithmic complexity of the decomposition. Fewer operations are usually better. +3. Use the latest OpenVINO opset version for the translation. +4. Use helper routines for operation checks and graph construction from `utils.hpp`. +5. Call `NodeContext::mark_node()` for each created node (a minimal illustrative translator sketch is shown below, after the layer-tests introduction). + +#### Inplace and Mutable Operations + +Some PyTorch operations modify the input tensor rather than the output. For example, +`aten::add` writes the result of addition to the output, but `aten::add_` writes the result +to its first input. To correctly convert such an operation: +* Ensure that the output tensor produced by the translation has the same type and shape as the initial input. +* Call `NodeContext::mutate_input()` to change the input tensor with the new value. + +#### PtFrameworkNode Primitive + +`PtFrameworkNode` is used to represent an unconverted operation from the original +model. You can use `FrontEnd::convert_partially()` instead of `FrontEnd::convert()` +to get an `ov::Model` containing unconverted operations. + +#### Operations Accepting Strings + +At the moment, OpenVINO core does not support strings. However, since strings in models are usually constants, you can extract them as `std::string` directly from Python using `NodeContext::const_input()`. + +#### Operations with Lists, Tuples, and Dicts + +These types are also not supported by OpenVINO core and generally require +implementing a transformation for N->N conversion. However, in some simple cases, lists +and tuples can be processed. Helpers for working with lists can be found in `utils.hpp`. +For example, `get_list_as_outputs` enables you to get list elements to work with them +in the translator or transformation. + +### Internal Transformations + +In rare cases, converting PyTorch operations requires a transformation. The main +difference between a transformation and a translation is that a transformation works on the graph rather +than on the `NodeContext` of a single operation. This means that some functionality +provided by `NodeContext` is not accessible in a transformation and usually +requires working with `PtFrameworkNode` directly. [General rules](https://docs.openvino.ai/2023.1/openvino_docs_transformations.html) +for writing transformations also apply to PT FE transformations. + +### PyTorch Frontend Layer Tests + +The layer tests are Python-based tests that check if a PyTorch operation is +supported by PT FE.
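Before the testing-pipeline steps, here is the minimal translator sketch referenced in the "Operation Translation" rules above. It is hypothetical (`translate_my_relu` is not a real entry in `op_table.cpp`); the helper names are the ones visible elsewhere in this diff:

```cpp
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/op/relu.hpp"
#include "utils.hpp"

namespace ov {
namespace frontend {
namespace pytorch {
namespace op {

// Hypothetical 1->1 translator: maps a single-input PyTorch op onto one
// node from the latest OpenVINO opset.
OutputVector translate_my_relu(const NodeContext& context) {
    num_inputs_check(context, 1, 1);  // validate the expected input count
    auto x = context.get_input(0);
    // mark_node associates the created node with the source op (rule 5 above)
    return {context.mark_node(std::make_shared<ov::op::v0::Relu>(x))};
}

}  // namespace op
}  // namespace pytorch
}  // namespace frontend
}  // namespace ov
```

A matching `{"aten::my_relu", op::translate_my_relu}` entry in the supported-ops map of `op_table.cpp` would wire such a translator in, following the map entries visible in this diff.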
The testing pipeline of the layer tests consists of four +steps: +1. Create a simple model containing the PyTorch operation to be tested. +2. Convert this model into an OpenVINO Model. +3. Infer the original model using PyTorch and infer the OpenVINO Model. +4. Compare the inference results between both frameworks. + +To set up the environment for running the layer tests, follow these [instructions](../../../tests/layer_tests/README.md). + +To test the entire suite of the PyTorch operation set support, run the following command: +```bash +python -m pytest layer_tests/pytorch_tests +``` + +## See Also + * [OpenVINO README](../../../README.md) + * [OpenVINO Core Components](../../README.md) + * [Developer documentation](../../../docs/dev/index.md) diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp index 066c203e3a1938..d5878783c314af 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp @@ -40,6 +40,9 @@ class TorchDecoder : public IDecoder { // Return shape if inputs has torch::Tensor type in the original model, otherwise returns the shape [] of a scalar virtual PartialShape get_input_shape(size_t index) const = 0; + // Return strides if inputs has torch::Tensor type in original model, otherwise return []. + virtual const std::vector& get_input_strides(size_t index) const = 0; + // Return element::Type when it the original type can be represented, otherwise returns PT-specific data type object // (see custom_type.hpp) virtual Any get_input_type(size_t index) const = 0; diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 0910aa3e057e72..36d4027dcc426f 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -20,6 +20,7 @@ #include "transformations/op_conversions/convert_convertlike.hpp" #include "transformations/resolve_names_collisions.hpp" #include "transforms.hpp" +#include "transforms/align_types_removal.hpp" #include "transforms/append_list_unpack_replacer.hpp" #include "transforms/aten_cat_replacer.hpp" #include "transforms/aten_getitem_replacer.hpp" @@ -41,6 +42,7 @@ #include "transforms/softmax_reshape_elimination.hpp" #include "transforms/string_equality_replacer.hpp" #include "transforms/tuple_unpack_replacer.hpp" +#include "transforms/u4_block_repack.hpp" #include "translate_session.hpp" namespace ov { @@ -176,6 +178,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass( element::TypeVector{element::u8, element::i8, element::u4, element::i4}); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -200,8 +203,11 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); + // Second pass of AlignTypesRemoval after all converting transformations + manager.register_pass(); manager.register_pass(); manager.run_passes(model); diff --git a/src/frontends/pytorch/src/helper_ops/align_types.hpp b/src/frontends/pytorch/src/helper_ops/align_types.hpp new file mode 100644 index 00000000000000..cd69af250fa30d --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/align_types.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation 
+// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "internal_op.hpp" +#include "openvino/frontend/decoder.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class AlignTypes : public InternalOperation { +public: + AlignTypes(const Output& lhs, const Output& rhs, bool align_scalars) + : InternalOperation("ov::align_types", + {lhs, rhs}, + 2, + "This is internal operation for type alignment and should be removed " + "at normalization step. It can't be removed if types can't be resolved."), + m_align_scalars(align_scalars) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + auto lhs = input_value(0); + auto rhs = input_value(1); + auto out_type = infer_types(lhs, rhs, m_align_scalars); + set_output_type(0, out_type, get_input_partial_shape(0)); + set_output_type(1, out_type, get_input_partial_shape(1)); + } + +private: + const bool m_align_scalars; +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/helper_ops/internal_op.hpp b/src/frontends/pytorch/src/helper_ops/internal_op.hpp new file mode 100644 index 00000000000000..510654dce8620a --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/internal_op.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/frontend/decoder.hpp" +#include "pt_framework_node.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class InternalOpDecoder : public DummyDecoder { +public: + explicit InternalOpDecoder(const std::string& op_type, const size_t num_outputs) + : m_op_type(op_type), + m_num_outputs(num_outputs) {} + const std::string& get_op_type() const override { + return m_op_type; + } + size_t num_of_outputs() const override { + return m_num_outputs; + } + size_t get_subgraph_size() const override { + return 0; + } + const std::string& decoder_type_name() const override { + return m_decoder_type; + } + +private: + const std::string m_op_type; + const std::string m_decoder_type = "internal_op"; + const size_t m_num_outputs; +}; + +class InternalOperation : public PtFrameworkNode { +protected: + InternalOperation(const std::string& op_type, + const OutputVector& inputs, + size_t num_outputs, + const std::string& no_conversion_reason) + : PtFrameworkNode(std::make_shared(op_type, num_outputs), inputs) { + auto attrs = get_attrs(); + attrs[PtFrameworkNode::failed_conversion_key] = no_conversion_reason; + set_attrs(attrs); + } +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/add.cpp b/src/frontends/pytorch/src/op/add.cpp index f0a997b6f8fa3a..33699ad90fa39c 100644 --- a/src/frontends/pytorch/src/op/add.cpp +++ b/src/frontends/pytorch/src/op/add.cpp @@ -15,7 +15,9 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_add(const NodeContext& context) { +using namespace ov::op; + +OutputVector translate_add_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto lhs = context.get_input(0); auto rhs = context.get_input(1); @@ -26,12 +28,28 @@ OutputVector translate_add(const NodeContext& context) { // Case when two lists gets concatenated FRONT_END_OP_CONVERSION_CHECK(false, "aten::add is used for concatenation of lists, not possible to convert"); } - align_eltwise_input_types(context, lhs, rhs, true); + if 
(inplace) { + if (lhs.get_element_type().is_dynamic() || lhs.get_element_type() != rhs.get_element_type()) + rhs = context.mark_node(std::make_shared(rhs, lhs)); + } else { + align_eltwise_input_types(context, lhs, rhs, true); + } if (!context.input_is_none(2)) { - auto converted_alpha = context.mark_node(std::make_shared(context.get_input(2), rhs)); - rhs = context.mark_node(std::make_shared(converted_alpha, rhs)); + auto converted_alpha = context.mark_node(std::make_shared(context.get_input(2), rhs)); + rhs = context.mark_node(std::make_shared(converted_alpha, rhs)); } - return {context.mark_node(std::make_shared(lhs, rhs))}; + auto add = context.mark_node(std::make_shared(lhs, rhs)); + if (inplace) + context.mutate_input(0, add); + return {add}; +}; + +OutputVector translate_add(const NodeContext& context) { + return translate_add_common(context, false); +}; + +OutputVector translate_add_(const NodeContext& context) { + return translate_add_common(context, true); }; } // namespace op diff --git a/src/frontends/pytorch/src/op/as_strided.cpp b/src/frontends/pytorch/src/op/as_strided.cpp new file mode 100644 index 00000000000000..5d1dfe38bdaa17 --- /dev/null +++ b/src/frontends/pytorch/src/op/as_strided.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_update.hpp" +#include "openvino/op/tile.hpp" +#include "openvino/op/transpose.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; +bool compare_strides(const std::tuple& a, const std::tuple& b) { + return std::get<0>(a) > std::get<0>(b); +} +OutputVector translate_as_strided(const NodeContext& context) { + // "aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? 
storage_offset=None) -> Tensor(a)" + num_inputs_check(context, 3, 4); + auto decoder = context.get_decoder(); + auto input = context.get_input(0); + auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); + auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); + auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1})); + auto input_strides = decoder->get_input_strides(0); + FRONT_END_OP_CONVERSION_CHECK(input_strides.size() != 0, + "aten::as_strided: Couldn't retrieve input stride information from TorchScript."); + + std::vector idxs(input_strides.size()); + iota(idxs.begin(), idxs.end(), 0); + std::vector> stride_idxs(idxs.size()); + std::for_each(idxs.rbegin(), idxs.rend(), [&](size_t& idx) { + stride_idxs[idx] = {input_strides[idx], idx}; + }); + + std::sort(stride_idxs.begin(), stride_idxs.end(), compare_strides); + std::vector transpose_idx(idxs.size()); + int transpose_counter = 0; + std::for_each(stride_idxs.begin(), stride_idxs.end(), [&](std::tuple& pair) { + transpose_idx[transpose_counter] = uint64_t(std::get<1>(pair)); + transpose_counter++; + }); + auto transpose_idx_const = + context.mark_node(v0::Constant::create(element::i32, Shape{transpose_idx.size()}, transpose_idx)); + auto transposed_input = context.mark_node(std::make_shared(input, transpose_idx_const)); + auto flat_input = context.mark_node(std::make_shared(transposed_input, const_neg_1, false)); + std::deque> sizes; + std::deque> strides; + if (std::dynamic_pointer_cast(context.get_input_from_visible_context(1).get_node_shared_ptr())) { + auto input_vector = context.const_input>(1); + std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) { + auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val})); + sizes.push_front(const_input); + }); + } else { + sizes = get_list_as_outputs(context.get_input(1)); + } + if (std::dynamic_pointer_cast(context.get_input_from_visible_context(2).get_node_shared_ptr())) { + auto input_vector = context.const_input>(2); + std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) { + auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val})); + strides.push_front(const_input); + }); + } else { + strides = get_list_as_outputs(context.get_input(2)); + } + auto offset = const_0->output(0); + if (!context.input_is_none(3)) { + offset = context.get_input(3); + } + FRONT_END_OP_CONVERSION_CHECK(sizes.size() == strides.size(), + "aten::as_strided: Vectors for strides and sizes need to have equal length."); + auto strides_size = strides.size() - 1; + auto i = 0; + auto strides_length_const = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {strides.size()})); + auto ones_strides_len = context.mark_node(std::make_shared(const_1, strides_length_const)); + auto indices = const_0; + std::for_each(strides.rbegin(), strides.rend(), [&](Output& stride) { + auto const_num_iter = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {strides_size - i})); + stride = context.mark_node(std::make_shared(stride, element::i32)); + auto size = sizes.at(strides_size - i); + auto range = context.mark_node(std::make_shared(const_0, size, const_1, element::i32)); + range = context.mark_node(std::make_shared(range, stride)); + auto iteration_shape = context.mark_node( + std::make_shared(ones_strides_len, const_num_iter, const_neg_1, const_0)); + range =
context.mark_node(std::make_shared(range, iteration_shape, false)); + indices = context.mark_node(std::make_shared(indices, range)); + i++; + }); + indices = context.mark_node(std::make_shared(indices, offset)); + auto gather = context.mark_node(std::make_shared(flat_input, indices, const_0)); + return {gather}; +}; +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/bitwise.cpp b/src/frontends/pytorch/src/op/bitwise.cpp index 6e3b1fe5f49ee4..8cbae192ca6bef 100644 --- a/src/frontends/pytorch/src/op/bitwise.cpp +++ b/src/frontends/pytorch/src/op/bitwise.cpp @@ -17,7 +17,7 @@ OutputVector translate_bitwise_not(const NodeContext& context) { num_inputs_check(context, 1, 2); auto x = context.get_input(0); FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not suppored only for boolean input"); + "aten::bitwise_not supported only for boolean input"); auto not_x = context.mark_node(std::make_shared(x)); if (!context.input_is_none(1)) { context.mutate_input(1, not_x); @@ -30,7 +30,7 @@ OutputVector translate_bitwise_and(const NodeContext& context) { auto x = context.get_input(0); auto y = context.get_input(1); FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not suppored only for boolean input"); + "aten::bitwise_not supported only for boolean input"); auto and_x = context.mark_node(std::make_shared(x, y)); return {and_x}; }; @@ -40,7 +40,7 @@ OutputVector translate_bitwise_or(const NodeContext& context) { auto x = context.get_input(0); auto y = context.get_input(1); FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not suppored only for boolean input"); + "aten::bitwise_not supported only for boolean input"); auto or_x = context.mark_node(std::make_shared(x, y)); return {or_x}; }; diff --git a/src/frontends/pytorch/src/op/div.cpp b/src/frontends/pytorch/src/op/div.cpp index 7fb12ec253413a..dbbb6c89af7e6c 100644 --- a/src/frontends/pytorch/src/op/div.cpp +++ b/src/frontends/pytorch/src/op/div.cpp @@ -17,7 +17,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_div(const NodeContext& context) { +OutputVector translate_div_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); @@ -34,7 +34,12 @@ OutputVector translate_div(const NodeContext& context) { y = context.mark_node(std::make_shared(y, element::f32)); } } - align_eltwise_input_types(context, x, y, true); + if (inplace) { + if (x.get_element_type().is_dynamic() || x.get_element_type() != y.get_element_type()) + y = context.mark_node(std::make_shared(x, y)); + } else { + align_eltwise_input_types(context, x, y, true); + } auto res = context.mark_node(std::make_shared(x, y, true)); // TODO: ticket 103296; Temporarily disable ConvertDivide transformation disable_divide_conversion(res); @@ -44,9 +49,19 @@ OutputVector translate_div(const NodeContext& context) { const auto convert = context.mark_node(std::make_shared(res, element::i32)); res = context.mark_node(std::make_shared(convert, x)); } + if (inplace) + context.mutate_input(0, res); return {res}; }; +OutputVector translate_div(const NodeContext& context) { + return translate_div_common(context, false); +}; + +OutputVector translate_div_(const NodeContext& context) { + return translate_div_common(context, true); +}; + } // namespace op } // namespace 
pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp index 735324405d1f11..82231472e401be 100644 --- a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp +++ b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp @@ -15,6 +15,7 @@ #include "openvino/op/matmul.hpp" #include "openvino/op/multiply.hpp" #include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" #include "openvino/op/softmax.hpp" @@ -22,6 +23,7 @@ #include "openvino/op/squeeze.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "openvino/op/util/framework_node.hpp" #include "utils.hpp" namespace ov { @@ -31,10 +33,7 @@ namespace op { using namespace ov::op; -OutputVector translate_scaled_dot_product_attention(const NodeContext& context) { - // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float - // dropout_p=0., bool is_causal=False) - num_inputs_check(context, 6, 6); +std::shared_ptr translate_scaled_dot_product_attention_common(const NodeContext& context) { auto query = context.get_input(0); auto key = context.get_input(1); auto value = context.get_input(2); @@ -68,7 +67,10 @@ OutputVector translate_scaled_dot_product_attention(const NodeContext& context) minus_inf = context.mark_node(std::make_shared(minus_inf, scaled_atten)); // two types of masks are supported. A boolean mask where a value of True indicates that the element should take // part in attention. A float mask of the same type as query, key, value that is added to the attention score. - auto is_causal = context.const_input(5); + auto is_causal = false; + if (!context.input_is_none(5)) { + is_causal = context.const_input(5); + } if (is_causal || !context.input_is_none(3)) { Output mask; Output atten_mask; @@ -100,10 +102,30 @@ OutputVector translate_scaled_dot_product_attention(const NodeContext& context) scaled_atten = context.mark_node(std::make_shared(scaled_atten, atten_mask)); } scaled_atten = context.mark_node(std::make_shared(scaled_atten, -1)); - return {context.mark_node(std::make_shared(scaled_atten, value))}; + return context.mark_node(std::make_shared(scaled_atten, value)); +}; + +OutputVector translate_scaled_dot_product_attention(const NodeContext& context) { + // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float + // dropout_p=0., bool is_causal=False) + num_inputs_check(context, 6, 6); + return {translate_scaled_dot_product_attention_common(context)}; +}; + +OutputVector translate_scaled_dot_product_attention_fx(const NodeContext& context) { + // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float + // dropout_p=0., bool is_causal=False) + num_inputs_check(context, 3, 6); + auto output = translate_scaled_dot_product_attention_common(context); + // TODO: scaled_dot_product_flash_attention has 9 outputs but for most cases only + // the first output is used. The rest of the outputs should be returned properly as + // needed.
+ ov::OutputVector out_vec; + out_vec.push_back(output); + return {context.mark_node(make_list_construct(out_vec))}; }; } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/sub.cpp b/src/frontends/pytorch/src/op/sub.cpp index 94963ed9bdb61f..62534aee53864b 100644 --- a/src/frontends/pytorch/src/op/sub.cpp +++ b/src/frontends/pytorch/src/op/sub.cpp @@ -15,18 +15,34 @@ namespace op { using namespace ov::op; -OutputVector translate_sub(const NodeContext& context) { +OutputVector translate_sub_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); - align_eltwise_input_types(context, x, y); + if (inplace) { + if (x.get_element_type().is_dynamic() || x.get_element_type() != y.get_element_type()) + y = context.mark_node(std::make_shared(x, y)); + } else { + align_eltwise_input_types(context, x, y); + } // default alpha is 1 so no need to multiply if alpha is not provided if (!context.input_is_none(2)) { auto alpha = context.get_input(2); auto casted_alpha = context.mark_node(std::make_shared(alpha, y)); y = context.mark_node(std::make_shared(casted_alpha, y)); } - return {context.mark_node(std::make_shared(x, y))}; + auto sub = context.mark_node(std::make_shared(x, y)); + if (inplace) + context.mutate_input(0, sub); + return {sub}; +}; + +OutputVector translate_sub(const NodeContext& context) { + return translate_sub_common(context, false); +}; + +OutputVector translate_sub_(const NodeContext& context) { + return translate_sub_common(context, true); }; } // namespace op diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index eaf66fa0cd8094..1124c9f7ec6de9 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -23,6 +23,7 @@ OP_CONVERTER(translate_adaptive_max_pool3d); OP_CONVERTER(translate_adaptive_max_pool2d); OP_CONVERTER(translate_adaptive_max_pool1d); OP_CONVERTER(translate_add); +OP_CONVERTER(translate_add_); OP_CONVERTER(translate_addcmul); OP_CONVERTER(translate_addmm); OP_CONVERTER(translate_all); @@ -34,6 +35,7 @@ OP_CONVERTER(translate_argmax); OP_CONVERTER(translate_argsort); OP_CONVERTER(translate_argmax); OP_CONVERTER(translate_argmin); +OP_CONVERTER(translate_as_strided); OP_CONVERTER(translate_as_tensor); OP_CONVERTER(translate_avg_poolnd); OP_CONVERTER(translate_bool); @@ -56,6 +58,7 @@ OP_CONVERTER(translate_deform_conv); OP_CONVERTER(translate_derive_index); OP_CONVERTER(translate_dim); OP_CONVERTER(translate_div); +OP_CONVERTER(translate_div_); OP_CONVERTER(translate_elu); OP_CONVERTER(translate_embedding); OP_CONVERTER(translate_embedding_bag); @@ -175,6 +178,7 @@ OP_CONVERTER(translate_squeeze); OP_CONVERTER(translate_std); OP_CONVERTER(translate_std_mean); OP_CONVERTER(translate_sub); +OP_CONVERTER(translate_sub_); OP_CONVERTER(translate_sum); OP_CONVERTER(translate_t); OP_CONVERTER(translate_to); @@ -214,6 +218,7 @@ OP_CONVERTER(translate_group_norm_fx); OP_CONVERTER(translate_index_fx); OP_CONVERTER(translate_layer_norm_fx); OP_CONVERTER(translate_max_poolnd_fx); +OP_CONVERTER(translate_scaled_dot_product_attention_fx); OP_CONVERTER(translate_slice_fx); OP_CONVERTER(translate_softmax_fx); OP_CONVERTER(translate_transpose_fx); @@ -246,7 +251,7 @@ const std::map get_supported_ops_ts() { {"aten::adaptive_max_pool2d", op::quantizable_op}, {"aten::adaptive_max_pool3d", 
op::quantizable_op}, {"aten::add", op::translate_add}, - {"aten::add_", op::inplace_op}, + {"aten::add_", op::translate_add_}, {"aten::addcmul", op::translate_addcmul}, {"aten::addmm", op::translate_addmm}, {"aten::all", op::translate_all}, @@ -256,6 +261,7 @@ const std::map get_supported_ops_ts() { {"aten::argmax", op::translate_argmax}, {"aten::argmin", op::translate_argmin}, {"aten::argsort", op::translate_argsort}, + {"aten::as_strided", op::translate_as_strided}, {"aten::as_tensor", op::translate_as_tensor}, {"aten::asin", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten::asin_", op::inplace_op>}, @@ -307,7 +313,7 @@ const std::map get_supported_ops_ts() { {"aten::dequantize", op::skip_node}, // we convert model to fp32 using FQ, so dequantization is not needed {"aten::dim", op::translate_dim}, {"aten::div", op::translate_div}, - {"aten::div_", op::inplace_op}, + {"aten::div_", op::translate_div_}, {"aten::dropout", op::skip_node}, {"aten::dropout_", op::skip_node}, {"aten::elu", op::translate_elu}, @@ -403,9 +409,9 @@ const std::map get_supported_ops_ts() { {"aten::minimum", op::translate_minimum}, {"aten::mm", op::translate_1to1_match_2_inputs}, {"aten::mul", op::translate_1to1_match_2_inputs_align_types}, - {"aten::mul_", op::inplace_op>}, + {"aten::mul_", op::inplace_translate_1to1_match_2_inputs_align_types}, {"aten::multiply", op::translate_1to1_match_2_inputs_align_types}, - {"aten::multiply_", op::inplace_op>}, + {"aten::multiply_", op::inplace_translate_1to1_match_2_inputs_align_types}, {"aten::narrow", op::translate_narrow}, {"aten::ne", op::translate_1to1_match_2_inputs_align_types}, {"aten::neg", op::translate_neg}, @@ -476,7 +482,7 @@ const std::map get_supported_ops_ts() { {"aten::std", op::translate_std}, {"aten::std_mean", op::translate_std_mean}, {"aten::sub", op::translate_sub}, - {"aten::sub_", op::inplace_op}, + {"aten::sub_", op::translate_sub_}, {"aten::sum", op::translate_sum}, {"aten::swapaxes", op::quantizable_op}, {"aten::t", op::translate_t}, @@ -557,6 +563,7 @@ const std::map get_supported_ops_fx() { {"aten.arange.default", op::translate_arange_fx}, {"aten.argmax.default", op::translate_argmax}, {"aten.avg_pool2d.default", op::translate_avg_poolnd}, + {"aten.baddbmm.default", op::translate_addmm}, {"aten.bitwise_and.Tensor", op::translate_bitwise_and}, {"aten.bmm.default", op::translate_1to1_match_2_inputs_align_types}, {"aten.cat.default", op::translate_cat_fx}, @@ -583,6 +590,7 @@ const std::map get_supported_ops_fx() { {"aten.hardswish_.default", op::inplace_op>}, {"aten.hardtanh_.default", op::inplace_op}, {"aten.index.Tensor", op::translate_index_fx}, + {"aten.leaky_relu_.default", op::inplace_op>}, {"aten.lift_fresh_copy.default", op::skip_node}, {"aten.linalg_vector_norm.default", op::translate_linalg_vector_norm}, {"aten.log.default", op::translate_log}, @@ -605,6 +613,7 @@ const std::map get_supported_ops_fx() { {"aten.relu.default", op::translate_1to1_match_1_inputs}, {"aten.relu_.default", op::inplace_op>}, {"aten.rsub.Scalar", op::translate_rsub}, + {"aten._scaled_dot_product_flash_attention.default", op::translate_scaled_dot_product_attention_fx}, {"aten.select.int", op::translate_select}, {"aten.sigmoid.default", op::translate_1to1_match_1_inputs}, {"aten.silu.default", op::translate_1to1_match_1_inputs}, diff --git a/src/frontends/pytorch/src/pt_framework_node.hpp b/src/frontends/pytorch/src/pt_framework_node.hpp index 04b71d1169ae81..00d967200405cb 100644 --- a/src/frontends/pytorch/src/pt_framework_node.hpp +++ 
b/src/frontends/pytorch/src/pt_framework_node.hpp @@ -20,14 +20,17 @@ class PtFrameworkNode : public ov::op::util::FrameworkNode { PtFrameworkNode(const std::shared_ptr<TorchDecoder>& decoder, const OutputVector& inputs, size_t output_size, - bool is_backprop = false) + bool is_reverseprop = false) : ov::op::util::FrameworkNode(inputs, output_size, decoder->get_subgraph_size()), m_decoder(decoder) { ov::op::util::FrameworkNodeAttrs attrs; attrs.set_type_name("PTFrameworkNode"); - if (is_backprop) { - attrs[op_type_key] = m_decoder->get_op_type() + "_backprop"; + if (is_reverseprop) { + attrs[op_type_key] = m_decoder->get_op_type() + "_reverseprop"; attrs[schema_key] = "None"; + attrs[failed_conversion_key] = + "This is an internal OpenVINO operation representing reverse data propagation. It should not appear in " + "the graph in a normal conversion flow and might be the result of other failures."; } else { attrs[op_type_key] = m_decoder->get_op_type(); attrs[schema_key] = m_decoder->get_schema();
diff --git a/src/frontends/pytorch/src/transforms/align_types_removal.cpp b/src/frontends/pytorch/src/transforms/align_types_removal.cpp new file mode 100644 index 00000000000000..c5e43d8af13004 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/align_types_removal.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "align_types_removal.hpp" + +#include <memory> +#include <utility> + +#include "helper_ops/align_types.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/split.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/util/framework_node.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::op; + +AlignTypesRemoval::AlignTypesRemoval() { + auto align_types_pattern = ov::pass::pattern::wrap_type<AlignTypes>(); + + ov::matcher_pass_callback callback = [](ov::pass::pattern::Matcher& m) { + auto align_types = std::dynamic_pointer_cast<AlignTypes>(m.get_match_root()); + if (!align_types) + return false; + auto lhs_itype = align_types->get_input_element_type(0); + auto rhs_itype = align_types->get_input_element_type(1); + auto lhs_otype = align_types->get_output_element_type(0); + auto rhs_otype = align_types->get_output_element_type(1); + if (lhs_otype.is_static() && rhs_otype.is_static()) { + auto out1 = align_types->input_value(0); + auto out2 = align_types->input_value(1); + if (lhs_itype != lhs_otype) + out1 = std::make_shared<v0::Convert>(align_types->input_value(0), lhs_otype); + if (rhs_itype != rhs_otype) + out2 = std::make_shared<v0::Convert>(align_types->input_value(1), rhs_otype); + align_types->output(0).replace(out1); + align_types->output(1).replace(out2); + return true; + } + return false; + }; + + auto m = std::make_shared<ov::pass::pattern::Matcher>(align_types_pattern, + "ov::frontend::pytorch::pass::AlignTypesRemoval"); + this->register_matcher(m, callback); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov
diff --git a/src/frontends/pytorch/src/transforms/align_types_removal.hpp b/src/frontends/pytorch/src/transforms/align_types_removal.hpp new file mode 100644 index 00000000000000..bba81df9e0e086 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/align_types_removal.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp"
+#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +class AlignTypesRemoval : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::AlignTypesRemoval"); + AlignTypesRemoval(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp index f8de5275b69ae8..67ea5f4f9e1ff9 100644 --- a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp @@ -12,6 +12,7 @@ #include "openvino/pass/pattern/matcher.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "utils.hpp" +#include "utils_quantize.hpp" namespace ov { namespace frontend { @@ -38,22 +39,31 @@ AtenStackListConstructReplacer::AtenStackListConstructReplacer() { auto axis_node = pattern_map.at(axis).get_node_shared_ptr(); auto axis_const = std::dynamic_pointer_cast(axis_node); auto axis = axis_const->cast_vector(); + if (axis.size() != 1) { + add_exception_to_fw_node(stack, "aten::stack has multiple axes, only one is supported."); + return false; + } // Check if ListConstruct is an input if (auto list_construct_node = cast_fw_node(input_node, "prim::ListConstruct")) { const auto& list_inputs = list_construct_node->input_values(); - OutputVector node_vector; - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - // Iterate over values in ListConstruct - for (const auto& list_input : list_inputs) { - auto node = concat_list_construct(list_input); - auto unsqueezed_node = std::make_shared(node, axis_const); - node_vector.push_back(unsqueezed_node); + std::shared_ptr node; + if (auto compression = u4_compression_stack(list_inputs, axis[0])) { + node = compression; + } else { + OutputVector node_vector; + auto zero = v0::Constant::create(element::i32, Shape{}, {0}); + // Iterate over values in ListConstruct + for (const auto& list_input : list_inputs) { + auto node = concat_list_construct(list_input); + auto unsqueezed_node = std::make_shared(node, axis_const); + node_vector.push_back(unsqueezed_node); + } + // Concat vectors on provided axis + node = std::make_shared(node_vector, axis[0]); } - // Concat vectors on provided axis - auto concat = std::make_shared(node_vector, axis[0]); - copy_runtime_info_and_name(stack, {concat}, {input_node}); - replace_node(stack, concat); + copy_runtime_info_and_name(stack, {node}, {input_node}); + replace_node(stack, node); return true; } add_exception_to_fw_node(stack, "Unsupported case of aten::stack."); diff --git a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp index 0219600799a3c0..f7e5e80b604a76 100644 --- a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp @@ -26,16 +26,8 @@ using namespace ov::op; StringEqualityReplacer::StringEqualityReplacer() { auto framework_node_lhs = pattern::wrap_type(); auto framework_node_rhs = pattern::wrap_type(); - auto convert_lhs = pattern::wrap_type({framework_node_lhs}); - auto convert_like_lhs = pattern::wrap_type({framework_node_lhs, framework_node_rhs}); - auto convert_rhs = pattern::wrap_type({framework_node_rhs}); - auto convert_like_rhs = 
pattern::wrap_type<v1::ConvertLike>({framework_node_rhs, framework_node_lhs}); - auto lhs_pattern = - std::make_shared<pattern::op::Or>(OutputVector{framework_node_lhs, convert_lhs, convert_like_lhs}); - auto rhs_pattern = - std::make_shared<pattern::op::Or>(OutputVector{framework_node_rhs, convert_rhs, convert_like_rhs}); - auto equal_op = pattern::wrap_type<v1::Equal>({lhs_pattern, rhs_pattern}); - auto not_equal_op = pattern::wrap_type<v1::NotEqual>({lhs_pattern, rhs_pattern}); + auto equal_op = pattern::wrap_type<v1::Equal>({framework_node_lhs, framework_node_rhs}); + auto not_equal_op = pattern::wrap_type<v1::NotEqual>({framework_node_lhs, framework_node_rhs}); auto string_equality_pattern = std::make_shared<pattern::op::Or>(OutputVector{equal_op, not_equal_op});
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp new file mode 100644 index 00000000000000..e08ebd728b050e --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "u4_block_repack.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" +#include "utils_quantize.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::op; +using namespace ov::pass::pattern; + +U4BlockRepack::U4BlockRepack() { + const auto m_constant = ov::pass::pattern::wrap_type<v0::Constant>(); + const auto m_reshape1 = ov::pass::pattern::wrap_type<v1::Reshape>({m_constant, any_input()}); + const auto m_transpose = ov::pass::pattern::wrap_type<v1::Transpose>({m_reshape1, any_input()}); + const auto m_reshape2 = ov::pass::pattern::wrap_type<v1::Reshape>({m_transpose, any_input()}); + + auto pack_byte = [](uint8_t lo, uint8_t hi) { + return (hi << 4) | (lo & 0x0F); + }; // swap halves because the Convert op assumes this layout + + auto get_u4 = [](const uint8_t* src, size_t idx) { + const size_t byte_idx = idx / 2; + const uint8_t bit_shift = 4 * (idx % 2); + return (src[byte_idx] >> bit_shift) & 0xF; + }; + + register_matcher( + std::make_shared<Matcher>(m_reshape2, "ov::frontend::pytorch::pass::U4BlockRepack"), + [=](ov::pass::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto constant = + std::dynamic_pointer_cast<v0::Constant>(pattern_to_output[m_constant].get_node_shared_ptr()); + if (!constant) + return false; + auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr(); + auto transpose = pattern_to_output[m_transpose].get_node_shared_ptr(); + auto reshape2 = pattern_to_output[m_reshape2].get_node_shared_ptr(); + + if (constant->get_element_type() != element::u4) + return false; + + // FIXME: Check reshape/transpose/reshape target shapes and axes permutation; for now they are assumed to + // always be in the expected form + + auto source_shape = reshape1->get_output_shape(0); + + if (source_shape.size() != 3) + return false; + + auto destination_shape = reshape2->get_output_shape(0); + + size_t n_blocks = source_shape[0]; + size_t block_height = source_shape[1]; + size_t lane_size = source_shape[2]; // size in u4 units + size_t block_size = block_height * lane_size / 2; // size in bytes + + auto src = constant->get_data_ptr<uint8_t>(); + + auto new_const = std::make_shared<v0::Constant>(element::u4, destination_shape); + auto dst = const_cast<uint8_t*>( // const_cast? + reinterpret_cast<const uint8_t*>(new_const->get_data_ptr())); // TODO: How to better access u4 data?
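+
+                // get_u4(src, n) extracts the n-th u4 element (stored low nibble first) and
+                // pack_byte(lo, hi) packs two u4 elements back into one byte. The loop below
+                // writes source elements (row 2*j, lane i) and (row 2*j + 1, lane i) of each
+                // block into destination byte i * block_height / 2 + j, i.e. it transposes the
+                // block to a lane-major layout while keeping each vertical pair of elements
+                // packed in a single byte.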
+ + for (size_t iblock = 0; iblock < n_blocks; ++iblock) { + auto src_block = src + iblock * block_size; + auto dst_block = dst + iblock * block_size; + for (size_t i = 0; i < lane_size; ++i) { + for (size_t j = 0; j < block_height / 2; ++j) { // /2 because we pack two u4 elements into one byte per iteration + uint8_t lo = get_u4(src_block, 2 * j * lane_size + i); + uint8_t hi = get_u4(src_block, (2 * j + 1) * lane_size + i); + dst_block[i * block_height / 2 + j] = pack_byte(lo, hi); + } + } + } + + copy_runtime_info(NodeVector{constant, reshape1, transpose, reshape2}, new_const); + replace_node(reshape2, new_const); + + return true; + }); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp new file mode 100644 index 00000000000000..aa6e00f70e564c --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +class U4BlockRepack : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::U4BlockRepack"); + U4BlockRepack(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov
diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index afd9b5ebf85216..7decae35b30bbb 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -4,6 +4,7 @@ #include "utils.hpp" +#include "helper_ops/align_types.hpp" #include "op_table.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/frontend/pytorch/decoder.hpp" @@ -381,33 +382,17 @@ std::unordered_map bit_to_int{ }; } // namespace -void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Output<Node>& rhs, bool align_scalars) { +element::Type infer_types(const Output<Node>& lhs, const Output<Node>& rhs, bool align_scalars) { const auto& lhs_type = lhs.get_element_type(); const auto& rhs_type = rhs.get_element_type(); - auto out_type = context.get_output_type(0); - if (out_type.is<element::Type>()) { - auto otype = out_type.as<element::Type>(); - if (otype.is_real()) { - if (otype != lhs_type) { - lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, otype)); - } - if (otype != rhs_type) { - rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, otype)); - } - return; - } - } if (lhs_type.is_dynamic() || rhs_type.is_dynamic()) { - // if any of types is not known, align to lhs type. - // TODO: can be fixed with special operation? - rhs = context.mark_node(std::make_shared<v1::ConvertLike>(rhs, lhs)); - return; + return element::dynamic; } // Both types are static, align types.
If float and int types are used, convert the int type to f32, after that align // to the largest bitness; if both are float or both are int, just align bitness if (lhs_type == rhs_type) - return; + return lhs_type; // if one of the operands is scalar, the resulting type is taken from the other operand except when the scalar is // a float and the other operand is an int; in that case BOTH operands get fp32 type @@ -429,11 +414,9 @@ void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Ou if (!align_scalars) rhs_dst_type = element::f32; } else if (is_lhs_scalar && rhs_type != element::boolean) { - lhs = context.mark_node(std::make_shared<v1::ConvertLike>(lhs, rhs)); - return; + return rhs_type; } else if (is_rhs_scalar && lhs_type != element::boolean) { - rhs = context.mark_node(std::make_shared<v1::ConvertLike>(rhs, lhs)); - return; + return lhs_type; } if (!lhs_dst_type.is_real() && rhs_dst_type.is_real()) { @@ -470,13 +453,39 @@ void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Ou } } } + return lhs_dst_type; +} - // Cast to destination types - if (lhs_dst_type != lhs_type) { - lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, lhs_dst_type)); +void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Output<Node>& rhs, bool align_scalars) { + const auto& lhs_type = lhs.get_element_type(); + const auto& rhs_type = rhs.get_element_type(); + auto out_type = context.get_output_type(0); + if (out_type.is<element::Type>()) { + auto otype = out_type.as<element::Type>(); + if (otype.is_real()) { + if (otype != lhs_type) { + lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, otype)); + } + if (otype != rhs_type) { + rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, otype)); + } + return; + } + } + auto dst_type = infer_types(lhs, rhs, align_scalars); + if (dst_type.is_dynamic()) { + // We can't decide the type at this point, create a special operation + auto at = std::make_shared<AlignTypes>(lhs, rhs, align_scalars); + lhs = at->output(0); + rhs = at->output(1); + return; + } + // Cast to destination type + if (dst_type != lhs_type) { + lhs = context.mark_node(std::make_shared<v0::Convert>(lhs, dst_type)); } - if (rhs_dst_type != rhs_type) { - rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, rhs_dst_type)); + if (dst_type != rhs_type) { + rhs = context.mark_node(std::make_shared<v0::Convert>(rhs, dst_type)); } }
diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index 1635296e612dff..20bae6fa62f5c3 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -7,6 +7,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" +#include "openvino/op/convert_like.hpp" namespace ov { @@ -65,11 +66,11 @@ Any simplified_type_interpret(Any type); void add_exception_to_fw_node(std::shared_ptr<Node> node, const std::string& msg); +element::Type infer_types(const Output<Node>& lhs, const Output<Node>& rhs, bool align_scalars); void align_eltwise_input_types(const NodeContext& context, Output<Node>& lhs, Output<Node>& rhs, bool align_scalars = false); - void align_output_types(const NodeContext& context, OutputVector& outputs); std::deque<Output<Node>> get_list_as_outputs(const Output<Node>& start); @@ -125,12 +126,31 @@ OutputVector translate_1to1_match_2_inputs_align_types(const NodeContext& contex FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); auto lhs = context.get_input(0); auto rhs = context.get_input(1); - align_eltwise_input_types(context, lhs, rhs, true); + auto lhs_type = context.get_input_type(0); + auto rhs_type =
context.get_input_type(1); + // If type is string or None, we shouldn't align + if (!lhs_type.is<type::Str>() && !rhs_type.is<type::Str>() && !lhs_type.is<type::PyNone>() && + !rhs_type.is<type::PyNone>()) + align_eltwise_input_types(context, lhs, rhs, true); OutputVector res = {context.mark_node(std::make_shared<T>(lhs, rhs))}; align_output_types(context, res); return res; } +template <typename T, size_t idx = 0> +OutputVector inplace_translate_1to1_match_2_inputs_align_types(const NodeContext& context) { + num_inputs_check(context, 2, 2); + FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); + auto lhs = context.get_input(0); + auto rhs = context.get_input(1); + // For an inplace op we know the direction of type alignment + if (lhs.get_element_type().is_dynamic() || lhs.get_element_type() != rhs.get_element_type()) + rhs = context.mark_node(std::make_shared<ov::op::v1::ConvertLike>(rhs, lhs)); + OutputVector res = {context.mark_node(std::make_shared<T>(lhs, rhs))}; + context.mutate_input(idx, res[0]); + return res; +} + inline OutputVector return_false_scalar(const NodeContext& context) { return {context.mark_node(ov::op::v0::Constant::create(element::boolean, Shape{}, {false}))}; } @@ -158,6 +178,9 @@ class DummyDecoder : public TorchDecoder { virtual PartialShape get_input_shape(size_t index) const override { FRONT_END_NOT_IMPLEMENTED(get_input_shape); } + virtual const std::vector<size_t>& get_input_strides(size_t index) const override { + FRONT_END_NOT_IMPLEMENTED(get_input_strides); + } virtual Any get_input_type(size_t index) const override { FRONT_END_NOT_IMPLEMENTED(get_input_type); } @@ -165,7 +188,7 @@ class DummyDecoder : public TorchDecoder { FRONT_END_NOT_IMPLEMENTED(get_output_debug_name); } virtual PartialShape get_output_shape(size_t index) const override { - FRONT_END_NOT_IMPLEMENTED(get_output_shape); + return PartialShape::dynamic(); } virtual Any get_output_type(size_t index) const override { FRONT_END_NOT_IMPLEMENTED(get_output_type); @@ -186,7 +209,7 @@ class DummyDecoder : public TorchDecoder { FRONT_END_NOT_IMPLEMENTED(get_op_type); } virtual const std::string& get_schema() const override { - FRONT_END_NOT_IMPLEMENTED(get_schema); + return m_schema; } virtual size_t num_of_outputs() const override { FRONT_END_NOT_IMPLEMENTED(num_of_outputs); @@ -215,6 +238,9 @@ class DummyDecoder : public TorchDecoder { virtual OutputVector inlined_inputs(size_t start_index) const override { FRONT_END_NOT_IMPLEMENTED(inlined_inputs); } + +private: + const std::string m_schema = "NONE"; }; } // namespace pytorch
diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index 5af546f3d5be5d..1346fd76971fcc 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -6,6 +6,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/convert_like.hpp" #include "openvino/op/fake_quantize.hpp" @@ -13,6 +14,7 @@ #include "openvino/op/reshape.hpp" #include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/subtract.hpp" +#include "transformations/utils/utils.hpp" namespace ov { namespace frontend { @@ -168,6 +170,53 @@ std::shared_ptr<QuantizedPtNode> cast_quantized_fw_node(std::shared_ptr<Node> no return quant_node; } +std::shared_ptr<ov::Node> u4_compression_stack(const OutputVector& list_elems, int64_t axis) { + // Part 1: Detect pattern + + if (list_elems.size() != 2) + return nullptr; + auto bitwise_and =
cast_fw_node(list_elems[0].get_node_shared_ptr(), "aten::bitwise_and"); + if (!bitwise_and) + return nullptr; + auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(), "aten::bitwise_right_shift"); + if (!bitwise_shift) + return nullptr; + + auto weights_u8 = std::dynamic_pointer_cast<v0::Constant>(bitwise_and->get_input_node_shared_ptr(0)); + if (weights_u8 != std::dynamic_pointer_cast<v0::Constant>(bitwise_shift->get_input_node_shared_ptr(0))) + return nullptr; + + if (weights_u8->get_output_element_type(0) != element::u8) + return nullptr; + + if (axis != -1 && static_cast<size_t>(axis) != weights_u8->get_shape().size() - 1) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_and->get_input_node_shared_ptr(1), 0x0F)) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_shift->get_input_node_shared_ptr(1), 4)) + return nullptr; + + // Pattern detected: weights_u8 is the target u8 constant with packed weights + + // Part 2: Form the u4 constant by repacking the original weights_u8 + // Repacking transforms half of the lanes into an interleaved representation. + + auto u8_shape = weights_u8->get_shape(); + size_t full_size = shape_size(u8_shape); + auto src = weights_u8->get_data_ptr<uint8_t>(); + + auto u4_shape = u8_shape; + u4_shape.push_back(2); + auto new_const = std::make_shared<v0::Constant>(element::u4, u4_shape); + auto dst = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(new_const->get_data_ptr())); + + std::copy(src, src + full_size, dst); // TODO: Avoid copying, reuse the same constant + copy_runtime_info_and_name(weights_u8, {new_const}, {weights_u8, bitwise_and, bitwise_shift}); + return new_const; +} + } // namespace pytorch } // namespace frontend } // namespace ov
diff --git a/src/frontends/pytorch/src/utils_quantize.hpp b/src/frontends/pytorch/src/utils_quantize.hpp index 69917e7b8bce3e..e02bce880d2480 100644 --- a/src/frontends/pytorch/src/utils_quantize.hpp +++ b/src/frontends/pytorch/src/utils_quantize.hpp @@ -166,6 +166,12 @@ OutputVector quantizable_op(const NodeContext& context) { } } // namespace op +/** + * Captures aten::stack([aten::bitwise_and(Constant(u8)), aten::bitwise_right_shift(Constant(u8))], dim=-1). + * This pattern is transformed into a single Constant with element_type=u4. + */ +std::shared_ptr<ov::Node> u4_compression_stack(const OutputVector& list_elems, int64_t axis); + } // namespace pytorch } // namespace frontend } // namespace ov
diff --git a/src/frontends/tensorflow/README.md b/src/frontends/tensorflow/README.md index 7fc421cd92c7f4..4a48203a2b41dc 100644 --- a/src/frontends/tensorflow/README.md +++ b/src/frontends/tensorflow/README.md @@ -205,6 +205,7 @@ py.test tensorflow_tests/test_tf_Unique.py --use_new_frontend ``` ## See also + * [Supported Operations](./docs/supported_ops.md) * [OpenVINO README](../../../README.md) * [OpenVINO Core Components](../../README.md) * [Developer documentation](../../../docs/dev/index.md)
diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md new file mode 100644 index 00000000000000..5794e3f16653fd --- /dev/null +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -0,0 +1,1406 @@ +# TensorFlow Operations Supported by OpenVINO TensorFlow Frontend + +The table below lists the operations from [tf.raw_ops](https://www.tensorflow.org/api_docs/python/tf/raw_ops) and shows which of them the TensorFlow Frontend supports. +A "supported" operation is one that the TensorFlow Frontend can convert to the OpenVINO representation.
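+
+For example, a frozen TensorFlow model that consists only of supported operations can be read and compiled directly. A minimal C++ sketch (the file name `model.pb` and the `"CPU"` device below are illustrative placeholders):
+
+```cpp
+#include <openvino/openvino.hpp>
+
+int main() {
+    ov::Core core;
+    // read_model() dispatches to the TensorFlow Frontend for *.pb files;
+    // conversion is expected to fail if the graph contains unsupported operations
+    std::shared_ptr<ov::Model> model = core.read_model("model.pb");
+    ov::CompiledModel compiled = core.compile_model(model, "CPU");
+    return 0;
+}
+```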
+ +| Operation Name | Supported | Limitation | +|---------------------------------------------------------|-------------------------------|-------------------------------| +| Abort | NO | | +| Abs | YES | | +| AccumulateNV2 | NO | | +| AccumulatorApplyGradient | NO | | +| AccumulatorNumAccumulated | NO | | +| AccumulatorSetGlobalStep | NO | | +| AccumulatorTakeGradient | NO | | +| Acos | YES | | +| Acosh | YES | | +| Add | YES | | +| AddManySparseToTensorsMap | NO | | +| AddN | YES | | +| AddSparseToTensorsMap | NO | | +| AddV2 | YES | | +| AdjustContrast | NO | | +| AdjustContrastv2 | YES | | +| AdjustHue | NO | | +| AdjustSaturation | NO | | +| All | YES | | +| AllCandidateSampler | NO | | +| AllToAll | NO | | +| Angle | NO | | +| AnonymousHashTable | NO | | +| AnonymousIterator | NO | | +| AnonymousIteratorV2 | NO | | +| AnonymousIteratorV3 | NO | | +| AnonymousMemoryCache | NO | | +| AnonymousMultiDeviceIterator | NO | | +| AnonymousMultiDeviceIteratorV3 | NO | | +| AnonymousMutableDenseHashTable | NO | | +| AnonymousMutableHashTable | NO | | +| AnonymousMutableHashTableOfTensors | NO | | +| AnonymousRandomSeedGenerator | NO | | +| AnonymousSeedGenerator | NO | | +| Any | YES | | +| ApplyAdaMax | NO | | +| ApplyAdadelta | NO | | +| ApplyAdagrad | NO | | +| ApplyAdagradDA | NO | | +| ApplyAdagradV2 | NO | | +| ApplyAdam | NO | | +| ApplyAddSign | NO | | +| ApplyCenteredRMSProp | NO | | +| ApplyFtrl | NO | | +| ApplyFtrlV2 | NO | | +| ApplyGradientDescent | NO | | +| ApplyMomentum | NO | | +| ApplyPowerSign | NO | | +| ApplyProximalAdagrad | NO | | +| ApplyProximalGradientDescent | NO | | +| ApplyRMSProp | NO | | +| ApproxTopK | NO | | +| ApproximateEqual | NO | | +| ArgMax | YES | | +| ArgMin | YES | | +| AsString | NO | | +| Asin | YES | | +| Asinh | YES | | +| Assert | YES | | +| AssertCardinalityDataset | NO | | +| AssertNextDataset | NO | | +| AssertPrevDataset | NO | | +| Assign | YES | | +| AssignAdd | NO | | +| AssignAddVariableOp | NO | | +| AssignSub | NO | | +| AssignSubVariableOp | NO | | +| AssignVariableOp | YES | | +| AssignVariableXlaConcatND | NO | | +| Atan | YES | | +| Atan2 | NO | | +| Atanh | YES | | +| AudioSpectrogram | NO | | +| AudioSummary | NO | | +| AudioSummaryV2 | NO | | +| AutoShardDataset | NO | | +| AvgPool | YES | | +| AvgPool3D | YES | | +| AvgPool3DGrad | NO | | +| AvgPoolGrad | NO | | +| BandedTriangularSolve | NO | | +| Barrier | NO | | +| BarrierClose | NO | | +| BarrierIncompleteSize | NO | | +| BarrierInsertMany | NO | | +| BarrierReadySize | NO | | +| BarrierTakeMany | NO | | +| Batch | NO | | +| BatchCholesky | NO | | +| BatchCholeskyGrad | NO | | +| BatchDataset | NO | | +| BatchDatasetV2 | NO | | +| BatchFFT | NO | | +| BatchFFT2D | NO | | +| BatchFFT3D | NO | | +| BatchFunction | NO | | +| BatchIFFT | NO | | +| BatchIFFT2D | NO | | +| BatchIFFT3D | NO | | +| BatchMatMul | YES | | +| BatchMatMulV2 | YES | | +| BatchMatMulV3 | YES | | +| BatchMatrixBandPart | NO | | +| BatchMatrixDeterminant | NO | | +| BatchMatrixDiag | NO | | +| BatchMatrixDiagPart | NO | | +| BatchMatrixInverse | NO | | +| BatchMatrixSetDiag | NO | | +| BatchMatrixSolve | NO | | +| BatchMatrixSolveLs | NO | | +| BatchMatrixTriangularSolve | NO | | +| BatchNormWithGlobalNormalization | NO | | +| BatchNormWithGlobalNormalizationGrad | NO | | +| BatchSelfAdjointEig | NO | | +| BatchSelfAdjointEigV2 | NO | | +| BatchSvd | NO | | +| BatchToSpace | NO | | +| BatchToSpaceND | YES | | +| BesselI0 | NO | | +| BesselI0e | NO | | +| BesselI1 | NO | | +| BesselI1e | NO | | +| BesselJ0 | 
NO | | +| BesselJ1 | NO | | +| BesselK0 | NO | | +| BesselK0e | NO | | +| BesselK1 | NO | | +| BesselK1e | NO | | +| BesselY0 | NO | | +| BesselY1 | NO | | +| Betainc | NO | | +| BiasAdd | YES | | +| BiasAddGrad | NO | | +| BiasAddV1 | NO | | +| Bincount | NO | | +| Bitcast | NO | | +| BitwiseAnd | NO | | +| BitwiseOr | NO | | +| BitwiseXor | NO | | +| BlockLSTM | YES | | +| BlockLSTMGrad | NO | | +| BlockLSTMGradV2 | NO | | +| BlockLSTMV2 | NO | | +| BoostedTreesAggregateStats | NO | | +| BoostedTreesBucketize | NO | | +| BoostedTreesCalculateBestFeatureSplit | NO | | +| BoostedTreesCalculateBestFeatureSplitV2 | NO | | +| BoostedTreesCalculateBestGainsPerFeature | NO | | +| BoostedTreesCenterBias | NO | | +| BoostedTreesCreateEnsemble | NO | | +| BoostedTreesCreateQuantileStreamResource | NO | | +| BoostedTreesDeserializeEnsemble | NO | | +| BoostedTreesEnsembleResourceHandleOp | NO | | +| BoostedTreesExampleDebugOutputs | NO | | +| BoostedTreesFlushQuantileSummaries | NO | | +| BoostedTreesGetEnsembleStates | NO | | +| BoostedTreesMakeQuantileSummaries | NO | | +| BoostedTreesMakeStatsSummary | NO | | +| BoostedTreesPredict | NO | | +| BoostedTreesQuantileStreamResourceAddSummaries | NO | | +| BoostedTreesQuantileStreamResourceDeserialize | NO | | +| BoostedTreesQuantileStreamResourceFlush | NO | | +| BoostedTreesQuantileStreamResourceGetBucketBoundaries | NO | | +| BoostedTreesQuantileStreamResourceHandleOp | NO | | +| BoostedTreesSerializeEnsemble | NO | | +| BoostedTreesSparseAggregateStats | NO | | +| BoostedTreesSparseCalculateBestFeatureSplit | NO | | +| BoostedTreesTrainingPredict | NO | | +| BoostedTreesUpdateEnsemble | NO | | +| BoostedTreesUpdateEnsembleV2 | NO | | +| BroadcastArgs | YES | | +| BroadcastGradientArgs | NO | | +| BroadcastTo | YES | | +| Bucketize | YES | | +| BytesProducedStatsDataset | NO | | +| CSRSparseMatrixComponents | NO | | +| CSRSparseMatrixToDense | NO | | +| CSRSparseMatrixToSparseTensor | NO | | +| CSVDataset | NO | | +| CSVDatasetV2 | NO | | +| CTCBeamSearchDecoder | NO | | +| CTCGreedyDecoder | YES | | +| CTCLoss | YES | | +| CTCLossV2 | NO | | +| CacheDataset | NO | | +| CacheDatasetV2 | NO | | +| Case | NO | | +| Cast | YES | | +| Ceil | YES | | +| CheckNumerics | YES | | +| CheckNumericsV2 | YES | | +| Cholesky | NO | | +| CholeskyGrad | NO | | +| ChooseFastestBranchDataset | NO | | +| ChooseFastestDataset | NO | | +| ClipByValue | YES | | +| CloseSummaryWriter | NO | | +| CollectiveAllToAllV2 | NO | | +| CollectiveAllToAllV3 | NO | | +| CollectiveAssignGroupV2 | NO | | +| CollectiveBcastRecv | NO | | +| CollectiveBcastRecvV2 | NO | | +| CollectiveBcastSend | NO | | +| CollectiveBcastSendV2 | NO | | +| CollectiveGather | NO | | +| CollectiveGatherV2 | NO | | +| CollectiveInitializeCommunicator | NO | | +| CollectivePermute | NO | | +| CollectiveReduce | NO | | +| CollectiveReduceScatterV2 | NO | | +| CollectiveReduceV2 | NO | | +| CollectiveReduceV3 | NO | | +| CombinedNonMaxSuppression | NO | | +| Complex | NO | | +| ComplexAbs | NO | | +| CompositeTensorVariantFromComponents | NO | | +| CompositeTensorVariantToComponents | NO | | +| CompressElement | NO | | +| ComputeAccidentalHits | NO | | +| ComputeBatchSize | NO | | +| Concat | YES | | +| ConcatOffset | NO | | +| ConcatV2 | YES | | +| ConcatenateDataset | NO | | +| ConditionalAccumulator | NO | | +| ConfigureDistributedTPU | NO | | +| ConfigureTPUEmbedding | NO | | +| Conj | NO | | +| ConjugateTranspose | NO | | +| Const | YES | | +| ConsumeMutexLock | NO | | +| ControlTrigger | NO | | +| 
Conv | NO | | +| Conv2D | YES | | +| Conv2DBackpropFilter | NO | | +| Conv2DBackpropFilterV2 | NO | | +| Conv2DBackpropInput | YES | | +| Conv2DBackpropInputV2 | NO | | +| Conv3D | YES | | +| Conv3DBackpropFilter | NO | | +| Conv3DBackpropFilterV2 | NO | | +| Conv3DBackpropInput | NO | | +| Conv3DBackpropInputV2 | YES | | +| Copy | NO | | +| CopyHost | NO | | +| Cos | YES | | +| Cosh | YES | | +| CountUpTo | NO | | +| CreateSummaryDbWriter | NO | | +| CreateSummaryFileWriter | NO | | +| CropAndResize | YES | | +| CropAndResizeGradBoxes | NO | | +| CropAndResizeGradImage | NO | | +| Cross | NO | | +| CrossReplicaSum | NO | | +| CudnnRNN | NO | | +| CudnnRNNBackprop | NO | | +| CudnnRNNBackpropV2 | NO | | +| CudnnRNNBackpropV3 | NO | | +| CudnnRNNCanonicalToParams | NO | | +| CudnnRNNCanonicalToParamsV2 | NO | | +| CudnnRNNParamsSize | NO | | +| CudnnRNNParamsToCanonical | NO | | +| CudnnRNNParamsToCanonicalV2 | NO | | +| CudnnRNNV2 | NO | | +| CudnnRNNV3 | NO | | +| Cumprod | NO | | +| Cumsum | YES | | +| CumulativeLogsumexp | NO | | +| DataFormatDimMap | NO | | +| DataFormatVecPermute | NO | | +| DataServiceDataset | NO | | +| DataServiceDatasetV2 | NO | | +| DataServiceDatasetV3 | NO | | +| DataServiceDatasetV4 | NO | | +| DatasetCardinality | NO | | +| DatasetFromGraph | NO | | +| DatasetToGraph | NO | | +| DatasetToGraphV2 | NO | | +| DatasetToSingleElement | NO | | +| DatasetToTFRecord | NO | | +| Dawsn | NO | | +| DebugGradientIdentity | NO | | +| DebugGradientRefIdentity | NO | | +| DebugIdentity | NO | | +| DebugIdentityV2 | NO | | +| DebugIdentityV3 | NO | | +| DebugNanCount | NO | | +| DebugNumericSummary | NO | | +| DebugNumericSummaryV2 | NO | | +| DecodeAndCropJpeg | NO | | +| DecodeBase64 | NO | | +| DecodeBmp | NO | | +| DecodeCSV | NO | | +| DecodeCompressed | NO | | +| DecodeGif | NO | | +| DecodeImage | NO | | +| DecodeJSONExample | NO | | +| DecodeJpeg | NO | | +| DecodePaddedRaw | NO | | +| DecodePng | NO | | +| DecodeProtoV2 | NO | | +| DecodeRaw | NO | | +| DecodeWav | NO | | +| DeepCopy | NO | | +| DeleteIterator | NO | | +| DeleteMemoryCache | NO | | +| DeleteMultiDeviceIterator | NO | | +| DeleteRandomSeedGenerator | NO | | +| DeleteSeedGenerator | NO | | +| DeleteSessionTensor | NO | | +| DenseBincount | NO | | +| DenseCountSparseOutput | NO | | +| DenseToCSRSparseMatrix | NO | | +| DenseToDenseSetOperation | NO | | +| DenseToSparseBatchDataset | NO | | +| DenseToSparseSetOperation | NO | | +| DepthToSpace | YES | | +| DepthwiseConv2dNative | YES | | +| DepthwiseConv2dNativeBackpropFilter | NO | | +| DepthwiseConv2dNativeBackpropInput | NO | | +| Dequantize | NO | | +| DeserializeIterator | NO | | +| DeserializeManySparse | NO | | +| DeserializeSparse | NO | | +| DestroyResourceOp | NO | | +| DestroyTemporaryVariable | NO | | +| DeviceIndex | NO | | +| Diag | NO | | +| DiagPart | NO | | +| Digamma | NO | | +| Dilation2D | NO | | +| Dilation2DBackpropFilter | NO | | +| Dilation2DBackpropInput | NO | | +| DirectedInterleaveDataset | NO | | +| DisableCopyOnRead | NO | | +| DistributedSave | NO | | +| Div | NO | | +| DivNoNan | YES | | +| DrawBoundingBoxes | NO | | +| DrawBoundingBoxesV2 | NO | | +| DummyIterationCounter | NO | | +| DummyMemoryCache | NO | | +| DummySeedGenerator | NO | | +| DynamicEnqueueTPUEmbeddingArbitraryTensorBatch | NO | | +| DynamicPartition | YES | | +| DynamicStitch | YES | | +| EagerPyFunc | NO | | +| EditDistance | NO | | +| Eig | NO | | +| Einsum | YES | | +| Elu | YES | | +| EluGrad | NO | | +| Empty | NO | | +| EmptyTensorList | YES | | 
+| EncodeBase64 | NO | | +| EncodeJpeg | NO | | +| EncodeJpegVariableQuality | NO | | +| EncodePng | NO | | +| EncodeProto | NO | | +| EncodeWav | NO | | +| EnqueueTPUEmbeddingArbitraryTensorBatch | NO | | +| EnqueueTPUEmbeddingIntegerBatch | NO | | +| EnqueueTPUEmbeddingRaggedTensorBatch | NO | | +| EnqueueTPUEmbeddingSparseBatch | NO | | +| EnqueueTPUEmbeddingSparseTensorBatch | NO | | +| EnsureShape | YES | | +| Enter | YES | | +| Equal | YES | | +| Erf | YES | | +| Erfc | NO | | +| Erfinv | NO | | +| EuclideanNorm | YES | | +| Exit | YES | | +| Exp | YES | | +| ExpandDims | YES | | +| ExperimentalAssertNextDataset | NO | | +| ExperimentalAutoShardDataset | NO | | +| ExperimentalBytesProducedStatsDataset | NO | | +| ExperimentalCSVDataset | NO | | +| ExperimentalChooseFastestDataset | NO | | +| ExperimentalDatasetCardinality | NO | | +| ExperimentalDatasetToTFRecord | NO | | +| ExperimentalDenseToSparseBatchDataset | NO | | +| ExperimentalDirectedInterleaveDataset | NO | | +| ExperimentalGroupByReducerDataset | NO | | +| ExperimentalGroupByWindowDataset | NO | | +| ExperimentalIgnoreErrorsDataset | NO | | +| ExperimentalIteratorGetDevice | NO | | +| ExperimentalLMDBDataset | NO | | +| ExperimentalLatencyStatsDataset | NO | | +| ExperimentalMapAndBatchDataset | NO | | +| ExperimentalMapDataset | NO | | +| ExperimentalMatchingFilesDataset | NO | | +| ExperimentalMaxIntraOpParallelismDataset | NO | | +| ExperimentalNonSerializableDataset | NO | | +| ExperimentalParallelInterleaveDataset | NO | | +| ExperimentalParseExampleDataset | NO | | +| ExperimentalPrivateThreadPoolDataset | NO | | +| ExperimentalRandomDataset | NO | | +| ExperimentalRebatchDataset | NO | | +| ExperimentalScanDataset | NO | | +| ExperimentalSetStatsAggregatorDataset | NO | | +| ExperimentalSleepDataset | NO | | +| ExperimentalSlidingWindowDataset | NO | | +| ExperimentalSqlDataset | NO | | +| ExperimentalStatsAggregatorHandle | NO | | +| ExperimentalStatsAggregatorSummary | NO | | +| ExperimentalTakeWhileDataset | NO | | +| ExperimentalThreadPoolDataset | NO | | +| ExperimentalThreadPoolHandle | NO | | +| ExperimentalUnbatchDataset | NO | | +| ExperimentalUniqueDataset | NO | | +| Expint | NO | | +| Expm1 | NO | | +| ExtractGlimpse | NO | | +| ExtractGlimpseV2 | NO | | +| ExtractImagePatches | YES | | +| ExtractJpegShape | NO | | +| ExtractVolumePatches | NO | | +| FFT | NO | | +| FFT2D | NO | | +| FFT3D | NO | | +| FIFOQueue | YES | | +| FIFOQueueV2 | YES | | +| Fact | NO | | +| FakeParam | NO | | +| FakeQuantWithMinMaxArgs | YES | | +| FakeQuantWithMinMaxArgsGradient | NO | | +| FakeQuantWithMinMaxVars | YES | | +| FakeQuantWithMinMaxVarsGradient | NO | | +| FakeQuantWithMinMaxVarsPerChannel | YES | | +| FakeQuantWithMinMaxVarsPerChannelGradient | NO | | +| FakeQueue | NO | | +| Fill | YES | | +| FilterByLastComponentDataset | NO | | +| FilterDataset | NO | | +| FinalizeDataset | NO | | +| Fingerprint | NO | | +| FixedLengthRecordDataset | NO | | +| FixedLengthRecordDatasetV2 | NO | | +| FixedLengthRecordReader | NO | | +| FixedLengthRecordReaderV2 | NO | | +| FixedUnigramCandidateSampler | NO | | +| FlatMapDataset | NO | | +| Floor | YES | | +| FloorDiv | YES | | +| FloorMod | YES | | +| FlushSummaryWriter | NO | | +| For | NO | | +| FractionalAvgPool | NO | | +| FractionalAvgPoolGrad | NO | | +| FractionalMaxPool | NO | | +| FractionalMaxPoolGrad | NO | | +| FresnelCos | NO | | +| FresnelSin | NO | | +| FusedBatchNorm | YES | | +| FusedBatchNormGrad | NO | | +| FusedBatchNormGradV2 | NO | | +| FusedBatchNormGradV3 
| NO | | +| FusedBatchNormV2 | YES | | +| FusedBatchNormV3 | YES | | +| FusedPadConv2D | NO | | +| FusedResizeAndPadConv2D | NO | | +| GRUBlockCell | YES | | +| GRUBlockCellGrad | NO | | +| Gather | YES | | +| GatherNd | YES | | +| GatherV2 | YES | | +| GenerateBoundingBoxProposals | NO | | +| GenerateVocabRemapping | NO | | +| GeneratorDataset | NO | | +| GetElementAtIndex | NO | | +| GetOptions | NO | | +| GetSessionHandle | NO | | +| GetSessionHandleV2 | NO | | +| GetSessionTensor | NO | | +| Greater | YES | | +| GreaterEqual | YES | | +| GroupByReducerDataset | NO | | +| GroupByWindowDataset | NO | | +| GuaranteeConst | NO | | +| HSVToRGB | NO | | +| HashTable | YES | | +| HashTableV2 | YES | | +| HistogramFixedWidth | NO | | +| HistogramSummary | NO | | +| IFFT | NO | | +| IFFT2D | NO | | +| IFFT3D | NO | | +| IRFFT | NO | | +| IRFFT2D | NO | | +| IRFFT3D | NO | | +| Identity | YES | | +| IdentityN | YES | | +| IdentityReader | NO | | +| IdentityReaderV2 | NO | | +| If | YES | | +| Igamma | NO | | +| IgammaGradA | NO | | +| Igammac | NO | | +| IgnoreErrorsDataset | NO | | +| Imag | NO | | +| ImageProjectiveTransformV2 | NO | | +| ImageProjectiveTransformV3 | NO | | +| ImageSummary | NO | | +| ImmutableConst | NO | | +| ImportEvent | NO | | +| InTopK | NO | | +| InTopKV2 | NO | | +| InfeedDequeue | NO | | +| InfeedDequeueTuple | NO | | +| InfeedEnqueue | NO | | +| InfeedEnqueuePrelinearizedBuffer | NO | | +| InfeedEnqueueTuple | NO | | +| InitializeTable | NO | | +| InitializeTableFromDataset | NO | | +| InitializeTableFromTextFile | NO | | +| InitializeTableFromTextFileV2 | NO | | +| InitializeTableV2 | NO | | +| InplaceAdd | NO | | +| InplaceSub | NO | | +| InplaceUpdate | NO | | +| InterleaveDataset | NO | | +| Inv | NO | | +| InvGrad | NO | | +| Invert | NO | | +| InvertPermutation | YES | | +| IsBoostedTreesEnsembleInitialized | NO | | +| IsBoostedTreesQuantileStreamResourceInitialized | NO | | +| IsFinite | YES | | +| IsInf | YES | | +| IsNan | YES | | +| IsTPUEmbeddingInitialized | NO | | +| IsVariableInitialized | YES | | +| IsotonicRegression | NO | | +| Iterator | YES | | +| IteratorFromStringHandle | NO | | +| IteratorFromStringHandleV2 | NO | | +| IteratorGetDevice | NO | | +| IteratorGetNext | YES | | +| IteratorGetNextAsOptional | NO | | +| IteratorGetNextSync | NO | | +| IteratorToStringHandle | NO | | +| IteratorV2 | YES | | +| L2Loss | YES | | +| LMDBDataset | NO | | +| LMDBReader | NO | | +| LRN | YES | | +| LRNGrad | NO | | +| LSTMBlockCell | NO | | +| LSTMBlockCellGrad | NO | | +| LatencyStatsDataset | NO | | +| LeakyRelu | YES | | +| LeakyReluGrad | NO | | +| LearnedUnigramCandidateSampler | NO | | +| LeftShift | NO | | +| LegacyParallelInterleaveDatasetV2 | NO | | +| Less | YES | | +| LessEqual | YES | | +| Lgamma | NO | | +| LinSpace | YES | | +| ListDataset | NO | | +| ListDiff | YES | | +| LoadAndRemapMatrix | NO | | +| LoadDataset | NO | | +| LoadTPUEmbeddingADAMParameters | NO | | +| LoadTPUEmbeddingAdadeltaParameters | NO | | +| LoadTPUEmbeddingAdagradMomentumParameters | NO | | +| LoadTPUEmbeddingAdagradParameters | NO | | +| LoadTPUEmbeddingCenteredRMSPropParameters | NO | | +| LoadTPUEmbeddingFTRLParameters | NO | | +| LoadTPUEmbeddingFrequencyEstimatorParameters | NO | | +| LoadTPUEmbeddingMDLAdagradLightParameters | NO | | +| LoadTPUEmbeddingMomentumParameters | NO | | +| LoadTPUEmbeddingProximalAdagradParameters | NO | | +| LoadTPUEmbeddingProximalYogiParameters | NO | | +| LoadTPUEmbeddingRMSPropParameters | NO | | +| 
LoadTPUEmbeddingStochasticGradientDescentParameters | NO | | +| Log | YES | | +| Log1p | YES | | +| LogMatrixDeterminant | NO | | +| LogSoftmax | YES | | +| LogUniformCandidateSampler | NO | | +| LogicalAnd | YES | | +| LogicalNot | YES | | +| LogicalOr | YES | | +| LookupTableExport | NO | | +| LookupTableExportV2 | NO | | +| LookupTableFind | NO | | +| LookupTableFindV2 | NO | | +| LookupTableImport | NO | | +| LookupTableImportV2 | NO | | +| LookupTableInsert | YES | | +| LookupTableInsertV2 | YES | | +| LookupTableRemoveV2 | NO | | +| LookupTableSize | NO | | +| LookupTableSizeV2 | NO | | +| LoopCond | YES | | +| LowerBound | NO | | +| Lu | NO | | +| MakeIterator | NO | | +| MapAndBatchDataset | NO | | +| MapClear | NO | | +| MapDataset | NO | | +| MapDefun | NO | | +| MapIncompleteSize | NO | | +| MapPeek | NO | | +| MapSize | NO | | +| MapStage | NO | | +| MapUnstage | NO | | +| MapUnstageNoKey | NO | | +| MatMul | YES | | +| MatchingFiles | NO | | +| MatchingFilesDataset | NO | | +| MatrixBandPart | NO | | +| MatrixDeterminant | NO | | +| MatrixDiag | YES | | +| MatrixDiagPart | NO | | +| MatrixDiagPartV2 | NO | | +| MatrixDiagPartV3 | NO | | +| MatrixDiagV2 | NO | | +| MatrixDiagV3 | NO | | +| MatrixExponential | NO | | +| MatrixInverse | NO | | +| MatrixLogarithm | NO | | +| MatrixSetDiag | NO | | +| MatrixSetDiagV2 | NO | | +| MatrixSetDiagV3 | NO | | +| MatrixSolve | NO | | +| MatrixSolveLs | NO | | +| MatrixSquareRoot | NO | | +| MatrixTriangularSolve | NO | | +| Max | YES | | +| MaxIntraOpParallelismDataset | NO | | +| MaxPool | YES | | +| MaxPool3D | YES | | +| MaxPool3DGrad | NO | | +| MaxPool3DGradGrad | NO | | +| MaxPoolGrad | NO | | +| MaxPoolGradGrad | NO | | +| MaxPoolGradGradV2 | NO | | +| MaxPoolGradGradWithArgmax | NO | | +| MaxPoolGradV2 | NO | | +| MaxPoolGradWithArgmax | NO | | +| MaxPoolV2 | YES | | +| MaxPoolWithArgmax | YES | | +| Maximum | YES | | +| Mean | YES | | +| Merge | YES | | +| MergeSummary | NO | | +| MergeV2Checkpoints | YES | | +| Mfcc | NO | | +| Min | YES | | +| Minimum | YES | | +| MirrorPad | YES | | +| MirrorPadGrad | NO | | +| Mod | YES | | +| ModelDataset | NO | | +| Mul | YES | | +| MulNoNan | NO | | +| MultiDeviceIterator | NO | | +| MultiDeviceIteratorFromStringHandle | NO | | +| MultiDeviceIteratorGetNextFromShard | NO | | +| MultiDeviceIteratorInit | NO | | +| MultiDeviceIteratorToStringHandle | NO | | +| Multinomial | NO | | +| MutableDenseHashTable | NO | | +| MutableDenseHashTableV2 | NO | | +| MutableHashTable | YES | | +| MutableHashTableOfTensors | NO | | +| MutableHashTableOfTensorsV2 | NO | | +| MutableHashTableV2 | YES | | +| MutexLock | NO | | +| MutexV2 | NO | | +| NcclAllReduce | NO | | +| NcclBroadcast | NO | | +| NcclReduce | NO | | +| Ndtri | NO | | +| Neg | YES | | +| NextAfter | NO | | +| NextIteration | YES | | +| NoOp | YES | | +| NonDeterministicInts | NO | | +| NonMaxSuppression | YES | | +| NonMaxSuppressionV2 | YES | | +| NonMaxSuppressionV3 | YES | | +| NonMaxSuppressionV4 | YES | | +| NonMaxSuppressionV5 | YES | | +| NonMaxSuppressionWithOverlaps | NO | | +| NonSerializableDataset | NO | | +| NotEqual | YES | | +| NthElement | NO | | +| OneHot | YES | | +| OneShotIterator | YES | | +| OnesLike | YES | | +| OptimizeDataset | NO | | +| OptimizeDatasetV2 | NO | | +| OptionalFromValue | NO | | +| OptionalGetValue | NO | | +| OptionalHasValue | NO | | +| OptionalNone | NO | | +| OptionsDataset | NO | | +| OrderedMapClear | NO | | +| OrderedMapIncompleteSize | NO | | +| OrderedMapPeek | NO | | +| OrderedMapSize | NO | 
| +| OrderedMapStage | NO | | +| OrderedMapUnstage | NO | | +| OrderedMapUnstageNoKey | NO | | +| OutfeedDequeue | NO | | +| OutfeedDequeueTuple | NO | | +| OutfeedDequeueTupleV2 | NO | | +| OutfeedDequeueV2 | NO | | +| OutfeedEnqueue | NO | | +| OutfeedEnqueueTuple | NO | | +| Pack | YES | | +| Pad | YES | | +| PadV2 | YES | | +| PaddedBatchDataset | NO | | +| PaddedBatchDatasetV2 | NO | | +| PaddingFIFOQueue | NO | | +| PaddingFIFOQueueV2 | NO | | +| ParallelBatchDataset | NO | | +| ParallelConcat | NO | | +| ParallelDynamicStitch | YES | | +| ParallelFilterDataset | NO | | +| ParallelInterleaveDataset | NO | | +| ParallelInterleaveDatasetV2 | NO | | +| ParallelInterleaveDatasetV3 | NO | | +| ParallelInterleaveDatasetV4 | NO | | +| ParallelMapDataset | NO | | +| ParallelMapDatasetV2 | NO | | +| ParameterizedTruncatedNormal | NO | | +| ParseExample | NO | | +| ParseExampleDataset | NO | | +| ParseExampleDatasetV2 | NO | | +| ParseExampleV2 | NO | | +| ParseSequenceExample | NO | | +| ParseSequenceExampleV2 | NO | | +| ParseSingleExample | NO | | +| ParseSingleSequenceExample | NO | | +| ParseTensor | NO | | +| PartitionedCall | YES | | +| Placeholder | YES | | +| PlaceholderV2 | NO | | +| PlaceholderWithDefault | YES | | +| Polygamma | NO | | +| PopulationCount | NO | | +| Pow | YES | | +| PrefetchDataset | NO | | +| Prelinearize | NO | | +| PrelinearizeTuple | NO | | +| PreventGradient | YES | | +| Print | NO | | +| PrintV2 | NO | | +| PriorityQueue | NO | | +| PriorityQueueV2 | NO | | +| PrivateThreadPoolDataset | NO | | +| Prod | YES | | +| PyFunc | NO | | +| PyFuncStateless | NO | | +| Qr | NO | | +| QuantizeAndDequantize | NO | | +| QuantizeAndDequantizeV2 | NO | | +| QuantizeAndDequantizeV3 | NO | | +| QuantizeAndDequantizeV4 | NO | | +| QuantizeAndDequantizeV4Grad | NO | | +| QuantizeDownAndShrinkRange | NO | | +| QuantizeV2 | NO | | +| QuantizedAdd | NO | | +| QuantizedAvgPool | NO | | +| QuantizedBatchNormWithGlobalNormalization | NO | | +| QuantizedBiasAdd | NO | | +| QuantizedConcat | NO | | +| QuantizedConv2D | NO | | +| QuantizedConv2DAndRelu | NO | | +| QuantizedConv2DAndReluAndRequantize | NO | | +| QuantizedConv2DAndRequantize | NO | | +| QuantizedConv2DPerChannel | NO | | +| QuantizedConv2DWithBias | NO | | +| QuantizedConv2DWithBiasAndRelu | NO | | +| QuantizedConv2DWithBiasAndReluAndRequantize | NO | | +| QuantizedConv2DWithBiasAndRequantize | NO | | +| QuantizedConv2DWithBiasSignedSumAndReluAndRequantize | NO | | +| QuantizedConv2DWithBiasSumAndRelu | NO | | +| QuantizedConv2DWithBiasSumAndReluAndRequantize | NO | | +| QuantizedDepthwiseConv2D | NO | | +| QuantizedDepthwiseConv2DWithBias | NO | | +| QuantizedDepthwiseConv2DWithBiasAndRelu | NO | | +| QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize | NO | | +| QuantizedInstanceNorm | NO | | +| QuantizedMatMul | NO | | +| QuantizedMatMulWithBias | NO | | +| QuantizedMatMulWithBiasAndDequantize | NO | | +| QuantizedMatMulWithBiasAndRelu | NO | | +| QuantizedMatMulWithBiasAndReluAndRequantize | NO | | +| QuantizedMatMulWithBiasAndRequantize | NO | | +| QuantizedMaxPool | NO | | +| QuantizedMul | NO | | +| QuantizedRelu | NO | | +| QuantizedRelu6 | NO | | +| QuantizedReluX | NO | | +| QuantizedReshape | NO | | +| QuantizedResizeBilinear | NO | | +| QueueClose | NO | | +| QueueCloseV2 | NO | | +| QueueDequeue | YES | | +| QueueDequeueMany | YES | | +| QueueDequeueManyV2 | NO | | +| QueueDequeueUpTo | YES | | +| QueueDequeueUpToV2 | YES | | +| QueueDequeueV2 | YES | | +| QueueEnqueue | NO | | +| QueueEnqueueMany | NO | | 
+| QueueEnqueueManyV2 | NO | | +| QueueEnqueueV2 | NO | | +| QueueIsClosed | NO | | +| QueueIsClosedV2 | NO | | +| QueueSize | NO | | +| QueueSizeV2 | NO | | +| RFFT | NO | | +| RFFT2D | NO | | +| RFFT3D | NO | | +| RGBToHSV | NO | | +| RaggedBincount | NO | | +| RaggedCountSparseOutput | NO | | +| RaggedCross | NO | | +| RaggedFillEmptyRows | NO | | +| RaggedFillEmptyRowsGrad | NO | | +| RaggedGather | NO | | +| RaggedRange | NO | | +| RaggedTensorFromVariant | NO | | +| RaggedTensorToSparse | NO | | +| RaggedTensorToTensor | NO | | +| RaggedTensorToVariant | NO | | +| RaggedTensorToVariantGradient | NO | | +| RandomCrop | NO | | +| RandomDataset | NO | | +| RandomDatasetV2 | NO | | +| RandomGamma | NO | | +| RandomGammaGrad | NO | | +| RandomIndexShuffle | NO | | +| RandomPoisson | NO | | +| RandomPoissonV2 | NO | | +| RandomShuffle | NO | | +| RandomShuffleQueue | NO | | +| RandomShuffleQueueV2 | NO | | +| RandomStandardNormal | NO | | +| RandomUniform | YES | | +| RandomUniformInt | YES | | +| Range | YES | | +| RangeDataset | NO | | +| Rank | YES | | +| ReadFile | NO | | +| ReadVariableOp | YES | | +| ReadVariableXlaSplitND | NO | | +| ReaderNumRecordsProduced | NO | | +| ReaderNumRecordsProducedV2 | NO | | +| ReaderNumWorkUnitsCompleted | NO | | +| ReaderNumWorkUnitsCompletedV2 | NO | | +| ReaderRead | NO | | +| ReaderReadUpTo | NO | | +| ReaderReadUpToV2 | NO | | +| ReaderReadV2 | NO | | +| ReaderReset | NO | | +| ReaderResetV2 | NO | | +| ReaderRestoreState | NO | | +| ReaderRestoreStateV2 | NO | | +| ReaderSerializeState | NO | | +| ReaderSerializeStateV2 | NO | | +| Real | NO | | +| RealDiv | YES | | +| RebatchDataset | NO | | +| RebatchDatasetV2 | NO | | +| Reciprocal | YES | | +| ReciprocalGrad | NO | | +| RecordInput | NO | | +| Recv | NO | | +| RecvTPUEmbeddingActivations | NO | | +| ReduceDataset | NO | | +| ReduceJoin | NO | | +| RefEnter | NO | | +| RefExit | NO | | +| RefIdentity | NO | | +| RefMerge | NO | | +| RefNextIteration | NO | | +| RefSelect | NO | | +| RefSwitch | NO | | +| RegexFullMatch | NO | | +| RegexReplace | NO | | +| RegisterDataset | NO | | +| RegisterDatasetV2 | NO | | +| Relu | YES | | +| Relu6 | YES | | +| Relu6Grad | NO | | +| ReluGrad | NO | | +| RemoteCall | NO | | +| RepeatDataset | NO | | +| RequantizationRange | NO | | +| RequantizationRangePerChannel | NO | | +| Requantize | NO | | +| RequantizePerChannel | NO | | +| Reshape | YES | | +| ResizeArea | NO | | +| ResizeBicubic | NO | | +| ResizeBicubicGrad | NO | | +| ResizeBilinear | YES | | +| ResizeBilinearGrad | NO | | +| ResizeNearestNeighbor | YES | | +| ResizeNearestNeighborGrad | NO | | +| ResourceAccumulatorApplyGradient | NO | | +| ResourceAccumulatorNumAccumulated | NO | | +| ResourceAccumulatorSetGlobalStep | NO | | +| ResourceAccumulatorTakeGradient | NO | | +| ResourceApplyAdaMax | NO | | +| ResourceApplyAdadelta | NO | | +| ResourceApplyAdagrad | NO | | +| ResourceApplyAdagradDA | NO | | +| ResourceApplyAdagradV2 | NO | | +| ResourceApplyAdam | NO | | +| ResourceApplyAdamWithAmsgrad | NO | | +| ResourceApplyAddSign | NO | | +| ResourceApplyCenteredRMSProp | NO | | +| ResourceApplyFtrl | NO | | +| ResourceApplyFtrlV2 | NO | | +| ResourceApplyGradientDescent | NO | | +| ResourceApplyKerasMomentum | NO | | +| ResourceApplyMomentum | NO | | +| ResourceApplyPowerSign | NO | | +| ResourceApplyProximalAdagrad | NO | | +| ResourceApplyProximalGradientDescent | NO | | +| ResourceApplyRMSProp | NO | | +| ResourceConditionalAccumulator | NO | | +| ResourceCountUpTo | NO | | +| ResourceGather 
| YES | | +| ResourceGatherNd | NO | | +| ResourceScatterAdd | NO | | +| ResourceScatterDiv | NO | | +| ResourceScatterMax | NO | | +| ResourceScatterMin | NO | | +| ResourceScatterMul | NO | | +| ResourceScatterNdAdd | NO | | +| ResourceScatterNdMax | NO | | +| ResourceScatterNdMin | NO | | +| ResourceScatterNdSub | NO | | +| ResourceScatterNdUpdate | NO | | +| ResourceScatterSub | NO | | +| ResourceScatterUpdate | NO | | +| ResourceSparseApplyAdadelta | NO | | +| ResourceSparseApplyAdagrad | NO | | +| ResourceSparseApplyAdagradDA | NO | | +| ResourceSparseApplyAdagradV2 | NO | | +| ResourceSparseApplyCenteredRMSProp | NO | | +| ResourceSparseApplyFtrl | NO | | +| ResourceSparseApplyFtrlV2 | NO | | +| ResourceSparseApplyKerasMomentum | NO | | +| ResourceSparseApplyMomentum | NO | | +| ResourceSparseApplyProximalAdagrad | NO | | +| ResourceSparseApplyProximalGradientDescent | NO | | +| ResourceSparseApplyRMSProp | NO | | +| ResourceStridedSliceAssign | NO | | +| Restore | NO | | +| RestoreSlice | NO | | +| RestoreV2 | YES | | +| RetrieveTPUEmbeddingADAMParameters | NO | | +| RetrieveTPUEmbeddingAdadeltaParameters | NO | | +| RetrieveTPUEmbeddingAdagradMomentumParameters | NO | | +| RetrieveTPUEmbeddingAdagradParameters | NO | | +| RetrieveTPUEmbeddingCenteredRMSPropParameters | NO | | +| RetrieveTPUEmbeddingFTRLParameters | NO | | +| RetrieveTPUEmbeddingFrequencyEstimatorParameters | NO | | +| RetrieveTPUEmbeddingMDLAdagradLightParameters | NO | | +| RetrieveTPUEmbeddingMomentumParameters | NO | | +| RetrieveTPUEmbeddingProximalAdagradParameters | NO | | +| RetrieveTPUEmbeddingProximalYogiParameters | NO | | +| RetrieveTPUEmbeddingRMSPropParameters | NO | | +| RetrieveTPUEmbeddingStochasticGradientDescentParameters | NO | | +| Reverse | YES | | +| ReverseSequence | YES | | +| ReverseV2 | YES | | +| RewriteDataset | NO | | +| RightShift | NO | | +| Rint | NO | | +| RngReadAndSkip | NO | | +| RngSkip | NO | | +| Roll | YES | | +| Round | YES | | +| Rsqrt | YES | | +| RsqrtGrad | NO | | +| SampleDistortedBoundingBox | NO | | +| SampleDistortedBoundingBoxV2 | NO | | +| SamplingDataset | NO | | +| Save | NO | | +| SaveDataset | NO | | +| SaveDatasetV2 | NO | | +| SaveSlices | NO | | +| SaveV2 | YES | | +| ScalarSummary | NO | | +| ScaleAndTranslate | NO | | +| ScaleAndTranslateGrad | NO | | +| ScanDataset | NO | | +| ScatterAdd | NO | | +| ScatterDiv | NO | | +| ScatterMax | NO | | +| ScatterMin | NO | | +| ScatterMul | NO | | +| ScatterNd | YES | | +| ScatterNdAdd | NO | | +| ScatterNdMax | NO | | +| ScatterNdMin | NO | | +| ScatterNdNonAliasingAdd | NO | | +| ScatterNdSub | NO | | +| ScatterNdUpdate | NO | | +| ScatterSub | NO | | +| ScatterUpdate | NO | | +| SdcaFprint | NO | | +| SdcaOptimizer | NO | | +| SdcaOptimizerV2 | NO | | +| SdcaShrinkL1 | NO | | +| SegmentMax | NO | | +| SegmentMaxV2 | NO | | +| SegmentMean | NO | | +| SegmentMin | NO | | +| SegmentMinV2 | NO | | +| SegmentProd | NO | | +| SegmentProdV2 | NO | | +| SegmentSum | YES | | +| SegmentSumV2 | NO | | +| Select | YES | | +| SelectV2 | YES | | +| SelfAdjointEig | NO | | +| SelfAdjointEigV2 | NO | | +| Selu | YES | | +| SeluGrad | NO | | +| Send | NO | | +| SendTPUEmbeddingGradients | NO | | +| SerializeIterator | NO | | +| SerializeManySparse | NO | | +| SerializeSparse | NO | | +| SerializeTensor | NO | | +| SetSize | NO | | +| SetStatsAggregatorDataset | NO | | +| Shape | YES | | +| ShapeN | YES | | +| ShardDataset | NO | | +| ShardedFilename | YES | | +| ShardedFilespec | NO | | +| ShuffleAndRepeatDataset | NO | | +| 
ShuffleAndRepeatDatasetV2 | NO | |
+| ShuffleDataset | NO | |
+| ShuffleDatasetV2 | NO | |
+| ShuffleDatasetV3 | NO | |
+| ShutdownDistributedTPU | NO | |
+| Sigmoid | YES | |
+| SigmoidGrad | NO | |
+| Sign | YES | |
+| Sin | YES | |
+| Sinh | YES | |
+| Size | YES | |
+| SkipDataset | NO | |
+| SleepDataset | NO | |
+| Slice | YES | |
+| SlidingWindowDataset | NO | |
+| Snapshot | YES | |
+| SnapshotChunkDataset | NO | |
+| SnapshotDataset | NO | |
+| SnapshotDatasetReader | NO | |
+| SnapshotDatasetV2 | NO | |
+| SnapshotNestedDatasetReader | NO | |
+| SobolSample | NO | |
+| Softmax | YES | |
+| SoftmaxCrossEntropyWithLogits | NO | |
+| Softplus | YES | |
+| SoftplusGrad | NO | |
+| Softsign | YES | |
+| SoftsignGrad | NO | |
+| SpaceToBatch | NO | |
+| SpaceToBatchND | YES | |
+| SpaceToDepth | YES | |
+| SparseAccumulatorApplyGradient | NO | |
+| SparseAccumulatorTakeGradient | NO | |
+| SparseAdd | NO | |
+| SparseAddGrad | NO | |
+| SparseApplyAdadelta | NO | |
+| SparseApplyAdagrad | NO | |
+| SparseApplyAdagradDA | NO | |
+| SparseApplyAdagradV2 | NO | |
+| SparseApplyCenteredRMSProp | NO | |
+| SparseApplyFtrl | NO | |
+| SparseApplyFtrlV2 | NO | |
+| SparseApplyMomentum | NO | |
+| SparseApplyProximalAdagrad | NO | |
+| SparseApplyProximalGradientDescent | NO | |
+| SparseApplyRMSProp | NO | |
+| SparseBincount | NO | |
+| SparseConcat | NO | |
+| SparseConditionalAccumulator | NO | |
+| SparseCountSparseOutput | NO | |
+| SparseCross | NO | |
+| SparseCrossHashed | NO | |
+| SparseCrossV2 | NO | |
+| SparseDenseCwiseAdd | NO | |
+| SparseDenseCwiseDiv | NO | |
+| SparseDenseCwiseMul | NO | |
+| SparseFillEmptyRows | YES | |
+| SparseFillEmptyRowsGrad | NO | |
+| SparseMatMul | NO | |
+| SparseMatrixAdd | NO | |
+| SparseMatrixMatMul | NO | |
+| SparseMatrixMul | NO | |
+| SparseMatrixNNZ | NO | |
+| SparseMatrixOrderingAMD | NO | |
+| SparseMatrixSoftmax | NO | |
+| SparseMatrixSoftmaxGrad | NO | |
+| SparseMatrixSparseCholesky | NO | |
+| SparseMatrixSparseMatMul | NO | |
+| SparseMatrixTranspose | NO | |
+| SparseMatrixZeros | NO | |
+| SparseReduceMax | NO | |
+| SparseReduceMaxSparse | NO | |
+| SparseReduceSum | NO | |
+| SparseReduceSumSparse | NO | |
+| SparseReorder | NO | |
+| SparseReshape | YES | |
+| SparseSegmentMean | NO | |
+| SparseSegmentMeanGrad | NO | |
+| SparseSegmentMeanGradV2 | NO | |
+| SparseSegmentMeanWithNumSegments | NO | |
+| SparseSegmentSqrtN | NO | |
+| SparseSegmentSqrtNGrad | NO | |
+| SparseSegmentSqrtNGradV2 | NO | |
+| SparseSegmentSqrtNWithNumSegments | NO | |
+| SparseSegmentSum | YES | |
+| SparseSegmentSumGrad | NO | |
+| SparseSegmentSumGradV2 | NO | |
+| SparseSegmentSumWithNumSegments | NO | |
+| SparseSlice | NO | |
+| SparseSliceGrad | NO | |
+| SparseSoftmax | NO | |
+| SparseSoftmaxCrossEntropyWithLogits | NO | |
+| SparseSparseMaximum | NO | |
+| SparseSparseMinimum | NO | |
+| SparseSplit | NO | |
+| SparseTensorDenseAdd | NO | |
+| SparseTensorDenseMatMul | NO | |
+| SparseTensorSliceDataset | NO | |
+| SparseTensorToCSRSparseMatrix | NO | |
+| SparseToDense | YES | |
+| SparseToSparseSetOperation | NO | |
+| Spence | NO | |
+| Split | YES | |
+| SplitV | YES | |
+| SqlDataset | NO | |
+| Sqrt | YES | |
+| SqrtGrad | NO | |
+| Square | YES | |
+| SquaredDifference | YES | |
+| Squeeze | YES | |
+| Stack | NO | |
+| StackClose | NO | |
+| StackCloseV2 | NO | |
+| StackPop | NO | |
+| StackPopV2 | NO | |
+| StackPush | NO | |
+| StackPushV2 | NO | |
+| StackV2 | NO | |
+| Stage | NO | |
+| StageClear | NO | |
+| StagePeek | NO | |
+| StageSize | NO | |
+| StatefulPartitionedCall | YES | |
+| StatefulRandomBinomial | NO | |
+| StatefulStandardNormal | NO | |
+| StatefulStandardNormalV2 | NO | |
+| StatefulTruncatedNormal | NO | |
+| StatefulUniform | NO | |
+| StatefulUniformFullInt | NO | |
+| StatefulUniformInt | NO | |
+| StatelessCase | NO | |
+| StatelessIf | YES | |
+| StatelessMultinomial | NO | |
+| StatelessParameterizedTruncatedNormal | NO | |
+| StatelessRandomBinomial | NO | |
+| StatelessRandomGammaV2 | NO | |
+| StatelessRandomGammaV3 | NO | |
+| StatelessRandomGetAlg | NO | |
+| StatelessRandomGetKeyCounter | NO | |
+| StatelessRandomGetKeyCounterAlg | NO | |
+| StatelessRandomNormal | NO | |
+| StatelessRandomNormalV2 | NO | |
+| StatelessRandomPoisson | NO | |
+| StatelessRandomUniform | NO | |
+| StatelessRandomUniformFullInt | NO | |
+| StatelessRandomUniformFullIntV2 | NO | |
+| StatelessRandomUniformInt | NO | |
+| StatelessRandomUniformIntV2 | NO | |
+| StatelessRandomUniformV2 | NO | |
+| StatelessSampleDistortedBoundingBox | NO | |
+| StatelessShuffle | NO | |
+| StatelessTruncatedNormal | NO | |
+| StatelessTruncatedNormalV2 | NO | |
+| StatelessWhile | YES | |
+| StaticRegexFullMatch | YES | |
+| StaticRegexReplace | NO | |
+| StatsAggregatorHandle | NO | |
+| StatsAggregatorHandleV2 | NO | |
+| StatsAggregatorSetSummaryWriter | NO | |
+| StatsAggregatorSummary | NO | |
+| StopGradient | YES | |
+| StridedSlice | YES | |
+| StridedSliceAssign | NO | |
+| StridedSliceGrad | NO | |
+| StringFormat | NO | |
+| StringJoin | YES | |
+| StringLength | NO | |
+| StringLower | NO | |
+| StringNGrams | NO | |
+| StringSplit | NO | |
+| StringSplitV2 | NO | |
+| StringStrip | NO | |
+| StringToHashBucket | NO | |
+| StringToHashBucketFast | NO | |
+| StringToHashBucketStrong | NO | |
+| StringToNumber | NO | |
+| StringUpper | NO | |
+| Sub | YES | |
+| Substr | NO | |
+| Sum | YES | |
+| SummaryWriter | NO | |
+| Svd | NO | |
+| Switch | YES | |
+| SymbolicGradient | NO | |
+| SyncDevice | NO | |
+| TFRecordDataset | NO | |
+| TFRecordDatasetV2 | NO | |
+| TFRecordReader | NO | |
+| TFRecordReaderV2 | NO | |
+| TPUCompilationResult | NO | |
+| TPUEmbeddingActivations | NO | |
+| TPUOrdinalSelector | NO | |
+| TPUPartitionedCall | NO | |
+| TPUPartitionedInput | NO | |
+| TPUPartitionedInputV2 | NO | |
+| TPUPartitionedOutput | NO | |
+| TPUPartitionedOutputV2 | NO | |
+| TPUReplicateMetadata | NO | |
+| TPUReplicatedInput | NO | |
+| TPUReplicatedOutput | NO | |
+| TakeDataset | NO | |
+| TakeManySparseFromTensorsMap | NO | |
+| TakeWhileDataset | NO | |
+| Tan | YES | |
+| Tanh | YES | |
+| TanhGrad | NO | |
+| TemporaryVariable | NO | |
+| TensorArray | NO | |
+| TensorArrayClose | NO | |
+| TensorArrayCloseV2 | NO | |
+| TensorArrayCloseV3 | YES | |
+| TensorArrayConcat | NO | |
+| TensorArrayConcatV2 | NO | |
+| TensorArrayConcatV3 | YES | |
+| TensorArrayGather | NO | |
+| TensorArrayGatherV2 | NO | |
+| TensorArrayGatherV3 | YES | |
+| TensorArrayGrad | NO | |
+| TensorArrayGradV2 | NO | |
+| TensorArrayGradV3 | NO | |
+| TensorArrayGradWithShape | NO | |
+| TensorArrayPack | NO | |
+| TensorArrayRead | NO | |
+| TensorArrayReadV2 | NO | |
+| TensorArrayReadV3 | YES | |
+| TensorArrayScatter | NO | |
+| TensorArrayScatterV2 | NO | |
+| TensorArrayScatterV3 | YES | |
+| TensorArraySize | NO | |
+| TensorArraySizeV2 | NO | |
+| TensorArraySizeV3 | YES | |
+| TensorArraySplit | NO | |
+| TensorArraySplitV2 | NO | |
+| TensorArraySplitV3 | NO | |
+| TensorArrayUnpack | NO | |
+| TensorArrayV2 | NO | |
+| TensorArrayV3 | YES | |
+| TensorArrayWrite | NO | |
+| TensorArrayWriteV2 | NO | |
+| TensorArrayWriteV3 | YES | |
+| TensorDataset | NO | |
+| TensorListConcat | NO | |
+| TensorListConcatLists | NO | |
+| TensorListConcatV2 | NO | |
+| TensorListElementShape | NO | |
+| TensorListFromTensor | YES | |
+| TensorListGather | NO | |
+| TensorListGetItem | YES | |
+| TensorListLength | YES | |
+| TensorListPopBack | NO | |
+| TensorListPushBack | YES | |
+| TensorListPushBackBatch | NO | |
+| TensorListReserve | YES | |
+| TensorListResize | YES | |
+| TensorListScatter | NO | |
+| TensorListScatterIntoExistingList | NO | |
+| TensorListScatterV2 | NO | |
+| TensorListSetItem | YES | |
+| TensorListSplit | NO | |
+| TensorListStack | YES | |
+| TensorScatterAdd | NO | |
+| TensorScatterMax | NO | |
+| TensorScatterMin | NO | |
+| TensorScatterSub | NO | |
+| TensorScatterUpdate | NO | |
+| TensorSliceDataset | NO | |
+| TensorStridedSliceUpdate | NO | |
+| TensorSummary | NO | |
+| TensorSummaryV2 | NO | |
+| TextLineDataset | NO | |
+| TextLineReader | NO | |
+| TextLineReaderV2 | NO | |
+| ThreadPoolDataset | NO | |
+| ThreadPoolHandle | NO | |
+| ThreadUnsafeUnigramCandidateSampler | NO | |
+| Tile | YES | |
+| TileGrad | NO | |
+| Timestamp | NO | |
+| ToBool | YES | |
+| TopK | YES | |
+| TopKV2 | YES | |
+| Transpose | YES | |
+| TridiagonalMatMul | NO | |
+| TridiagonalSolve | NO | |
+| TruncateDiv | YES | |
+| TruncateMod | YES | |
+| TruncatedNormal | NO | |
+| Unbatch | NO | |
+| UnbatchDataset | NO | |
+| UnbatchGrad | NO | |
+| UncompressElement | NO | |
+| UnicodeDecode | NO | |
+| UnicodeDecodeWithOffsets | NO | |
+| UnicodeEncode | NO | |
+| UnicodeScript | NO | |
+| UnicodeTranscode | NO | |
+| UniformCandidateSampler | NO | |
+| UniformDequantize | NO | |
+| UniformQuantize | NO | |
+| UniformQuantizedAdd | NO | |
+| UniformQuantizedClipByValue | NO | |
+| UniformQuantizedConvolution | NO | |
+| UniformQuantizedConvolutionHybrid | NO | |
+| UniformQuantizedDot | NO | |
+| UniformQuantizedDotHybrid | NO | |
+| UniformRequantize | NO | |
+| Unique | YES | |
+| UniqueDataset | NO | |
+| UniqueV2 | NO | |
+| UniqueWithCounts | NO | |
+| UniqueWithCountsV2 | NO | |
+| Unpack | YES | |
+| UnravelIndex | YES | |
+| UnsortedSegmentJoin | NO | |
+| UnsortedSegmentMax | NO | |
+| UnsortedSegmentMin | NO | |
+| UnsortedSegmentProd | NO | |
+| UnsortedSegmentSum | YES | |
+| Unstage | NO | |
+| UnwrapDatasetVariant | NO | |
+| UpperBound | NO | |
+| VarHandleOp | YES | |
+| VarIsInitializedOp | YES | |
+| Variable | YES | |
+| VariableShape | NO | |
+| VariableV2 | YES | |
+| Where | YES | |
+| While | YES | |
+| WholeFileReader | NO | |
+| WholeFileReaderV2 | NO | |
+| WindowDataset | NO | |
+| WindowOp | NO | |
+| WorkerHeartbeat | NO | |
+| WrapDatasetVariant | NO | |
+| WriteAudioSummary | NO | |
+| WriteFile | NO | |
+| WriteGraphSummary | NO | |
+| WriteHistogramSummary | NO | |
+| WriteImageSummary | NO | |
+| WriteRawProtoSummary | NO | |
+| WriteScalarSummary | NO | |
+| WriteSummary | NO | |
+| Xdivy | YES | |
+| XlaConcatND | NO | |
+| XlaSplitND | NO | |
+| Xlog1py | YES | |
+| Xlogy | YES | |
+| ZerosLike | YES | |
+| Zeta | NO | |
+| ZipDataset | NO | |
"checkpoint_utils.hpp" #include "openvino/frontend/exception.hpp" #include "openvino/util/file_util.hpp" -#include "saved_tensor_slice.pb.h" +#include "ov_tensorflow/saved_tensor_slice.pb.h" #include "tf_utils.hpp" #ifdef ENABLE_SNAPPY_COMPRESSION diff --git a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp index bfae3b139a2aae..1171fd6a682cb1 100644 --- a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp +++ b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp @@ -12,9 +12,9 @@ #include "checkpoint_utils.hpp" #include "openvino/core/any.hpp" #include "openvino/frontend/exception.hpp" -#include "saved_tensor_slice.pb.h" -#include "tensor_shape.pb.h" -#include "types.pb.h" +#include "ov_tensorflow/saved_tensor_slice.pb.h" +#include "ov_tensorflow/tensor_shape.pb.h" +#include "ov_tensorflow/types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/decoder_argdef.cpp b/src/frontends/tensorflow/src/decoder_argdef.cpp index 3430bcbe5e37aa..af7c1a1cfccd01 100644 --- a/src/frontends/tensorflow/src/decoder_argdef.cpp +++ b/src/frontends/tensorflow/src/decoder_argdef.cpp @@ -5,11 +5,11 @@ #include "decoder_argdef.hpp" #include "decoder_proto.hpp" -#include "op_def.pb.h" #include "openvino/frontend/tensorflow/node_context.hpp" #include "openvino/frontend/tensorflow/special_types.hpp" +#include "ov_tensorflow/op_def.pb.h" +#include "ov_tensorflow/types.pb.h" #include "tf_utils.hpp" -#include "types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/decoder_proto.cpp b/src/frontends/tensorflow/src/decoder_proto.cpp index 2488973c1029e1..9e0a53efb6d09f 100644 --- a/src/frontends/tensorflow/src/decoder_proto.cpp +++ b/src/frontends/tensorflow/src/decoder_proto.cpp @@ -4,12 +4,12 @@ #include "decoder_proto.hpp" -#include "attr_value.pb.h" -#include "node_def.pb.h" #include "openvino/frontend/tensorflow/node_context.hpp" #include "openvino/frontend/tensorflow/special_types.hpp" +#include "ov_tensorflow/attr_value.pb.h" +#include "ov_tensorflow/node_def.pb.h" +#include "ov_tensorflow/types.pb.h" #include "tf_utils.hpp" -#include "types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/decoder_proto.hpp b/src/frontends/tensorflow/src/decoder_proto.hpp index 338bfdeccea79d..9d22e273e1e146 100644 --- a/src/frontends/tensorflow/src/decoder_proto.hpp +++ b/src/frontends/tensorflow/src/decoder_proto.hpp @@ -9,7 +9,7 @@ #include "openvino/core/type/element_type.hpp" #include "openvino/frontend/tensorflow/decoder.hpp" -#include "types.pb.h" +#include "ov_tensorflow/types.pb.h" namespace tensorflow { class GraphDef; diff --git a/src/frontends/tensorflow/src/frontend.cpp b/src/frontends/tensorflow/src/frontend.cpp index 24b5824fe336d1..ad9b5b76bdfbda 100644 --- a/src/frontends/tensorflow/src/frontend.cpp +++ b/src/frontends/tensorflow/src/frontend.cpp @@ -14,6 +14,7 @@ #include "helper_transforms/embedding_segments_feature_fusing.hpp" #include "helper_transforms/gru_block_cell_replacer.hpp" #include "helper_transforms/saved_model_unused_remover.hpp" +#include "helper_transforms/tensor_array_v3_replacer.hpp" #include "input_model.hpp" #include "op_table.hpp" #include "openvino/core/so_extension.hpp" @@ -491,6 +492,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); 
diff --git a/src/frontends/tensorflow/src/graph_iterator_meta.cpp b/src/frontends/tensorflow/src/graph_iterator_meta.cpp
index 8bc41fbaefdd04..06f2d31f389a27 100644
--- a/src/frontends/tensorflow/src/graph_iterator_meta.cpp
+++ b/src/frontends/tensorflow/src/graph_iterator_meta.cpp
@@ -10,8 +10,8 @@
 #include
 
 #include "openvino/core/type/element_type.hpp"
-#include "tensor_bundle.pb.h"
-#include "trackable_object_graph.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
+#include "ov_tensorflow/trackable_object_graph.pb.h"
 
 namespace ov {
 namespace frontend {
diff --git a/src/frontends/tensorflow/src/graph_iterator_proto.hpp b/src/frontends/tensorflow/src/graph_iterator_proto.hpp
index 8b073b08373305..5ef6d0a5954b41 100644
--- a/src/frontends/tensorflow/src/graph_iterator_proto.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_proto.hpp
@@ -10,10 +10,10 @@
 #include "checkpoint_v1_reader.hpp"
 #include "decoder_argdef.hpp"
 #include "decoder_proto.hpp"
-#include "graph.pb.h"
 #include "openvino/frontend/exception.hpp"
 #include "openvino/frontend/graph_iterator.hpp"
 #include "openvino/frontend/tensorflow/decoder.hpp"
+#include "ov_tensorflow/graph.pb.h"
 
 namespace ov {
 namespace frontend {
diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
index ece0148d19bb20..803e7d694bc69a 100644
--- a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
+++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
@@ -10,8 +10,8 @@
 #include
 
 #include "openvino/core/type/element_type.hpp"
-#include "tensor_bundle.pb.h"
-#include "trackable_object_graph.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
+#include "ov_tensorflow/trackable_object_graph.pb.h"
 
 namespace ov {
 namespace frontend {
diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
index 511f2a0a5bc307..4cb385e66f744d 100644
--- a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
@@ -8,7 +8,7 @@
 #include "graph_iterator_proto.hpp"
 #include "openvino/util/file_util.hpp"
-#include "saved_model.pb.h"
+#include "ov_tensorflow/saved_model.pb.h"
 #include "variables_index.hpp"
 
 namespace ov {
diff --git a/src/frontends/tensorflow/src/op/merge.cpp b/src/frontends/tensorflow/src/op/merge.cpp
index 3594f93ed08278..708de72aa3434f 100644
--- a/src/frontends/tensorflow/src/op/merge.cpp
+++ b/src/frontends/tensorflow/src/op/merge.cpp
@@ -5,6 +5,8 @@
 #include "helper_ops/merge.hpp"
 
 #include "common_op_table.hpp"
+#include "helper_ops/enter.hpp"
+#include "helper_ops/next_iteration.hpp"
 #include "openvino/frontend/tensorflow/node_context.hpp"
 #include "openvino/op/constant.hpp"
 #include "utils.hpp"
@@ -24,20 +26,47 @@ OutputVector translate_merge_op(const NodeContext& node) {
     auto node_name = node.get_name();
     default_op_checks(node, 1, {"Merge"});
     int input_size = static_cast<int>(node.get_input_size());
-    OutputVector inputs;
+    OutputVector inputs(input_size);
     for (int input_ind = 0; input_ind < input_size; ++input_ind) {
-        inputs.push_back(node.get_input(input_ind));
+        inputs[input_ind] = node.get_input(input_ind);
     }
 
     // if Merge node has just one input, there is nothing to merge
     // return the same input and value_index equal to 0
-    if (inputs.size() == 1) {
+    if (input_size == 1) {
         auto value_index = make_shared<v0::Constant>(element::i32, Shape{}, 0);
         value_index->output(0).set_names({node_name + ":1"});
        inputs[0].add_names({node_name + ":0"});
         return OutputVector{inputs[0], value_index};
     }
 
+    // check if this is a case of TF1 While: Enter and NextIteration nodes feeding a Merge node
+    // in this case the output shape and type of NextIteration can be refined based on Enter
+    if (input_size == 2) {
+        auto enter = as_type_ptr<Enter>(inputs[0].get_node_shared_ptr());
+        if (!enter) {
+            enter = as_type_ptr<Enter>(inputs[1].get_node_shared_ptr());
+        }
+        auto next_iteration = as_type_ptr<NextIteration>(inputs[0].get_node_shared_ptr());
+        if (!next_iteration) {
+            next_iteration = as_type_ptr<NextIteration>(inputs[1].get_node_shared_ptr());
+        }
+
+        if (enter && next_iteration) {
+            // set output type and shape for NextIteration
+            // borrow them from the Enter output
+            auto enter_output_type = enter->output(0).get_element_type();
+            auto enter_output_shape = enter->output(0).get_partial_shape();
+            auto next_iteration_output_shape = PartialShape::dynamic(enter_output_shape.rank());
+            next_iteration->set_output_shape_and_type(next_iteration_output_shape, enter_output_type);
+
+            // reset inputs
+            // this refines input shapes and types for the Merge node
+            inputs[0] = enter->output(0);
+            inputs[1] = next_iteration->output(0);
+        }
+    }
+
     auto merge_node = make_shared<Merge>(inputs, node.get_decoder());
     set_node_name(node.get_name(), merge_node);
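The Merge change above borrows the element type from Enter but deliberately relaxes the shape to a rank-only PartialShape, since tensor dimensions may change between loop iterations. A self-contained illustration of that relaxation, using the standard ov::PartialShape API:

```cpp
#include <openvino/core/partial_shape.hpp>

#include <iostream>

int main() {
    // Shape observed on the Enter node: fully static.
    ov::PartialShape enter_shape{2, 3, 5};

    // NextIteration keeps only the rank; every dimension becomes dynamic,
    // because values flowing around the back edge may be resized.
    ov::PartialShape next_iteration_shape = ov::PartialShape::dynamic(enter_shape.rank());

    std::cout << enter_shape << " -> " << next_iteration_shape << std::endl;  // [2,3,5] -> [?,?,?]
    return 0;
}
```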
diff --git a/src/frontends/tensorflow/src/op/tensor_array_operations.cpp b/src/frontends/tensorflow/src/op/tensor_array_operations.cpp
new file mode 100644
index 00000000000000..c1b3d6ac205dc3
--- /dev/null
+++ b/src/frontends/tensorflow/src/op/tensor_array_operations.cpp
@@ -0,0 +1,332 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_op_table.hpp"
+#include "helper_ops/enter.hpp"
+#include "helper_ops/tensor_array.hpp"
+#include "openvino/frontend/tensorflow/node_context.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/broadcast.hpp"
+#include "openvino/op/concat.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/maximum.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/scatter_nd_update.hpp"
+#include "openvino/op/scatter_update.hpp"
+#include "openvino/op/shape_of.hpp"
+#include "openvino/op/slice.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/op/unsqueeze.hpp"
+#include "utils.hpp"
+
+using namespace std;
+using namespace ov;
+using namespace ov::op;
+using namespace ov::frontend::tensorflow;
+
+namespace ov {
+namespace frontend {
+namespace tensorflow {
+namespace op {
+
+namespace {
+// the function creates the constant imitating the initial tensor array container
+Output<Node> create_initial_tensor_array_constant(int64_t tensor_element_rank,
+                                                  const element::Type& element_type,
+                                                  Output<Node> size,
+                                                  const string& node_name) {
+    // adjust size to have it of shape [1] for further concatenation with the element shape
+    auto new_size_shape = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    size = make_shared<v1::Reshape>(size, new_size_shape, false);
+
+    // create a vector of size element_shape.rank() with ones
+    // and compute a shape of the initial tensor array [size, 1, ..., 1]
+    vector<int32_t> ones(tensor_element_rank, 1);
+    auto ones_const = make_shared<v0::Constant>(element::i32, Shape{ones.size()}, ones);
+    auto target_shape = make_shared<v0::Concat>(OutputVector{size, ones_const}, 0);
+
+    // create the initial tensor array
+    auto scalar_value = make_shared<v0::Constant>(element_type, Shape{}, vector<int32_t>{0});
+    auto initial_tensor_array = make_shared<v3::Broadcast>(scalar_value, target_shape);
+
+    return initial_tensor_array->output(0);
+}
+}  // namespace
+
+OutputVector translate_tensor_array_v3_op(const NodeContext& node) {
+    // TensorArrayV3 has just one input:
+    // 0) size to initialize a size of tensor array
+    default_op_checks(node, 1, {"TensorArrayV3"});
+    auto dtype = node.get_attribute<element::Type>("dtype");
+    auto size = node.get_input(0);
+    auto element_shape = node.get_attribute<PartialShape>("element_shape");
+
+    if (element_shape.rank().is_static()) {
+        auto node_name = node.get_name();
+        auto new_output1 =
+            create_initial_tensor_array_constant(element_shape.rank().get_length(), dtype, size, node.get_name());
+        new_output1.set_names({node_name + ":0"});
+        auto new_output2 =
+            create_initial_tensor_array_constant(element_shape.rank().get_length(), dtype, size, node.get_name());
+        new_output2.set_names({node_name + ":1"});
+        return OutputVector{new_output1, new_output2};
+    }
+
+    // dynamic case when it is unable to retrieve the element rank from the attribute
+    auto tensor_array_v3 = make_shared<TensorArrayV3>(size, dtype, node.get_decoder());
+    set_node_name(node.get_name(), tensor_array_v3);
+
+    return tensor_array_v3->outputs();
+}
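Since the template arguments were stripped from the listing above by extraction (they are restored from the included op headers; the v0/v1/v3 opset versions are my assumption), here is the same "initial tensor array" idea as a standalone sketch. The unit dimensions exist so that a later Broadcast can grow them to the real element shape once a write or scatter reveals it:

```cpp
#include <openvino/op/broadcast.hpp>
#include <openvino/op/concat.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/reshape.hpp>

using namespace ov;
using namespace ov::op;

// Build a [size, 1, ..., 1] zero tensor that stands in for an empty TensorArray.
Output<Node> initial_tensor_array(int64_t element_rank, const element::Type& et, Output<Node> size) {
    auto one = v0::Constant::create(element::i32, Shape{1}, {1});
    size = std::make_shared<v1::Reshape>(size, one, false);  // ensure size has shape [1]

    std::vector<int32_t> ones(element_rank, 1);
    auto unit_dims = v0::Constant::create(element::i32, Shape{ones.size()}, ones);
    auto target_shape = std::make_shared<v0::Concat>(OutputVector{size, unit_dims}, 0);

    auto zero = v0::Constant::create(et, Shape{}, {0});
    return std::make_shared<v3::Broadcast>(zero, target_shape);
}
```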
+OutputVector translate_tensor_array_scatter_v3_op(const NodeContext& node) {
+    // TensorArrayScatterV3 has four inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) indices, a Tensor of type int32. The locations at which to write the tensor elements.
+    // 2) value, a Tensor. The concatenated tensor to write to the TensorArray.
+    // 3) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) flow_out indicates that the operation is complete and the handle resource is updated
+    default_op_checks(node, 4, {"TensorArrayScatterV3"});
+    auto indices = node.get_input(1);
+    auto value = node.get_input(2);
+    // flow_in is used for transferring the input tensor array
+    auto tensor_array = node.get_input(3);
+
+    // check if the producer of tensor_array is still TensorArrayV3, an internal operation
+    // if yes, try to replace it with a constant container
+    if (as_type_ptr<TensorArrayV3>(tensor_array.get_node_shared_ptr()) &&
+        value.get_partial_shape().rank().is_static()) {
+        // set the tensor element rank that becomes known from the TensorArrayScatterV3 operation
+        auto tensor_array_v3 = as_type_ptr<TensorArrayV3>(tensor_array.get_node_shared_ptr());
+        TENSORFLOW_OP_VALIDATION(
+            node,
+            value.get_partial_shape().rank().get_length() > 0,
+            "[TensorFlow Frontend] internal error or inconsistent model: value to TensorArrayScatterV3 is a scalar");
+        int64_t tensor_element_rank = value.get_partial_shape().rank().get_length() - 1;
+        tensor_array_v3->set_element_rank(tensor_element_rank);
+    }
+
+    // compute the element shape (shape of a tensor in the tensor array) using value
+    auto element_shape = make_shared<v3::ShapeOf>(value, element::i32)->output(0);
+    auto one_const = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    auto max_const = make_shared<v0::Constant>(element::i32, Shape{1}, numeric_limits<int32_t>::max());
+    element_shape = make_shared<v8::Slice>(element_shape, one_const, max_const, one_const);
+
+    // compute the size of the tensor array
+    auto tensor_array_size = make_shared<v3::ShapeOf>(tensor_array, element::i32)->output(0);
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    tensor_array_size = make_shared<v8::Gather>(tensor_array_size, zero_const, zero_const);
+
+    // compute the new shape for the tensor array where new tensors will be inserted
+    auto new_shape = make_shared<v0::Concat>(OutputVector{tensor_array_size, element_shape}, 0);
+    tensor_array = make_shared<v3::Broadcast>(tensor_array, new_shape);
+
+    // adjust indices for ScatterNDUpdate to have a shape [N, 1] where N is a number of indices
+    indices = make_shared<v0::Unsqueeze>(indices, one_const);
+
+    // compute the updated tensor array using ScatterNDUpdate
+    // value should be of a shape [N, ]
+    auto updated_tensor_array = make_shared<v3::ScatterNDUpdate>(tensor_array, indices, value);
+    set_node_name(node.get_name(), updated_tensor_array);
+
+    // TensorArrayScatterV3 has just one output flow_out
+    // that is used for transferring the updated tensor array
+    return {updated_tensor_array};
+}
+
+OutputVector translate_tensor_array_read_v3_op(const NodeContext& node) {
+    // TensorArrayReadV3 reads an element from the TensorArray into the output
+    // and it has three inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) index, a Tensor of type int32. The location from which to read the value.
+    // 2) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) read value from the tensor array
+    default_op_checks(node, 3, {"TensorArrayReadV3"});
+    auto index = node.get_input(1);
+    // flow_in is used for transferring the input tensor array
+    auto tensor_array = node.get_input(2);
+    auto dtype = node.get_attribute<element::Type>("dtype");
+
+    // adjust the index to a scalar for using the Gather operation
+    auto new_shape = make_shared<v0::Constant>(element::i32, Shape{0}, vector<int32_t>{});
+    index = make_shared<v1::Reshape>(index, new_shape, false);
+
+    // gather the tensor element by the required position
+    auto gather_axis = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    Output<Node> tensor_element = make_shared<v8::Gather>(tensor_array, index, gather_axis);
+    tensor_element = make_shared<v0::Convert>(tensor_element, dtype);
+
+    set_node_name(node.get_name(), tensor_element.get_node_shared_ptr());
+    return {tensor_element};
+}
+
+OutputVector translate_tensor_array_close_v3_op(const NodeContext& node) {
+    // TensorArrayCloseV3 deletes the TensorArray from its resource container
+    // it outputs nothing
+    default_op_checks(node, 1, {"TensorArrayCloseV3"});
+    return {};
+}
+
+OutputVector translate_tensor_array_size_v3_op(const NodeContext& node) {
+    // TensorArraySizeV3 gets the current size of the TensorArray
+    // it outputs an int32 scalar equal to the size of the tensor array
+    default_op_checks(node, 2, {"TensorArraySizeV3"});
+    // skip the handle in the first input
+    auto tensor_array = node.get_input(1);
+
+    auto size = make_shared<v3::ShapeOf>(tensor_array, element::i32)->output(0);
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    size = make_shared<v8::Gather>(size, zero_const, zero_const);
+
+    // size must be a scalar
+    auto scalar_shape = make_shared<v0::Constant>(element::i32, Shape{0}, vector<int32_t>{});
+    size = make_shared<v1::Reshape>(size, scalar_shape, false);
+
+    set_node_name(node.get_name(), size.get_node_shared_ptr());
+    return {size};
+}
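The TensorArrayScatterV3 decomposition above writes whole tensor elements into rows of the [size, elem...] container: indices are unsqueezed to shape [N, 1] so that each index selects a full row. A plain-C++ toy with the same semantics, with no OpenVINO dependency and purely illustrative values:

```cpp
#include <iostream>
#include <vector>

int main() {
    // Container: 4 "tensor elements", each a row of 3 values (all zeros).
    std::vector<std::vector<float>> tensor_array(4, std::vector<float>(3, 0.f));

    // indices of shape [N, 1] and updates of shape [N, 3], as in the diff.
    std::vector<int> indices = {2, 0};                      // N = 2
    std::vector<std::vector<float>> updates = {{1, 2, 3},   // row for index 2
                                               {4, 5, 6}};  // row for index 0

    // ScatterNDUpdate with index depth 1 replaces whole rows.
    for (size_t n = 0; n < indices.size(); ++n)
        tensor_array[indices[n]] = updates[n];

    for (const auto& row : tensor_array) {
        for (float v : row) std::cout << v << ' ';
        std::cout << '\n';  // rows 0 and 2 now hold the updates
    }
    return 0;
}
```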
+OutputVector translate_tensor_array_gather_v3_op(const NodeContext& node) {
+    // TensorArrayGatherV3 gathers specific elements from the TensorArray into the output
+    // and it has three inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) indices, a Tensor of type int32. The locations from which to read the tensor elements.
+    // 2) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) value with the read tensor elements
+    default_op_checks(node, 3, {"TensorArrayGatherV3"});
+    // skip the handle in the first input
+    auto indices = node.get_input(1);
+    // flow_in serves for transferring the tensor array
+    // the handle input is ignored
+    auto tensor_array = node.get_input(2);
+    auto dtype = node.get_attribute<element::Type>("dtype");
+    auto element_shape = node.get_attribute<PartialShape>("element_shape", PartialShape::dynamic());
+
+    // gather the tensor elements by the required positions
+    auto gather_axis = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    Output<Node> tensor_element = make_shared<v8::Gather>(tensor_array, indices, gather_axis);
+    tensor_element = make_shared<v0::Convert>(tensor_element, dtype);
+
+    // concretize the tensor_element shape if it is specified
+    if (tensor_element.get_partial_shape().rank().is_dynamic() && element_shape.is_static()) {
+        auto element_shape_value = element_shape.get_shape();
+        auto element_shape_const =
+            make_shared<v0::Constant>(element::i32, Shape{element_shape_value.size()}, element_shape_value);
+        auto size = make_shared<v3::ShapeOf>(tensor_array, element::i32)->output(0);
+        auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+        size = make_shared<v8::Gather>(size, zero_const, zero_const);
+        auto new_shape = make_shared<v0::Concat>(OutputVector{size, element_shape_const}, 0);
+        tensor_element = make_shared<v1::Reshape>(tensor_element, new_shape, false);
+    }
+
+    set_node_name(node.get_name(), tensor_element.get_node_shared_ptr());
+    return {tensor_element};
+}
+
+OutputVector translate_tensor_array_concat_v3_op(const NodeContext& node) {
+    // TensorArrayConcatV3 concatenates the elements from the TensorArray into the value
+    // and it has two inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has two outputs:
+    // 0) value concatenated by the first dimension
+    // 1) lengths of each tensor element for the concatenation
+    default_op_checks(node, 2, {"TensorArrayConcatV3"});
+    // flow_in serves for transferring the tensor array
+    // the handle input is ignored
+    auto tensor_array = node.get_input(1);
+    auto dtype = node.get_attribute<element::Type>("dtype");
+
+    // since the tensor array stores tensor elements in a form concatenated by the first dimension,
+    // and for this operation they must be concatenated by the first dimension of the tensor element,
+    // it needs to combine the first two dimensions
+    // the tensor array is of shape [k, n0, n1, ..., nd]
+    // 1. compute the element shape excluding the first dimension
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    auto one_const = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    auto two_const = make_shared<v0::Constant>(element::i32, Shape{1}, 2);
+    auto max_const = make_shared<v0::Constant>(element::i32, Shape{1}, numeric_limits<int32_t>::max());
+    auto tensor_array_shape = make_shared<v3::ShapeOf>(tensor_array, element::i64);
+    auto element_shape_no_two_dims = make_shared<v8::Slice>(tensor_array_shape, two_const, max_const, one_const);
+    // 2. compute the first and second dimensions k and n0
+    auto k = make_shared<v8::Gather>(tensor_array_shape, zero_const, zero_const);
+    auto n0 = make_shared<v8::Gather>(tensor_array_shape, one_const, zero_const);
+    auto k_by_n0 = make_shared<v1::Multiply>(k, n0);
+    // 3. compute the first output containing the concatenated tensor elements
+    // it folds the first and second dimensions
+    auto new_shape = make_shared<v0::Concat>(OutputVector{k_by_n0, element_shape_no_two_dims}, 0);
+    auto concatenated_array = make_shared<v1::Reshape>(tensor_array, new_shape, false)->output(0);
+    concatenated_array = make_shared<v0::Convert>(concatenated_array, dtype);
+    concatenated_array.set_names({node.get_name() + ":0"});
+
+    // 4. compute the second output with the length of each tensor element for the concatenation
+    auto lengths = make_shared<v3::Broadcast>(n0, k)->output(0);
+    lengths.set_names({node.get_name() + ":1"});
+
+    return {concatenated_array, lengths};
+}
+
+OutputVector translate_tensor_array_write_v3_op(const NodeContext& node) {
+    // TensorArrayWriteV3 pushes an element onto the tensor_array
+    // and it has four inputs:
+    // 0) handle, a Tensor of type resource. The handle to a TensorArray.
+    // 1) index, a Tensor of type int32. The location at which to write the tensor element.
+    // 2) value, a Tensor. The tensor to write at the specified location.
+    // 3) flow_in, a Tensor of type float32. A float scalar that enforces proper chaining of operations.
+    // The operation has one output:
+    // 0) flow_out used for transferring the updated tensor array
+    default_op_checks(node, 4, {"TensorArrayWriteV3"});
+    auto handle = node.get_input(0);
+    auto index = node.get_input(1);
+    auto value = node.get_input(2);
+    // flow_in is used for transferring the input tensor array
+    // the tensor array has a rank equal to 1 + rank(element of tensor array)
+    // if it is just initialized, its shape is equal to [tensor_array_size, 1, ..., 1]
+    // otherwise, it is equal to [tensor_array_size, ]
+    auto tensor_array = node.get_input(3);
+
+    // reshape index to give it the shape [1]
+    auto new_index_shape = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+    index = make_shared<v1::Reshape>(index, new_index_shape, false);
+
+    if (auto enter = as_type_ptr<Enter>(handle.get_node_shared_ptr())) {
+        if (as_type_ptr<TensorArrayV3>(enter->input_value(0).get_node_shared_ptr()) &&
+            value.get_partial_shape().rank().is_static()) {
+            // set the tensor element rank that becomes known from the TensorArrayWriteV3 operation
+            auto tensor_array_v3 = as_type_ptr<TensorArrayV3>(enter->input_value(0).get_node_shared_ptr());
+            int64_t tensor_element_rank = value.get_partial_shape().rank().get_length();
+            tensor_array_v3->set_element_rank(tensor_element_rank);
+        }
+    }
+
+    // compute the element shape in the input tensor array
+    auto tensor_array_shape = make_shared<v3::ShapeOf>(tensor_array, element::i32);
+
+    // compute the current size of the tensor array
+    auto zero_const = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+    auto tensor_array_size = make_shared<v8::Gather>(tensor_array_shape, zero_const, zero_const);
+
+    // adjust the tensor array to have the correct shape [size, ] before value insertion
+    auto element_shape = make_shared<v3::ShapeOf>(value, element::i32);
+    auto new_tensor_array_shape = make_shared<v0::Concat>(OutputVector{tensor_array_size, element_shape}, 0);
+    tensor_array = make_shared<v3::Broadcast>(tensor_array, new_tensor_array_shape);
+
+    // update the resulting tensor using ScatterUpdate
+    value = make_shared<v0::Unsqueeze>(value, zero_const);
+    auto scatter_update = make_shared<v3::ScatterUpdate>(tensor_array, index, value, zero_const);
+
+    set_node_name(node.get_name(), scatter_update);
+    // use flow_out for transferring the updated tensor array
+    return {scatter_update};
+}
+
+}  // namespace op
+}  // namespace tensorflow
+}  // namespace frontend
+}  // namespace ov
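TensorArrayConcatV3 above relies on the container layout [k, n0, ...]: concatenating k stored elements along their first axis is just a reshape that folds k and n0 together, and the second output is n0 repeated k times. A quick sanity check of that index arithmetic in plain C++ (toy sizes, illustrative only):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const int64_t k = 3, n0 = 2, rest = 2;  // container shape [3, 2, 2]

    // Fill the container in row-major order.
    std::vector<int64_t> buf(k * n0 * rest);
    for (int64_t i = 0; i < k * n0 * rest; ++i) buf[i] = i;

    // "Reshape" [k, n0, rest] -> [k * n0, rest] is a no-op on the flat buffer:
    // element i contributes rows i*n0 .. i*n0 + n0 - 1 of the folded view,
    // which is exactly concatenation along axis 0.
    for (int64_t r = 0; r < k * n0; ++r) {
        std::cout << "row " << r << ":";
        for (int64_t c = 0; c < rest; ++c) std::cout << ' ' << buf[r * rest + c];
        std::cout << '\n';
    }

    // Second output of TensorArrayConcatV3: every element contributed n0 rows.
    std::vector<int64_t> lengths(k, n0);
    std::cout << "lengths: " << lengths.size() << " entries of " << lengths[0] << '\n';
    return 0;
}
```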
diff --git a/src/frontends/tensorflow/src/op/var_handle.cpp b/src/frontends/tensorflow/src/op/var_handle.cpp
index 50a5b73c449f8f..501df1c504309b 100644
--- a/src/frontends/tensorflow/src/op/var_handle.cpp
+++ b/src/frontends/tensorflow/src/op/var_handle.cpp
@@ -9,8 +9,9 @@
 #include "input_model.hpp"
 #include "ngraph/runtime/shared_buffer.hpp"
 #include "openvino/opsets/opset8.hpp"
+#include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/util/mmap_object.hpp"
-#include "tensor_bundle.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
 
 using namespace std;
 using namespace ov::opset8;
@@ -44,15 +45,12 @@ static std::shared_ptr read_variable(std::shared_ptr v
             node,
             static_cast(mapped_memory->size()) >= entry.offset() + entry.size(),
             "[TensorFlow Frontend] Internal error: Variable entry size is out of bounds of mapped memory size.");
-        OPENVINO_SUPPRESS_DEPRECATED_START
         return std::make_shared<Constant>(
             ov_type,
             shape,
-            std::make_shared<ngraph::runtime::SharedBuffer<std::shared_ptr<MappedMemory>>>(
-                mapped_memory->data() + entry.offset(),
-                entry.size(),
-                mapped_memory));
-        OPENVINO_SUPPRESS_DEPRECATED_END
+            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mapped_memory->data() + entry.offset(),
+                                                                              entry.size(),
+                                                                              mapped_memory));
     } else {
         std::vector var_data;
         var_data.resize(size);
diff --git a/src/frontends/tensorflow/src/op/xla_conv_v2.cpp b/src/frontends/tensorflow/src/op/xla_conv_v2.cpp
index dc2e319c9a03b1..2d6ecdfa7bfb73 100644
--- a/src/frontends/tensorflow/src/op/xla_conv_v2.cpp
+++ b/src/frontends/tensorflow/src/op/xla_conv_v2.cpp
@@ -14,8 +14,8 @@
 #include "openvino/op/shape_of.hpp"
 #include "openvino/op/slice.hpp"
 #include "openvino/op/transpose.hpp"
+#include "ov_tensorflow/xla_data.pb.h"
 #include "utils.hpp"
-#include "xla_data.pb.h"
 
 using namespace std;
 using namespace ov;
diff --git a/src/frontends/tensorflow/src/op/xla_dot.cpp b/src/frontends/tensorflow/src/op/xla_dot.cpp
index e463494511f076..b4c38519ce210c 100644
--- a/src/frontends/tensorflow/src/op/xla_dot.cpp
+++ b/src/frontends/tensorflow/src/op/xla_dot.cpp
@@ -13,8 +13,8 @@
 #include "openvino/op/shape_of.hpp"
 #include "openvino/op/transpose.hpp"
 #include "openvino/op/unsqueeze.hpp"
+#include "ov_tensorflow/xla_data.pb.h"
 #include "utils.hpp"
-#include "xla_data.pb.h"
 
 using namespace std;
 using namespace ov;
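The var_handle.cpp hunk above swaps the deprecated ngraph::runtime::SharedBuffer for ov::SharedBuffer; both exist to keep a memory-mapped checkpoint alive for as long as a Constant points into it. The lifetime-tying idea in portable C++, using a shared_ptr aliasing constructor instead of SharedBuffer (POSIX mmap; illustrative only):

```cpp
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cstddef>
#include <memory>

// Owner of the whole mapping; unmapped when the last view goes away.
struct Mapping {
    void* data = MAP_FAILED;
    size_t size = 0;
    ~Mapping() {
        if (data != MAP_FAILED) munmap(data, size);
    }
};

// Return a pointer into the file at `offset` that keeps the mapping alive,
// the same job ov::SharedBuffer does for Constants backed by a checkpoint.
std::shared_ptr<const char> map_region(const char* path, size_t offset, size_t length) {
    int fd = open(path, O_RDONLY);
    if (fd < 0) return nullptr;
    struct stat st {};
    fstat(fd, &st);
    auto owner = std::make_shared<Mapping>();
    owner->size = static_cast<size_t>(st.st_size);
    owner->data = mmap(nullptr, owner->size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);  // the mapping stays valid after close
    if (owner->data == MAP_FAILED || offset + length > owner->size) return nullptr;
    // Aliasing constructor: points at the offset, but owns the whole Mapping.
    return std::shared_ptr<const char>(owner, static_cast<const char*>(owner->data) + offset);
}
```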
diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp
index bc1a657faf54fb..3a4c570c6576fb 100644
--- a/src/frontends/tensorflow/src/op_table.cpp
+++ b/src/frontends/tensorflow/src/op_table.cpp
@@ -46,6 +46,14 @@ TF_OP_CONVERTER(translate_sparse_segment_sum_op);
 TF_OP_CONVERTER(translate_staticregexfullmatch_op);
 TF_OP_CONVERTER(translate_stringjoin_op);
 TF_OP_CONVERTER(translate_switch_op);
+TF_OP_CONVERTER(translate_tensor_array_close_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_concat_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_gather_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_read_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_scatter_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_size_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_v3_op);
+TF_OP_CONVERTER(translate_tensor_array_write_v3_op);
 TF_OP_CONVERTER(translate_varhandle_op);
 TF_OP_CONVERTER(translate_variable_op);
 TF_OP_CONVERTER(translate_varisinitialized_op);
@@ -174,6 +182,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
     {"Gather", CreatorFunction(translate_gather_op)},
     {"GatherV2", CreatorFunction(translate_gather_v2_op)},
     {"GatherNd", CreatorFunction(translate_gather_nd_op)},
+    {"GatherTree", CreatorFunction(translate_gather_tree_op)},
+    {"Addons>GatherTree", CreatorFunction(translate_gather_tree_op)},
     {"HashTable", CreatorFunction(translate_hash_table_op)},
     {"HashTableV2", CreatorFunction(translate_hash_table_op)},
     {"Identity", CreatorFunction(translate_identity_op)},
@@ -269,6 +279,14 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
     {"StatelessWhile", CreatorFunction(translate_while_op)},
     {"StridedSlice", CreatorFunction(translate_strided_slice_op)},
     {"Switch", CreatorFunction(translate_switch_op)},
+    {"TensorArrayCloseV3", CreatorFunction(translate_tensor_array_close_v3_op)},
+    {"TensorArrayConcatV3", CreatorFunction(translate_tensor_array_concat_v3_op)},
+    {"TensorArrayGatherV3", CreatorFunction(translate_tensor_array_gather_v3_op)},
+    {"TensorArrayReadV3", CreatorFunction(translate_tensor_array_read_v3_op)},
+    {"TensorArrayScatterV3", CreatorFunction(translate_tensor_array_scatter_v3_op)},
+    {"TensorArraySizeV3", CreatorFunction(translate_tensor_array_size_v3_op)},
+    {"TensorArrayV3", CreatorFunction(translate_tensor_array_v3_op)},
+    {"TensorArrayWriteV3", CreatorFunction(translate_tensor_array_write_v3_op)},
     {"TensorListFromTensor", CreatorFunction(translate_tensor_list_from_tensor_op)},
     {"TensorListGetItem", CreatorFunction(translate_tensor_list_get_item_op)},
     {"TensorListLength", CreatorFunction(translate_tensor_list_length_op)},
@@ -278,9 +296,12 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
     {"TensorListReserve", CreatorFunction(translate_tensor_list_reserve_op)},
     {"TensorListResize", CreatorFunction(translate_tensor_list_resize_op)},
     {"Tile", CreatorFunction(translate_tile_op)},
+    {"ToBool", CreatorFunction(translate_tobool_op)},
     {"TopK", CreatorFunction(translate_top_k_op)},
     {"TopKV2", CreatorFunction(translate_top_k_v2_op)},
     {"Transpose", CreatorFunction(translate_transpose_op)},
+    {"TruncateDiv", CreatorFunction(translate_truncate_div_op)},
+    {"TruncateMod", CreatorFunction(translate_truncate_mod_op)},
     {"Unpack", CreatorFunction(translate_unpack_op)},
     {"UnravelIndex", CreatorFunction(translate_unravel_index_op)},
     {"UnsortedSegmentSum", CreatorFunction(translate_unsorted_segment_sum_op)},
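The op_table.cpp hunks register each translator in a name-to-function map (the map's template arguments, std::string and CreatorFunction, were stripped above and are restored from the entry syntax). The registration pattern in miniature, with stand-in types and hypothetical names:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Miniature version of the op table: TF op name -> translator callback.
struct NodeContext {};                                  // stand-in
using OutputVector = std::vector<int>;                  // stand-in
using CreatorFunction = std::function<OutputVector(const NodeContext&)>;

OutputVector translate_tensor_array_v3_op(const NodeContext&) { return {1, 2}; }

int main() {
    std::map<std::string, CreatorFunction> supported_ops = {
        // Several TF op names may share one translator, as with
        // "GatherTree" and "Addons>GatherTree" in the diff above.
        {"TensorArrayV3", CreatorFunction(translate_tensor_array_v3_op)},
    };
    auto it = supported_ops.find("TensorArrayV3");
    if (it != supported_ops.end())
        std::cout << "translated outputs: " << it->second(NodeContext{}).size() << '\n';
    return 0;
}
```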
diff --git a/src/frontends/tensorflow/src/proto/any.proto b/src/frontends/tensorflow/src/proto/google/protobuf/any.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/any.proto
rename to src/frontends/tensorflow/src/proto/google/protobuf/any.proto
diff --git a/src/frontends/tensorflow/src/proto/wrappers.proto b/src/frontends/tensorflow/src/proto/google/protobuf/wrappers.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/wrappers.proto
rename to src/frontends/tensorflow/src/proto/google/protobuf/wrappers.proto
diff --git a/src/frontends/tensorflow/src/proto/allocation_description.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/allocation_description.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/allocation_description.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/allocation_description.proto
diff --git a/src/frontends/tensorflow/src/proto/api_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/api_def.proto
similarity index 99%
rename from src/frontends/tensorflow/src/proto/api_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/api_def.proto
index 810aabc5a2c2c3..cbb581973d32bb 100644
--- a/src/frontends/tensorflow/src/proto/api_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/api_def.proto
@@ -21,7 +21,7 @@ option java_outer_classname = "ApiDefProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
 option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/api_def_go_proto";
-import "attr_value.proto";
+import "ov_tensorflow/attr_value.proto";
 
 // Used to specify and override the default API & behavior in the
 // generated code for client languages, from what you would get from
diff --git a/src/frontends/tensorflow/src/proto/attr_value.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto
similarity index 96%
rename from src/frontends/tensorflow/src/proto/attr_value.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto
index 3028176c02bcd7..b903c30cf99276 100644
--- a/src/frontends/tensorflow/src/proto/attr_value.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/tensor.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "AttrValueProtos";
diff --git a/src/frontends/tensorflow/src/proto/cost_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/cost_graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto
index dad93a029babae..8e4d9788f49595 100644
--- a/src/frontends/tensorflow/src/proto/cost_graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto
@@ -14,8 +14,8 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "CostGraphProtos";
diff --git a/src/frontends/tensorflow/src/proto/dataset_options.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/dataset_options.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/dataset_options.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/dataset_options.proto
diff --git a/src/frontends/tensorflow/src/proto/device_attributes.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/device_attributes.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/device_attributes.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/device_attributes.proto
diff --git a/src/frontends/tensorflow/src/proto/function.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/function.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto
index 65a2acb3b91979..9e84731c983bb1 100644
--- a/src/frontends/tensorflow/src/proto/function.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "attr_value.proto";
-import "node_def.proto";
-import "op_def.proto";
+import "ov_tensorflow/attr_value.proto";
+import "ov_tensorflow/node_def.proto";
+import "ov_tensorflow/op_def.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "FunctionProtos";
diff --git a/src/frontends/tensorflow/src/proto/graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto
similarity index 95%
rename from src/frontends/tensorflow/src/proto/graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto
index c52e84022f9fcd..e047abeafe18b1 100644
--- a/src/frontends/tensorflow/src/proto/graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 package tensorflow;
 
-import "function.proto";
-import "node_def.proto";
-import "versions.proto";
+import "ov_tensorflow/function.proto";
+import "ov_tensorflow/node_def.proto";
+import "ov_tensorflow/versions.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "GraphProtos";
diff --git a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/graph_transfer_info.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto
index e42c1353695313..9e7d598e34a5c1 100644
--- a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "types.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "GraphTransferInfoProto";
diff --git a/src/frontends/tensorflow/src/proto/kernel_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/kernel_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto
index 5e6b839d31582e..88142d3de9584d 100644
--- a/src/frontends/tensorflow/src/proto/kernel_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "attr_value.proto";
+import "ov_tensorflow/attr_value.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "KernelDefProtos";
diff --git a/src/frontends/tensorflow/src/proto/log_memory.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/log_memory.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto
index 96dac4c9ca370d..62489f0e0b8df4 100644
--- a/src/frontends/tensorflow/src/proto/log_memory.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_description.proto";
+import "ov_tensorflow/tensor_description.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "LogMemoryProtos";
diff --git a/src/frontends/tensorflow/src/proto/meta_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/meta_graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto
index b6918fa853bf8c..255fb6efeb2f9e 100644
--- a/src/frontends/tensorflow/src/proto/meta_graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto
@@ -14,14 +14,14 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "any.proto";
-import "graph.proto";
-import "op_def.proto";
-import "tensor_shape.proto";
-import "types.proto";
-import "saved_object_graph.proto";
-import "saver.proto";
-import "struct.proto";
+import "google/protobuf/any.proto";
+import "ov_tensorflow/graph.proto";
+import "ov_tensorflow/op_def.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
+import "ov_tensorflow/saved_object_graph.proto";
+import "ov_tensorflow/saver.proto";
+import "ov_tensorflow/struct.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "MetaGraphProtos";
diff --git a/src/frontends/tensorflow/src/proto/model.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/model.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/model.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/model.proto
diff --git a/src/frontends/tensorflow/src/proto/node_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto
similarity index 99%
rename from src/frontends/tensorflow/src/proto/node_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto
index 573d0f901dd732..b8f3a017a30fc5 100644
--- a/src/frontends/tensorflow/src/proto/node_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "attr_value.proto";
+import "ov_tensorflow/attr_value.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "NodeProto";
diff --git a/src/frontends/tensorflow/src/proto/op_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto
similarity index 98%
rename from src/frontends/tensorflow/src/proto/op_def.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto
index 4d5c66c39e16d7..31493fed26ce55 100644
--- a/src/frontends/tensorflow/src/proto/op_def.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto
@@ -18,9 +18,10 @@ option java_outer_classname = "OpDefProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
 option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/op_def_go_proto";
-import "attr_value.proto";
-import "types.proto";
-import "resource_handle.proto";
+
+import "ov_tensorflow/attr_value.proto";
+import "ov_tensorflow/types.proto";
+import "ov_tensorflow/resource_handle.proto";
 
 // Defines an operation. A NodeDef in a GraphDef specifies an Op by
 // using the "op" field which should match the name of a OpDef.
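The rename hunks relocate every bundled .proto under the ov_tensorflow/ prefix (and the stock Google ones under google/protobuf/), so the generated headers and import paths no longer collide with an application's own TensorFlow protos. Consuming the relocated generated code might look like the sketch below; GraphDef and ParseFromIstream are standard protobuf API, while the file handling is illustrative:

```cpp
#include <fstream>
#include <iostream>

// Generated headers now live under the ov_tensorflow/ prefix:
#include "ov_tensorflow/graph.pb.h"  // was: "graph.pb.h"

bool load_graph_def(const char* path, tensorflow::GraphDef* graph_def) {
    std::ifstream stream(path, std::ios::binary);
    if (!stream) return false;
    // ParseFromIstream is the stock protobuf Message API.
    return graph_def->ParseFromIstream(&stream);
}

int main(int argc, char** argv) {
    tensorflow::GraphDef graph_def;
    if (argc > 1 && load_graph_def(argv[1], &graph_def))
        std::cout << "nodes: " << graph_def.node_size() << '\n';
    return 0;
}
```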
diff --git a/src/frontends/tensorflow/src/proto/reader_base.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/reader_base.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/reader_base.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/reader_base.proto
diff --git a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto
similarity index 94%
rename from src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto
index abfcfdbec08007..3b17878e127cf9 100644
--- a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "graph.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/graph.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "RemoteFusedGraphExecuteInfoProto";
diff --git a/src/frontends/tensorflow/src/proto/resource_handle.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto
similarity index 96%
rename from src/frontends/tensorflow/src/proto/resource_handle.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto
index 4d872b6d9d8074..19b4dcc3b84ded 100644
--- a/src/frontends/tensorflow/src/proto/resource_handle.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto
@@ -14,8 +14,8 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "ResourceHandle";
diff --git a/src/frontends/tensorflow/src/proto/saved_model.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/saved_model.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto
index 0034fdfd46dcf8..f8660655229245 100644
--- a/src/frontends/tensorflow/src/proto/saved_model.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "meta_graph.proto";
+import "ov_tensorflow/meta_graph.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "SavedModelProtos";
diff --git a/src/frontends/tensorflow/src/proto/saved_object_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto
similarity index 97%
rename from src/frontends/tensorflow/src/proto/saved_object_graph.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto
index 671441075c3628..d0b2170044966c 100644
--- a/src/frontends/tensorflow/src/proto/saved_object_graph.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto
@@ -14,13 +14,13 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "any.proto";
-import "tensor_shape.proto";
-import "types.proto";
-import "variable.proto";
-import "versions.proto";
-import "struct.proto";
-import "trackable_object_graph.proto";
+import "google/protobuf/any.proto";
+import "ov_tensorflow/tensor_shape.proto";
"ov_tensorflow/types.proto"; +import "ov_tensorflow/variable.proto"; +import "ov_tensorflow/versions.proto"; +import "ov_tensorflow/struct.proto"; +import "ov_tensorflow/trackable_object_graph.proto"; option cc_enable_arenas = true; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto similarity index 94% rename from src/frontends/tensorflow/src/proto/saved_tensor_slice.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto index 4645b2bdca9b89..9e628752bb1f5c 100644 --- a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto @@ -35,11 +35,11 @@ option java_outer_classname = "SavedTensorSliceProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.util"; -import "tensor_shape.proto"; -import "tensor_slice.proto"; -import "tensor.proto"; -import "types.proto"; -import "versions.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/tensor_slice.proto"; +import "ov_tensorflow/tensor.proto"; +import "ov_tensorflow/types.proto"; +import "ov_tensorflow/versions.proto"; // Metadata describing the set of slices of the same tensor saved in a // checkpoint file. diff --git a/src/frontends/tensorflow/src/proto/saver.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saver.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/saver.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saver.proto diff --git a/src/frontends/tensorflow/src/proto/step_stats.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/step_stats.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto index 04e0864a5aec49..027a1d79ee22e4 100644 --- a/src/frontends/tensorflow/src/proto/step_stats.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto @@ -14,8 +14,8 @@ syntax = "proto3"; package tensorflow; -import "allocation_description.proto"; -import "tensor_description.proto"; +import "ov_tensorflow/allocation_description.proto"; +import "ov_tensorflow/tensor_description.proto"; option cc_enable_arenas = true; option java_outer_classname = "StepStatsProtos"; diff --git a/src/frontends/tensorflow/src/proto/struct.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/struct.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto index d03201b685ac79..4126bd98c4a3d3 100644 --- a/src/frontends/tensorflow/src/proto/struct.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto @@ -14,9 +14,9 @@ syntax = "proto3"; package tensorflow; -import "tensor.proto"; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/tensor.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/summary.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/summary.proto rename to 
src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto
index 9e4b95f4bc3348..ce326176947dd4 100644
--- a/src/frontends/tensorflow/src/proto/summary.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto
@@ -14,7 +14,7 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor.proto";
+import "ov_tensorflow/tensor.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "SummaryProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto
similarity index 96%
rename from src/frontends/tensorflow/src/proto/tensor.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto
index c2e1fd7eb6b627..42f063536e09e0 100644
--- a/src/frontends/tensorflow/src/proto/tensor.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "resource_handle.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/resource_handle.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "TensorProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor_bundle.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto
similarity index 94%
rename from src/frontends/tensorflow/src/proto/tensor_bundle.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto
index 43fea749b42172..21af38195c4e11 100644
--- a/src/frontends/tensorflow/src/proto/tensor_bundle.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto
@@ -14,10 +14,10 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "tensor_shape.proto";
-import "tensor_slice.proto";
-import "types.proto";
-import "versions.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/tensor_slice.proto";
+import "ov_tensorflow/types.proto";
+import "ov_tensorflow/versions.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "TensorBundleProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor_description.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto
similarity index 90%
rename from src/frontends/tensorflow/src/proto/tensor_description.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto
index 3ab9c310a6f127..c03e1311c1f386 100644
--- a/src/frontends/tensorflow/src/proto/tensor_description.proto
+++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto
@@ -14,9 +14,9 @@ syntax = "proto3";
 
 package tensorflow;
 
-import "allocation_description.proto";
-import "tensor_shape.proto";
-import "types.proto";
+import "ov_tensorflow/allocation_description.proto";
+import "ov_tensorflow/tensor_shape.proto";
+import "ov_tensorflow/types.proto";
 
 option cc_enable_arenas = true;
 option java_outer_classname = "TensorDescriptionProtos";
diff --git a/src/frontends/tensorflow/src/proto/tensor_shape.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_shape.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/tensor_shape.proto
rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_shape.proto
diff --git a/src/frontends/tensorflow/src/proto/tensor_slice.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_slice.proto
similarity index 100%
rename from src/frontends/tensorflow/src/proto/tensor_slice.proto
rename to
src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_slice.proto diff --git a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/trackable_object_graph.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto index f4a8e4da34f129..f0a9617432f617 100644 --- a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto @@ -14,7 +14,7 @@ syntax = "proto3"; package tensorflow; -import "wrappers.proto"; +import "google/protobuf/wrappers.proto"; option cc_enable_arenas = true; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/types.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/types.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/types.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/types.proto diff --git a/src/frontends/tensorflow/src/proto/variable.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/variable.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/variable.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/variable.proto diff --git a/src/frontends/tensorflow/src/proto/versions.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/versions.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/versions.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/versions.proto diff --git a/src/frontends/tensorflow/src/proto/xla_data.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/xla_data.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/xla_data.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/xla_data.proto diff --git a/src/frontends/tensorflow/src/tf_utils.cpp b/src/frontends/tensorflow/src/tf_utils.cpp index c72e8e7bb9080a..e298f49f92889f 100644 --- a/src/frontends/tensorflow/src/tf_utils.cpp +++ b/src/frontends/tensorflow/src/tf_utils.cpp @@ -423,7 +423,7 @@ shared_ptr create_loop_for_tf_while(const std::string& while_node_name FRONT_END_GENERAL_CHECK( cond_results.size() == 1 && cond_results[0], "[TensorFlow Frontend] Internal error or inconsistent model: condition body must contain one Result node."); - auto body_condition_output_idx = static_cast(body_results.size()); + auto body_condition_output_idx = body_results.size(); body_model->add_results(cond_results); // type setting for body graph parameters is needed for TensorList support since DT_VARIANT type is present @@ -435,14 +435,18 @@ shared_ptr create_loop_for_tf_while(const std::string& while_node_name loop->set_function(body_model); // body_results may contain less nodes than body_params that means back edge exists not for all body_params - for (size_t input_ind = 0; input_ind < static_cast(body_condition_output_idx); ++input_ind) { + for (size_t input_ind = 0; input_ind < body_condition_output_idx; ++input_ind) { loop->set_merged_input(body_params[input_ind], ov_inputs[input_ind], body_results[input_ind]->input_value(0)); } - loop->set_special_body_ports({-1, body_condition_output_idx}); + loop->set_special_body_ports({-1, static_cast(body_condition_output_idx)}); + // set invariant inputs for the loop + for (size_t input_ind = body_condition_output_idx; 
input_ind < input_size; ++input_ind) { + loop->set_invariant_input(body_params[input_ind], ov_inputs[input_ind]); + } // set external outputs for Loop node // do not get execution condition outside of the Loop node - for (size_t output_ind = 0; output_ind < static_cast(body_condition_output_idx); ++output_ind) { + for (size_t output_ind = 0; output_ind < body_condition_output_idx; ++output_ind) { loop->get_iter_value(body_results[output_ind]); } loop->validate_and_infer_types(); diff --git a/src/frontends/tensorflow/src/tf_utils.hpp b/src/frontends/tensorflow/src/tf_utils.hpp index 5de9029a816e6c..861fb56f552685 100644 --- a/src/frontends/tensorflow/src/tf_utils.hpp +++ b/src/frontends/tensorflow/src/tf_utils.hpp @@ -4,8 +4,6 @@ #pragma once -#include "attr_value.pb.h" -#include "node_def.pb.h" #include "openvino/core/node.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/runtime_attribute.hpp" @@ -14,9 +12,11 @@ #include "openvino/frontend/node_context.hpp" #include "openvino/op/loop.hpp" #include "openvino/runtime/tensor.hpp" -#include "tensor.pb.h" -#include "tensor_shape.pb.h" -#include "types.pb.h" +#include "ov_tensorflow/attr_value.pb.h" +#include "ov_tensorflow/node_def.pb.h" +#include "ov_tensorflow/tensor.pb.h" +#include "ov_tensorflow/tensor_shape.pb.h" +#include "ov_tensorflow/types.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/variables_index.cpp b/src/frontends/tensorflow/src/variables_index.cpp index c24ffd8112bd09..2dcf3faf9e0b0c 100644 --- a/src/frontends/tensorflow/src/variables_index.cpp +++ b/src/frontends/tensorflow/src/variables_index.cpp @@ -11,8 +11,8 @@ #include "graph_iterator_saved_model.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/util/mmap_object.hpp" -#include "tensor_bundle.pb.h" -#include "trackable_object_graph.pb.h" +#include "ov_tensorflow/tensor_bundle.pb.h" +#include "ov_tensorflow/trackable_object_graph.pb.h" #ifdef ENABLE_SNAPPY_COMPRESSION # include "snappy.h" diff --git a/src/frontends/tensorflow/src/variables_index.hpp b/src/frontends/tensorflow/src/variables_index.hpp index df852a627994e7..aa805b264bc3d1 100644 --- a/src/frontends/tensorflow/src/variables_index.hpp +++ b/src/frontends/tensorflow/src/variables_index.hpp @@ -9,7 +9,7 @@ #include "graph_iterator_proto.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/mmap_object.hpp" -#include "saved_model.pb.h" +#include "ov_tensorflow/saved_model.pb.h" namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/tests/convert_model.cpp b/src/frontends/tensorflow/tests/convert_model.cpp index fc00a6784963e3..f6ec18cf9cc12c 100644 --- a/src/frontends/tensorflow/tests/convert_model.cpp +++ b/src/frontends/tensorflow/tests/convert_model.cpp @@ -15,7 +15,8 @@ static const std::vector models{ std::string("2in_2out/2in_2out.pb"), std::string("forward_edge_model/forward_edge_model.pbtxt"), std::string("forward_edge_model2/forward_edge_model2.pbtxt"), - std::string("concat_with_non_constant_axis/concat_with_non_constant_axis.pbtxt")}; + std::string("concat_with_non_constant_axis/concat_with_non_constant_axis.pbtxt"), + std::string("gather_tree_model/gather_tree_model.pbtxt")}; INSTANTIATE_TEST_SUITE_P(TFConvertModelTest, FrontEndConvertModelTest, diff --git a/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt b/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt new file mode 100644 index 00000000000000..54351036dd72a2 --- /dev/null +++ 
b/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt @@ -0,0 +1,103 @@ +node { + name: "step_ids" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 20 + } + dim { + size: 2 + } + dim { + size: 30 + } + } + } + } +} +node { + name: "parent_ids" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 20 + } + dim { + size: 2 + } + dim { + size: 30 + } + } + } + } +} +node { + name: "max_seq_len" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } +} +node { + name: "end_token" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "Addons>GatherTree" + op: "Addons>GatherTree" + input: "step_ids" + input: "parent_ids" + input: "max_seq_len" + input: "end_token" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} diff --git a/src/frontends/tensorflow/tests/tf_utils.cpp b/src/frontends/tensorflow/tests/tf_utils.cpp index 120b8ffab8659d..d742b53dcf8704 100644 --- a/src/frontends/tensorflow/tests/tf_utils.cpp +++ b/src/frontends/tensorflow/tests/tf_utils.cpp @@ -16,6 +16,8 @@ namespace frontend { namespace tensorflow { namespace tests { +const std::string TF_FE = "tf"; + shared_ptr convert_model(const string& model_path, const ConversionExtension::Ptr& conv_ext, const vector& input_names, diff --git a/src/frontends/tensorflow/tests/tf_utils.hpp b/src/frontends/tensorflow/tests/tf_utils.hpp index 1c48a95c85fee8..80addd43d61e9e 100644 --- a/src/frontends/tensorflow/tests/tf_utils.hpp +++ b/src/frontends/tensorflow/tests/tf_utils.hpp @@ -14,7 +14,7 @@ namespace ov { namespace frontend { namespace tensorflow { namespace tests { -static const std::string TF_FE = "tf"; +extern const std::string TF_FE; // a wrapper to create TensorFlow Frontend and configure the conversion pipeline // by registering new translator via extension, specifying (new) inputs, their shapes and types diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 54f1dff243efd1..29efb83547d263 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -72,6 +72,7 @@ OP_CONVERTER_NAMED(translate_fused_batch_norm_op); OP_CONVERTER(translate_gather_op); OP_CONVERTER(translate_gather_v2_op); OP_CONVERTER(translate_gather_nd_op); +OP_CONVERTER(translate_gather_tree_op); OP_CONVERTER(translate_identity_op); OP_CONVERTER(translate_identity_n_op); OP_CONVERTER(translate_input_arg_op); @@ -142,9 +143,12 @@ OP_CONVERTER(translate_tensor_list_set_item_op); OP_CONVERTER(translate_tensor_list_stack_op); OP_CONVERTER(translate_tensor_list_resize_op); OP_CONVERTER(translate_tile_op); +OP_CONVERTER(translate_tobool_op); OP_CONVERTER_NAMED(translate_top_k_op); OP_CONVERTER_NAMED(translate_top_k_v2_op); OP_CONVERTER(translate_transpose_op); +OP_CONVERTER(translate_truncate_div_op); +OP_CONVERTER(translate_truncate_mod_op); OP_CONVERTER(translate_unpack_op); OP_CONVERTER(translate_unravel_index_op); OP_CONVERTER(translate_unsorted_segment_sum_op); diff --git a/src/frontends/tensorflow_common/include/helper_ops/merge.hpp b/src/frontends/tensorflow_common/include/helper_ops/merge.hpp 
index eb7e611f3e21f0..6261dd0e67c229 100644 --- a/src/frontends/tensorflow_common/include/helper_ops/merge.hpp +++ b/src/frontends/tensorflow_common/include/helper_ops/merge.hpp @@ -33,20 +33,34 @@ class Merge : public InternalOperation { ov::PartialShape output_data_shape = ov::PartialShape::dynamic(); auto input_size = get_input_size(); - bool merge_output_shape = true; for (size_t input_ind = 0; input_ind < input_size; ++input_ind) { auto input_type = get_input_element_type(input_ind); if (input_type.is_static()) { output_data_type = input_type; } - // check if it still needs to merge input shapes - // if yes, it tries to merge them - if (merge_output_shape && - !PartialShape::merge_into(output_data_shape, get_input_partial_shape(input_ind))) { - merge_output_shape = false; - // reset output shape to dynamic rank + auto input_shape = get_input_partial_shape(input_ind); + if (input_shape.rank().is_dynamic()) { + continue; + } + + if (output_data_shape.rank().is_dynamic()) { + // this is the first shape of static rank that has been met + // use it immediately as the initial output shape + output_data_shape = input_shape; + } else if (output_data_shape.rank().is_static() && + output_data_shape.rank().get_length() != input_shape.rank().get_length()) { + // inputs with different ranks mean the output must be of a dynamic rank output_data_shape = ov::PartialShape::dynamic(); + break; + } else { + auto output_rank = output_data_shape.rank().get_length(); + for (int64_t dim_ind = 0; dim_ind < output_rank; ++dim_ind) { + if (input_shape[dim_ind] != output_data_shape[dim_ind]) { + // different inputs can have different dimensions, so combine them into a dynamic one + output_data_shape[dim_ind] = ov::Dimension::dynamic(); + } + } } } diff --git a/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp b/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp index eb262b4307af7f..e556c9ad4478da 100644 --- a/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp +++ b/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp @@ -43,6 +43,10 @@ class NextIteration : public InternalOperation { producer_output_port_idx = m_producer_output_port_idx; } + void set_output_shape_and_type(const ov::PartialShape& output_shape, const ov::element::Type& output_type) { + set_output_type(0, output_type, output_shape); + } + private: bool m_back_edge_set; std::string m_producer_name;
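For reference, a standalone sketch of the dimension-merging rule the new `Merge::validate_and_infer_types` code above implements (only `ov::PartialShape` from the public core API is assumed; the program is illustrative, not part of the diff): shapes of the same static rank are merged per dimension with conflicting dimensions turning dynamic, while conflicting static ranks collapse the whole output to a dynamic rank.

```cpp
// Merging {2,3} with {2,5}: same rank, second dimension conflicts -> [2,?]
#include <iostream>
#include "openvino/core/partial_shape.hpp"

int main() {
    using ov::Dimension;
    using ov::PartialShape;
    PartialShape out = PartialShape::dynamic();
    for (const PartialShape& in : {PartialShape{2, 3}, PartialShape{2, 5}}) {
        if (in.rank().is_dynamic())
            continue;  // shapes of dynamic rank do not constrain the output
        if (out.rank().is_dynamic()) {
            out = in;  // the first static-rank shape initializes the output
        } else if (out.rank().get_length() != in.rank().get_length()) {
            out = PartialShape::dynamic();  // rank conflict -> fully dynamic rank
            break;
        } else {
            for (int64_t d = 0; d < out.rank().get_length(); ++d)
                if (in[d] != out[d])
                    out[d] = Dimension::dynamic();  // dimension conflict -> dynamic dim
        }
    }
    std::cout << out << std::endl;  // merged shape: first dim 2, second dynamic
    return 0;
}
```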
diff --git a/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp b/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp new file mode 100644 index 00000000000000..030ff12d5b68c5 --- /dev/null +++ b/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "internal_operation.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { + +// Internal operation for TensorArrayV3 +// An array of Tensors of given size +// It has two outputs: +// 1. handle - a resource (reference) for the tensor array +// 2. flow_out - a float value used for chaining operations on the tensor array +class TensorArrayV3 : public InternalOperation { +public: + OPENVINO_OP("TensorArrayV3", "ov::frontend::tensorflow", InternalOperation); + + TensorArrayV3(const Output<Node>& size, + const ov::element::Type element_type, + const std::shared_ptr<DecoderBase>& decoder = std::make_shared<DecoderFake>()) + : InternalOperation(decoder, OutputVector{size}, 2, "TensorArrayV3"), + m_element_type(element_type), + m_element_rank(-1) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + set_output_type(0, m_element_type, ov::PartialShape::dynamic()); + set_output_type(1, m_element_type, ov::PartialShape::dynamic()); + } + + ov::element::Type get_element_type() const { + return m_element_type; + } + + int64_t get_element_rank() const { + return m_element_rank; + } + + void set_element_rank(int64_t element_rank) { + FRONT_END_GENERAL_CHECK( + element_rank >= 0, + "[TensorFlow Frontend] internal error: a negative element rank is set for TensorArrayV3"); + m_element_rank = element_rank; + } + +private: + ov::element::Type m_element_type; + int64_t m_element_rank; +}; + +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp b/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp new file mode 100644 index 00000000000000..42e5a0ad754ea7 --- /dev/null +++ b/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace pass { + +// This transformation replaces the internal operation TensorArrayV3 with a Constant +// that simulates the initial state of the tensor array container +class TensorArrayV3Replacer : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::tensorflow::pass::TensorArrayV3Replacer"); + TensorArrayV3Replacer(); +}; + +} // namespace pass +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp new file mode 100644 index 00000000000000..72ed922511cd98 --- /dev/null +++ b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "helper_transforms/tensor_array_v3_replacer.hpp" + +#include "helper_ops/tensor_array.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; +using namespace ov::pass; + +ov::frontend::tensorflow::pass::TensorArrayV3Replacer::TensorArrayV3Replacer() { + auto tensor_array_v3 = pattern::wrap_type<TensorArrayV3>(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + NodeRegistry rg; + + auto tensor_array_v3 = dynamic_pointer_cast<TensorArrayV3>(m.get_match_root()); + if (!tensor_array_v3) { + return false; + } + + int32_t tensor_element_rank = static_cast<int32_t>(tensor_array_v3->get_element_rank()); + if (tensor_element_rank < 0) { + return false; + } + + // retrieve all TensorArrayV3 inputs + auto size = tensor_array_v3->input_value(0); + auto element_type = tensor_array_v3->get_element_type(); + + // adjust size to have it of shape [1] for further concatenation with element shape + auto new_size_shape = rg.make<v0::Constant>(element::i32, Shape{1}, 1); + auto new_size = rg.make<v1::Reshape>(size, new_size_shape, false); + + // create a vector of size element_shape.rank() filled with ones + // and compute a shape of the initial tensor array [size, 1, ..., 1] + Output<Node> target_shape; + if (tensor_element_rank == 0) { + target_shape = new_size->output(0); + } else { + vector<int32_t> ones(tensor_element_rank, 1); + auto ones_const = rg.make<v0::Constant>(element::i32, Shape{ones.size()}, ones); + target_shape = rg.make<v0::Concat>(OutputVector{new_size, ones_const}, 0)->output(0); + } + + // create the initial tensor array + auto scalar_value = make_shared<v0::Constant>(element_type, Shape{}, vector<int32_t>{0}); + auto initial_tensor_array = make_shared<v3::Broadcast>(scalar_value, target_shape); + + // preserve names of the node and the output tensor + initial_tensor_array->set_friendly_name(tensor_array_v3->get_friendly_name()); + copy_runtime_info(tensor_array_v3, rg.get()); + + ov::replace_node(tensor_array_v3, + ov::OutputVector{initial_tensor_array->output(0), initial_tensor_array->output(0)}); + return true; + }; + + auto m = + std::make_shared<pattern::Matcher>(tensor_array_v3, "ov::frontend::tensorflow::pass::TensorArrayV3Replacer"); + register_matcher(m, callback); +} diff --git a/src/frontends/tensorflow_common/src/op/gather_tree.cpp b/src/frontends/tensorflow_common/src/op/gather_tree.cpp new file mode 100644 index 00000000000000..e349efe6784e64 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/gather_tree.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/gather_tree.hpp" + +#include "common_op_table.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_gather_tree_op(const NodeContext& node) { + default_op_checks(node, 4, {"GatherTree", "Addons>GatherTree"}); + auto step_ids = node.get_input(0); + auto parent_ids = node.get_input(1); + auto max_sequence_lengths = node.get_input(2); + auto end_token = node.get_input(3); + + // adjust end_token that must be a scalar + auto new_shape_end_token = make_shared<v0::Constant>(element::i32, Shape{0}, vector<int32_t>{}); + end_token = make_shared<v1::Reshape>(end_token, new_shape_end_token, false); + + auto gather_tree = make_shared<v1::GatherTree>(step_ids, parent_ids, max_sequence_lengths, end_token); + set_node_name(node.get_name(), gather_tree); + + return {gather_tree}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov
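For context, a minimal sketch (not part of the diff) of the subgraph this translator emits, built directly with the public core API. The shapes follow the `gather_tree_model.pbtxt` test model added earlier in this diff; the standalone program itself is illustrative only.

```cpp
// Build a tiny ov::Model around ov::op::v1::GatherTree with the test shapes:
// step_ids/parent_ids [20,2,30], max_seq_len [2], end_token scalar, all i32.
#include <memory>
#include "openvino/core/model.hpp"
#include "openvino/op/gather_tree.hpp"
#include "openvino/op/parameter.hpp"

int main() {
    using namespace ov;
    auto step_ids = std::make_shared<op::v0::Parameter>(element::i32, Shape{20, 2, 30});
    auto parent_ids = std::make_shared<op::v0::Parameter>(element::i32, Shape{20, 2, 30});
    auto max_seq_len = std::make_shared<op::v0::Parameter>(element::i32, Shape{2});
    auto end_token = std::make_shared<op::v0::Parameter>(element::i32, Shape{});  // already a scalar,
    // so the Reshape the translator inserts for a non-scalar end_token is not needed here
    auto gather_tree = std::make_shared<op::v1::GatherTree>(step_ids, parent_ids, max_seq_len, end_token);
    auto model = std::make_shared<Model>(gather_tree->outputs(),
                                         ParameterVector{step_ids, parent_ids, max_seq_len, end_token});
    return model ? 0 : 1;
}
```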
"openvino/op/reduce_prod.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/shape_of.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_tobool_op(const NodeContext& node) { + // (rank(x) == 0 && x != 0) || (rank > 0 && ReduceProd(ShapeOf(x))) > 0 + + default_op_checks(node, 1, {"ToBool"}); + auto x = node.get_input(0); + + // prepare auxiliary zero and zero constants of the same type as the inputs + auto zero = create_same_type_const_scalar(x, 0); + auto zero_2 = make_shared(element::i32, Shape{}, 0); + auto true_const = make_shared(element::boolean, Shape{}, true); + auto false_const = make_shared(element::boolean, Shape{}, false); + // compute a mask to get rank(x) == 0 + auto x_rank = compute_subgraph_scalar_rank(x, element::i32); + + // compute rank(x) == 0 + auto is_zero = make_shared(x_rank, zero_2); + + // compute mask to get x != 0 + auto is_not_zero = make_shared(x, zero); + + // compute (rank(x) == 0 && x != 0) + auto logical_and = make_shared(is_zero, is_not_zero); + // compute rank(x) > 0 + auto greater_than_zero = make_shared(x_rank, zero_2); + + // compute ShapeOf(x) + auto cond_shape = make_shared(x, element::i32); + // compute ReduceProd(ShapeOf(x))) and axis + auto axis = make_shared(element::i32, Shape{}, 0); + auto reduce_prod = make_shared(cond_shape, axis); + + // compute ReduceProd(ShapeOf(x))) > 0 + auto greater_than__zero_2 = make_shared(reduce_prod, zero_2); + // compute (rank > 0 && ReduceProd(ShapeOf(x))) > 0 + auto logical_and_2 = make_shared(greater_than_zero, greater_than__zero_2); + + auto logical_or = make_shared(logical_and, logical_and_2); + + auto tobool = make_shared(logical_or, true_const, false_const); + set_node_name(node.get_name(), tobool); + return tobool->outputs(); +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/tensorflow_common/src/op/truncate_div.cpp b/src/frontends/tensorflow_common/src/op/truncate_div.cpp new file mode 100644 index 00000000000000..b725bbd76b44a3 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/truncate_div.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/ceiling.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/floor.hpp" +#include "openvino/op/less.hpp" +#include "openvino/op/mod.hpp" +#include "openvino/op/select.hpp" + +using namespace std; +using namespace ov::opset10; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_truncate_div_op(const NodeContext& node) { + default_op_checks(node, 2, {"TruncateDiv"}); + auto x = node.get_input(0); + auto y = node.get_input(1); + + auto res = make_shared(x, y); + auto is_res_negative = make_shared(res, create_same_type_const_scalar(x, 0)); + auto final_res = make_shared(is_y_negative, negative_y, y); + + // check if floor_mod == zero + auto floor_mod = make_shared(x, y); + auto is_zero = make_shared(floor_mod, create_same_type_const_scalar(floor_mod, 0)); + + // floor_mod - y + auto other_res = make_shared(floor_mod, y); + + // select operation to handle the sign + auto result = make_shared(is_x_negative, other_res, floor_mod)); + + set_node_name(node.get_name(), result); + return result->outputs(); +} +} // namespace op +} // namespace 
diff --git a/src/frontends/tests/frontend/shared/src/library_extension.cpp b/src/frontends/tests/frontend/shared/src/library_extension.cpp index a2257f8fca116b..8a6bb23d82f0ef 100644 --- a/src/frontends/tests/frontend/shared/src/library_extension.cpp +++ b/src/frontends/tests/frontend/shared/src/library_extension.cpp @@ -9,6 +9,7 @@ #include "common_test_utils/file_utils.hpp" #include "openvino/op/relu.hpp" #include "openvino/op/swish.hpp" +#include "openvino/runtime/core.hpp" #include "utils.hpp" using namespace ov::frontend; @@ -88,3 +89,30 @@ TEST_P(FrontendLibraryExtensionTest, verifyFunctions) { nodes.end()); } } + +TEST_P(FrontendLibraryExtensionTest, loadExtensionBeforeFrontend) { + // release all frontends internally + ov::shutdown(); + + const auto& lib_path = get_lib_path("test_builtin_extensions"); + + ov::Core core; + core.add_extension(lib_path); + + auto model = core.read_model(m_param.m_modelName); + ASSERT_NE(nullptr, model); + + const auto nodes = model->get_ops(); + ASSERT_EQ(std::find_if(nodes.begin(), + nodes.end(), + [](const std::shared_ptr<ov::Node>& n) { + return ov::is_type<ov::op::v0::Relu>(n); + }), + nodes.end()); + ASSERT_NE(std::find_if(nodes.begin(), + nodes.end(), + [](const std::shared_ptr<ov::Node>& n) { + return ov::is_type<ov::op::v4::Swish>(n); + }), + nodes.end()); +} diff --git a/src/inference/dev_api/ie_icore.hpp b/src/inference/dev_api/ie_icore.hpp index 8852c1f4ecd8c9..2210f26bbfc6ef 100644 --- a/src/inference/dev_api/ie_icore.hpp +++ b/src/inference/dev_api/ie_icore.hpp @@ -191,7 +191,7 @@ class ICore : public ov::ICore { virtual InferenceEngine::RemoteContext::Ptr CreateContext(const std::string& deviceName, const ov::AnyMap&) = 0; /** - * @brief Get only configs that are suppored by device + * @brief Get only configs that are supported by device * @param deviceName Name of a device * @param config Map of configs that can contain configs that are not supported by device * @return map of configs that are supported by device diff --git a/src/inference/dev_api/openvino/runtime/icore.hpp b/src/inference/dev_api/openvino/runtime/icore.hpp index e4d0a98f5be968..de2ca2ebf07c57 100644 --- a/src/inference/dev_api/openvino/runtime/icore.hpp +++ b/src/inference/dev_api/openvino/runtime/icore.hpp @@ -222,7 +222,7 @@ class OPENVINO_RUNTIME_API ICore { } /** - * @brief Get only properties that are suppored by specified device + * @brief Get only properties that are supported by specified device * @param full_device_name Name of a device (can be either virtual or hardware) * @param properties Properties that can contain configs that are not supported by device * @return map of properties that are supported by device
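A minimal usage sketch of the scenario the new `loadExtensionBeforeFrontend` test covers: the extension library is registered with `ov::Core` before any frontend object exists, and `read_model()` must still pick it up. The library and model paths below are placeholders, not files from this repository.

```cpp
// Register a frontend extension library with ov::Core, then read a model
// whose conversion depends on that extension.
#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    core.add_extension("/path/to/libtest_builtin_extensions.so");     // placeholder library path
    auto model = core.read_model("/path/to/model_with_custom_op.xml");  // placeholder model path
    return model ? 0 : 1;
}
```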
diff --git a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp index ed15438de2eb83..938fa8924fbb05 100644 --- a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp +++ b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp @@ -124,6 +124,12 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { } }; + /** + * @brief Finds input or output port + * @return structure which contains index of Input/Output or reports that the port wasn't found + */ + FoundPort find_port(const ov::Output<const ov::Node>& port) const; + /** * @brief Converts batched tensors to tensor */ @@ -157,12 +163,9 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { std::shared_ptr<const ov::ICompiledModel> m_compiled_model; // Mutable to return reference to ov::Tensor mutable std::unordered_map<std::shared_ptr<ov::descriptor::Tensor>, ov::SoPtr<ov::ITensor>> m_tensors; - - /** - * @brief Finds input or output port - * @return structure which contains index of Input/Output or reports that the port wasn't found - */ - FoundPort find_port(const ov::Output<const ov::Node>& port) const; + // Cache of ports + mutable std::unordered_map<size_t, FoundPort> m_cached_ports; + mutable std::mutex m_cache_mutex; }; }; // namespace ov diff --git a/src/inference/src/dev/isync_infer_request.cpp b/src/inference/src/dev/isync_infer_request.cpp index 8e0f554fedd900..94d714d9f134a5 100644 --- a/src/inference/src/dev/isync_infer_request.cpp +++ b/src/inference/src/dev/isync_infer_request.cpp @@ -4,6 +4,7 @@ #include "openvino/runtime/isync_infer_request.hpp" +#include #include #include @@ -17,6 +18,7 @@ #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/plugin_itt.hpp" #include "openvino/runtime/tensor.hpp" +#include "openvino/util/common_util.hpp" namespace { void check_batched_tensors(const ov::Output<const ov::Node>& input, @@ -93,14 +95,18 @@ ov::IInferRequest::~IInferRequest() = default; ov::ISyncInferRequest::ISyncInferRequest(const std::shared_ptr<const ov::ICompiledModel>& compiled_model) : m_compiled_model(compiled_model) { OPENVINO_ASSERT(m_compiled_model); - // Create map of empty tensors - for (const auto& input : get_inputs()) { - if (m_tensors.find(input.get_tensor_ptr()) == m_tensors.end()) - m_tensors[input.get_tensor_ptr()] = ov::SoPtr<ov::ITensor>(); - } - for (const auto& output : get_outputs()) { - if (m_tensors.find(output.get_tensor_ptr()) == m_tensors.end()) - m_tensors[output.get_tensor_ptr()] = ov::SoPtr<ov::ITensor>(); + // Create map of empty tensors and cache ports from the compiled model + auto port_type = ov::ISyncInferRequest::FoundPort::Type::INPUT; + for (const auto& ports : {get_inputs(), get_outputs()}) { + for (size_t i = 0; i < ports.size(); i++) { + const auto& port = ports[i]; + if (m_tensors.find(port.get_tensor_ptr()) == m_tensors.end()) + m_tensors[port.get_tensor_ptr()] = ov::SoPtr<ov::ITensor>(); + size_t port_hash = ov::util::hash_combine(std::vector<size_t>{std::hash<const ov::Node*>()(port.get_node()), + std::hash<size_t>()(port.get_index())}); + m_cached_ports[port_hash] = {i, port_type}; + } + port_type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT; + } } @@ -118,18 +124,30 @@ ov::ISyncInferRequest::FoundPort ov::ISyncInferRequest::find_port(const ov::Outp // This function is a hotspot and needs optimization. auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { return node1 == node2 || - (node1->get_friendly_name() == node2->get_friendly_name() && - node1->get_type_info() == node2->get_type_info() && - node1->outputs().size() == node2->outputs().size() && node1->inputs().size() == node2->inputs().size()); + (node1->outputs().size() == node2->outputs().size() && + node1->inputs().size() == node2->inputs().size() && node1->get_type_info() == node2->get_type_info() && + node1->get_friendly_name() == node2->get_friendly_name()); }; + // Finding a port without caching is slow because each call has to iterate over all ports and compare strings. + // So use a caching workaround to make repeated calls for the same port faster. + // Calculate hash for the port + size_t port_hash = ov::util::hash_combine( + std::vector<size_t>{std::hash<const ov::Node*>()(port.get_node()), std::hash<size_t>()(port.get_index())}); + { + std::lock_guard<std::mutex> lock(m_cache_mutex); + if (m_cached_ports.find(port_hash) != m_cached_ports.end()) { + // Cached port for the hash was found + return m_cached_ports[port_hash]; + } + } ov::ISyncInferRequest::FoundPort::Type type = ov::ISyncInferRequest::FoundPort::Type::INPUT; for (const auto& ports : {get_inputs(), get_outputs()}) { for (size_t i = 0; i < ports.size(); i++) { - // TODO: Fix port comparison - // if (ports[i] == port) { if (ports[i].get_index() == port.get_index() && ports[i].get_names() == port.get_names() && check_nodes(ports[i].get_node(), port.get_node())) { - return {i, type}; + std::lock_guard<std::mutex> lock(m_cache_mutex); + m_cached_ports[port_hash] = {i, type}; + return m_cached_ports[port_hash]; } } type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT; @@ -275,10 +293,10 @@ void ov::ISyncInferRequest::allocate_tensor( void ov::ISyncInferRequest::check_tensors() const { const auto& inputs = m_compiled_model->inputs(); for (size_t i = 0; i < inputs.size(); i++) { - check_tensor(inputs[i], get_tensor_ptr(inputs[i])); + check_tensor(inputs[i], m_tensors.at(inputs[i].get_tensor_ptr())); } const auto& outputs = m_compiled_model->outputs(); for (size_t i = 0; i < outputs.size(); i++) { - check_tensor(outputs[i], get_tensor_ptr(outputs[i])); + check_tensor(outputs[i], m_tensors.at(outputs[i].get_tensor_ptr())); } }
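A simplified standalone sketch of the caching idea introduced in `find_port` above: a port is keyed by hashing its node pointer together with its output index, so repeated lookups skip the string comparisons entirely. `FoundPort` and `hash_combine` here are reduced stand-ins for the OpenVINO counterparts, not the real types.

```cpp
// Key ports by (node pointer, output index); cache the lookup result.
#include <cstddef>
#include <functional>
#include <unordered_map>

struct FoundPort {
    size_t idx = 0;
    enum class Type { NOT_FOUND, INPUT, OUTPUT } type = Type::NOT_FOUND;
};

// boost-style hash mixing, similar in spirit to ov::util::hash_combine
static size_t hash_combine(size_t seed, size_t value) {
    return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

struct PortCache {
    std::unordered_map<size_t, FoundPort> cached;

    static size_t key(const void* node, size_t output_index) {
        return hash_combine(std::hash<const void*>()(node), std::hash<size_t>()(output_index));
    }

    const FoundPort* lookup(const void* node, size_t output_index) const {
        auto it = cached.find(key(node, output_index));
        return it == cached.end() ? nullptr : &it->second;  // hit -> no port iteration needed
    }

    void store(const void* node, size_t output_index, FoundPort port) {
        cached[key(node, output_index)] = port;
    }
};
```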
diff --git a/src/inference/src/dev/threading/cpu_streams_executor.cpp b/src/inference/src/dev/threading/cpu_streams_executor.cpp index dba0082d647080..691a3951615460 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor.cpp @@ -4,6 +4,7 @@ #include "openvino/runtime/threading/cpu_streams_executor.hpp" +#include #include #include #include @@ -22,8 +23,6 @@ namespace ov { namespace threading { -// maybe there are two CPUStreamsExecutors in the same thread. -thread_local std::map<void*, std::shared_ptr<std::thread::id>> t_stream_count_map; struct CPUStreamsExecutor::Impl { struct Stream { #if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO @@ -335,12 +334,58 @@ struct CPUStreamsExecutor::Impl { // will be counted by thread_local t_stream_count_map. // when the customer's thread is destroyed, the stream's count becomes 1, // Call local() will reuse one of them, and release others. + // it's only a workaround for ticket CVS-111490, please be careful when you need to modify + // CustomThreadLocal::local(), especially operations that will affect the count of + // CustomThreadLocal::ThreadId class CustomThreadLocal : public ThreadLocal<std::shared_ptr<Stream>> { + class ThreadTracker { + public: + explicit ThreadTracker(const std::thread::id& id) + : _id(id), + _count_ptr(std::make_shared<std::atomic_int>(1)) {} + ~ThreadTracker() { + _count_ptr->fetch_sub(1); + } + std::shared_ptr<ThreadTracker> fetch() { + auto new_ptr = std::shared_ptr<ThreadTracker>(new ThreadTracker(*this)); + auto pre_value = new_ptr.get()->_count_ptr->fetch_add(1); + OPENVINO_ASSERT(pre_value == 1, "this value must be 1, please check the code of CustomThreadLocal::local()"); + return new_ptr; + } + const std::thread::id& get_id() const { + return _id; + } + int count() const { + return *(_count_ptr.get()); + } + + private: + // disable copy and move semantics so that users can only use fetch() + // to create a new instance with a shared counter; + ThreadTracker(ThreadTracker const&) = default; + ThreadTracker(ThreadTracker&&) = delete; + ThreadTracker& operator=(ThreadTracker const&) = delete; + ThreadTracker& operator=(ThreadTracker&&) = delete; + std::thread::id _id; + std::shared_ptr<std::atomic_int> _count_ptr; + }; + public: CustomThreadLocal(std::function<std::shared_ptr<Stream>()> callback_construct, Impl* impl) : ThreadLocal<std::shared_ptr<Stream>>(callback_construct), _impl(impl) {} std::shared_ptr<Stream> local() { + // maybe there are two CPUStreamsExecutors in the same thread. + static thread_local std::map<void*, std::shared_ptr<ThreadTracker>> t_stream_count_map; + // fix the memory leak where a CPUStreamsExecutor has already been released + // but its CustomThreadLocal::ThreadTracker still exists in t_stream_count_map + for (auto it = t_stream_count_map.begin(); it != t_stream_count_map.end();) { + if (this != it->first && it->second->count() == 1) { + t_stream_count_map.erase(it++); + } else { + it++; + } + } auto id = std::this_thread::get_id(); auto search = _thread_ids.find(id); if (search != _thread_ids.end()) { @@ -348,14 +393,13 @@ struct CPUStreamsExecutor::Impl { } std::lock_guard<std::mutex> guard(_stream_map_mutex); for (auto& item : _stream_map) { - if (*(item.first.get()) == id) { - t_stream_count_map[(void*)this] = item.first; + if (item.first->get_id() == id) { return item.second; } } std::shared_ptr<Stream> stream = nullptr; for (auto it = _stream_map.begin(); it != _stream_map.end();) { - if (it->first.use_count() == 1) { + if (it->first->count() == 1) { if (stream == nullptr) { stream = it->second; } @@ -367,9 +411,10 @@ struct CPUStreamsExecutor::Impl { if (stream == nullptr) { stream = std::make_shared<Stream>(_impl); } - auto id_ptr = std::make_shared<std::thread::id>(id); - t_stream_count_map[(void*)this] = id_ptr; - _stream_map[id_ptr] = stream; + auto tracker_ptr = std::make_shared<ThreadTracker>(id); + t_stream_count_map[(void*)this] = tracker_ptr; + auto new_tracker_ptr = tracker_ptr->fetch(); + _stream_map[new_tracker_ptr] = stream; return stream; } @@ -382,7 +427,7 @@ struct CPUStreamsExecutor::Impl { private: std::set<std::thread::id> _thread_ids; Impl* _impl; - std::map<std::shared_ptr<std::thread::id>, std::shared_ptr<Stream>> _stream_map; + std::map<std::shared_ptr<ThreadTracker>, std::shared_ptr<Stream>> _stream_map; std::mutex _stream_map_mutex; }; @@ -397,7 +442,7 @@ struct CPUStreamsExecutor::Impl { auto numaNodes = get_available_numa_nodes(); if (_config._streams != 0) { std::copy_n(std::begin(numaNodes), - std::min(static_cast<std::size_t>(_config._streams), numaNodes.size()), + std::min(_config._streams, numaNodes.size()), std::back_inserter(_usedNumaNodes)); } else { _usedNumaNodes = numaNodes; diff --git a/src/inference/src/ie_network_reader.cpp b/src/inference/src/ie_network_reader.cpp index
7fe34b42ed7948..f5aca3586e8339 100644 --- a/src/inference/src/ie_network_reader.cpp +++ b/src/inference/src/ie_network_reader.cpp @@ -20,6 +20,7 @@ #include "ie_icnn_network.hpp" #include "ie_input_info.hpp" #include "openvino/frontend/manager.hpp" +#include "openvino/runtime/shared_buffer.hpp" #ifdef ENABLE_IR_V7_READER # include "legacy/ie_ir_version.hpp" #endif @@ -388,8 +389,8 @@ CNNNetwork details::ReadNetwork(const std::string& model, ov::AnyVector params{&modelStream}; if (weights) { char* data = weights->cbuffer().as(); - std::shared_ptr weights_buffer = - std::make_shared>(data, weights->byteSize(), weights); + std::shared_ptr weights_buffer = + std::make_shared>(data, weights->byteSize(), weights); params.emplace_back(weights_buffer); } diff --git a/src/inference/src/model_reader.cpp b/src/inference/src/model_reader.cpp index 1837d75a2d44aa..bc67f6d21b225a 100644 --- a/src/inference/src/model_reader.cpp +++ b/src/inference/src/model_reader.cpp @@ -9,6 +9,8 @@ #include "openvino/core/model.hpp" #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/frontend/manager.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/file_util.hpp" #include "transformations/utils/utils.hpp" @@ -155,10 +157,10 @@ std::shared_ptr read_model(const std::string& model, ov::AnyVector params{&modelStream}; if (weights) { - std::shared_ptr weights_buffer = - std::make_shared>(reinterpret_cast(weights.data()), - weights.get_byte_size(), - weights); + std::shared_ptr weights_buffer = + std::make_shared>(reinterpret_cast(weights.data()), + weights.get_byte_size(), + weights); params.emplace_back(weights_buffer); } diff --git a/src/plugins/hetero/src/async_infer_request.cpp b/src/plugins/hetero/src/async_infer_request.cpp index a4f5f36e15f0f5..e9d3643b5baa43 100644 --- a/src/plugins/hetero/src/async_infer_request.cpp +++ b/src/plugins/hetero/src/async_infer_request.cpp @@ -7,7 +7,7 @@ struct RequestExecutor : ov::threading::ITaskExecutor { explicit RequestExecutor(ov::SoPtr& request) : m_request(request) { m_request->set_callback([this](std::exception_ptr exception_ptr) mutable { - m_exception_ptr = exception_ptr; + m_exception_ptr = std::move(exception_ptr); auto task = std::move(m_task); task(); }); diff --git a/src/plugins/hetero/src/sync_infer_request.cpp b/src/plugins/hetero/src/sync_infer_request.cpp index 21fbd0b4f2e2a3..0bb4bc4b7e9a4f 100644 --- a/src/plugins/hetero/src/sync_infer_request.cpp +++ b/src/plugins/hetero/src/sync_infer_request.cpp @@ -49,20 +49,15 @@ ov::hetero::InferRequest::InferRequest(const std::shared_ptr ov::hetero::InferRequest::get_request(const ov::Output& port) const { - auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { - return node1 == node2 || - (node1->get_friendly_name() == node2->get_friendly_name() && - node1->get_type_info() == node2->get_type_info() && - node1->outputs().size() == node2->outputs().size() && node1->inputs().size() == node2->inputs().size()); - }; - - for (const auto& kvp : m_port_to_subrequest_idx) { - if (kvp.first.get_index() == port.get_index() && kvp.first.get_names() == port.get_names() && - check_nodes(kvp.first.get_node(), port.get_node())) { - return m_subrequests[kvp.second]; - } + auto found_port = find_port(port); + ov::Output internal_port; + OPENVINO_ASSERT(found_port.found(), "Cannot find infer request for port ", port); + if (found_port.is_input()) { + internal_port = get_inputs().at(found_port.idx); + } else { + internal_port = 
get_outputs().at(found_port.idx); } - OPENVINO_THROW("Cannot find infer request for port ", port); + return m_subrequests[m_port_to_subrequest_idx.at(internal_port)]; } ov::SoPtr ov::hetero::InferRequest::get_tensor(const ov::Output& port) const { diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 734fd462acbebb..ba3474fddf2cc6 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -450,7 +450,9 @@ void Edge::init() { DEBUG_LOG(*this, " getBaseEdge() return itself"); changeStatus(Status::NeedAllocation); } else { - if (edgePtr->getParent()->isConstant() && !edgePtr->getChild()->isConstant()) { + if (Type::Input == edgePtr->getParent()->getType() && + edgePtr->getParent()->isConstant() && + !edgePtr->getChild()->isConstant()) { changeStatus(Status::NeedAllocation); DEBUG_LOG(*this, " edge inplace from ", *edgePtr, " is broken!"); return; diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp index b23e068244512a..da6e2e39442777 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp @@ -25,25 +25,25 @@ #include -using namespace std; +namespace ov { #define CREATE_SNIPPETS_EMITTER(e_type) { \ - [this](const ov::snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ + [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ return std::make_shared(h.get(), isa, expr); \ }, \ [](const std::shared_ptr& n) -> std::set> { \ return e_type::get_supported_precisions(n); \ } \ -}; +} #define CREATE_CPU_EMITTER(e_type) { \ - [this](const ov::snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ + [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ return std::make_shared(h.get(), isa, expr->get_node()); \ }, \ - [](const std::shared_ptr& n) -> std::set> { \ + [](const std::shared_ptr& n) -> std::set> { \ return e_type::get_supported_precisions(n); \ } \ -}; +} class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { public: @@ -58,94 +58,95 @@ class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { } }; -ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa) +intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa) : TargetMachine(), h(new jit_snippet()), isa(host_isa) { // data movement - jitters[ov::op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); - jitters[ov::op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + jitters[op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + jitters[op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); jitters[snippets::op::Buffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); jitters[snippets::op::VectorBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); - // jitters[ov::op::v1::Constant::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported + jitters[snippets::op::RankNormalization::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + // jitters[op::v1::Constant::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported jitters[snippets::op::Load::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadEmitter); jitters[snippets::op::LoadReshape::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadEmitter); 
jitters[snippets::op::BroadcastLoad::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BroadcastLoadEmitter); - jitters[ov::intel_cpu::LoadConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); - jitters[ov::intel_cpu::LoadConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); + jitters[intel_cpu::LoadConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); + jitters[intel_cpu::LoadConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); jitters[snippets::op::Store::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreEmitter); - jitters[ov::intel_cpu::StoreConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); - jitters[ov::intel_cpu::StoreConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); + jitters[intel_cpu::StoreConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); + jitters[intel_cpu::StoreConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); jitters[snippets::op::Scalar::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(ScalarEmitter); jitters[snippets::op::BroadcastMove::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BroadcastMoveEmitter); // jitters[snippets::op::Nop::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); // Not supported - // jitters[ov::op::v1::Broadcast::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported + // jitters[op::v1::Broadcast::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported - jitters[snippets::op::ConvertTruncation::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_convert_truncation_emitter); - jitters[snippets::op::ConvertSaturation::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_convert_saturation_emitter); - // jitters[ov::op::v1::FakeQuantize::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[snippets::op::ConvertTruncation::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_convert_truncation_emitter); + jitters[snippets::op::ConvertSaturation::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_convert_saturation_emitter); + // jitters[op::v1::FakeQuantize::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported // ternary - jitters[ov::op::v1::Select::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_select_emitter); - jitters[ov::intel_cpu::FusedMulAdd::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_mul_add_emitter); + jitters[op::v1::Select::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_select_emitter); + jitters[intel_cpu::FusedMulAdd::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_mul_add_emitter); // binary - jitters[ov::op::v1::Add::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_add_emitter); - jitters[ov::op::v1::Divide::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_divide_emitter); - jitters[ov::op::v1::Equal::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_equal_emitter); - jitters[ov::op::v1::FloorMod::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_floor_mod_emitter); - jitters[ov::op::v1::Greater::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_greater_emitter); - jitters[ov::op::v1::GreaterEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_greater_equal_emitter); - 
jitters[ov::op::v1::Less::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_less_emitter); - jitters[ov::op::v1::LessEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_less_equal_emitter); - jitters[ov::op::v1::LogicalAnd::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_and_emitter); - jitters[ov::op::v1::LogicalOr::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_or_emitter); - jitters[ov::op::v1::LogicalXor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_xor_emitter); - jitters[ov::op::v1::Maximum::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_maximum_emitter); - jitters[ov::op::v1::Minimum::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_minimum_emitter); - jitters[ov::op::v1::Mod::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_mod_emitter); - jitters[ov::op::v1::Multiply::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_multiply_emitter); - jitters[ov::op::v1::NotEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_not_equal_emitter); - jitters[snippets::op::PowerStatic::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_power_static_emitter); - jitters[ov::op::v1::Power::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_power_dynamic_emitter); - jitters[ov::op::v0::PRelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_prelu_emitter); - jitters[ov::op::v0::SquaredDifference::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_squared_difference_emitter); - jitters[ov::op::v1::Subtract::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_subtract_emitter); - jitters[ov::op::v0::Xor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_xor_emitter); + jitters[op::v1::Add::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_add_emitter); + jitters[op::v1::Divide::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_divide_emitter); + jitters[op::v1::Equal::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_equal_emitter); + jitters[op::v1::FloorMod::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_floor_mod_emitter); + jitters[op::v1::Greater::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_greater_emitter); + jitters[op::v1::GreaterEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_greater_equal_emitter); + jitters[op::v1::Less::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_less_emitter); + jitters[op::v1::LessEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_less_equal_emitter); + jitters[op::v1::LogicalAnd::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_and_emitter); + jitters[op::v1::LogicalOr::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_or_emitter); + jitters[op::v1::LogicalXor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_xor_emitter); + jitters[op::v1::Maximum::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_maximum_emitter); + jitters[op::v1::Minimum::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_minimum_emitter); + jitters[op::v1::Mod::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_mod_emitter); + jitters[op::v1::Multiply::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_multiply_emitter); + jitters[op::v1::NotEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_not_equal_emitter); + 
jitters[snippets::op::PowerStatic::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_power_static_emitter); + jitters[op::v1::Power::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_power_dynamic_emitter); + jitters[op::v0::PRelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_prelu_emitter); + jitters[op::v0::SquaredDifference::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_squared_difference_emitter); + jitters[op::v1::Subtract::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_subtract_emitter); + jitters[op::v0::Xor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_xor_emitter); // unary - jitters[ov::op::v0::Abs::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_abs_emitter); - // jitters[ov::op::v1::Acos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Asin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Atan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Ceiling::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_ceiling_emitter); - jitters[ov::op::v0::Clamp::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_clamp_emitter); - // jitters[ov::op::v1::Cos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Cosh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Elu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_elu_emitter); - jitters[ov::op::v0::Erf::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_erf_emitter); - jitters[ov::op::v0::Exp::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_exp_emitter); - jitters[ov::op::v0::Floor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_floor_emitter); - jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_round_emitter); - // jitters[ov::op::v1::Log::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v1::LogicalNot::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_not_emitter); - jitters[ov::op::v0::Negative::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_negative_emitter); - jitters[ov::op::v0::Relu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_relu_emitter); - // jitters[ov::op::v1::Sign::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Sigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_sigmoid_emitter); - // jitters[ov::op::v1::Sin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Sinh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Sqrt::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_sqrt_emitter); - // jitters[ov::op::v1::Tan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_tanh_emitter); - - jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_swish_emitter); - jitters[ngraph::op::v4::HSwish::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_hswish_emitter); - // jitters[ov::op::v1::HardSigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Selu::get_type_info_static()] 
= CREATE_CPU_EMITTER(); // not supported - jitters[ngraph::op::v0::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_gelu_v0_emitter); - jitters[ngraph::op::v7::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_gelu_v7_emitter); + jitters[op::v0::Abs::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_abs_emitter); + // jitters[op::v1::Acos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Asin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Atan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Ceiling::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_ceiling_emitter); + jitters[op::v0::Clamp::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_clamp_emitter); + // jitters[op::v1::Cos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Cosh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Elu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_elu_emitter); + jitters[op::v0::Erf::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_erf_emitter); + jitters[op::v0::Exp::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_exp_emitter); + jitters[op::v0::Floor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_floor_emitter); + jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_round_emitter); + // jitters[op::v1::Log::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v1::LogicalNot::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_not_emitter); + jitters[op::v0::Negative::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_negative_emitter); + jitters[op::v0::Relu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_relu_emitter); + // jitters[op::v1::Sign::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Sigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_sigmoid_emitter); + // jitters[op::v1::Sin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Sinh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Sqrt::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_sqrt_emitter); + // jitters[op::v1::Tan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_tanh_emitter); + + jitters[intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_swish_emitter); + jitters[ngraph::op::v4::HSwish::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_hswish_emitter); + // jitters[op::v1::HardSigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Selu::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[ngraph::op::v0::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_gelu_v0_emitter); + jitters[ngraph::op::v7::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_gelu_v7_emitter); jitters[snippets::op::Fill::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(FillEmitter); jitters[snippets::op::HorizonMax::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(HorizonEmitter); @@ -154,11 +155,11 @@ ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_ 
    jitters[snippets::op::Kernel::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(KernelEmitter);
     jitters[snippets::op::LoopBegin::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoopBeginEmitter);
     jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoopEndEmitter);
-    jitters[ov::intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmEmitter);
-    jitters[ov::intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmCopyBEmitter);
+    jitters[intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmEmitter);
+    jitters[intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmCopyBEmitter);
 }
 
-size_t ov::intel_cpu::CPUTargetMachine::get_lanes() const {
+size_t intel_cpu::CPUTargetMachine::get_lanes() const {
     switch (isa) {
         case dnnl::impl::cpu::x64::avx2 : return dnnl::impl::cpu::x64::cpu_isa_traits<dnnl::impl::cpu::x64::avx2>::vlen / sizeof(float);
         case dnnl::impl::cpu::x64::sse41 : return dnnl::impl::cpu::x64::cpu_isa_traits<dnnl::impl::cpu::x64::sse41>::vlen / sizeof(float);
@@ -167,28 +168,62 @@ size_t ov::intel_cpu::CPUTargetMachine::get_lanes() const {
     }
 }
 
-bool ov::intel_cpu::CPUTargetMachine::is_supported() const {
+dnnl::impl::cpu::x64::cpu_isa_t intel_cpu::CPUTargetMachine::get_isa() const {
+    return isa;
+}
+
+bool intel_cpu::CPUTargetMachine::is_supported() const {
     return dnnl::impl::cpu::x64::mayiuse(isa);
 }
 
-ov::snippets::code ov::intel_cpu::CPUTargetMachine::get_snippet() const {
+snippets::CompiledSnippetPtr intel_cpu::CPUTargetMachine::get_snippet() {
     if (h->create_kernel() != dnnl::impl::status::success) {
         IE_THROW() << "Failed to create jit_kernel in get_snippet()";
     }
-    return h->jit_ker();
+    const auto& result = std::make_shared<CompiledSnippetCPU>(std::unique_ptr<dnnl::impl::cpu::x64::jit_generator>(h.release()));
+    // Note that we reset all the generated code, since it was copied into CompiledSnippetCPU
+    h.reset(new jit_snippet());
+    return result;
+}
+
+intel_cpu::CompiledSnippetCPU::CompiledSnippetCPU(std::unique_ptr<dnnl::impl::cpu::x64::jit_generator> h) : h_compiled(std::move(h)) {
+    OPENVINO_ASSERT(h_compiled && h_compiled->jit_ker(), "Got invalid jit generator or kernel was not compiled");
 }
 
-ov::intel_cpu::CPUGenerator::CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa_) : Generator(std::make_shared<CPUTargetMachine>(isa_)) {
+const uint8_t* intel_cpu::CompiledSnippetCPU::get_code() const {
+    return h_compiled->jit_ker();
 }
 
-ov::snippets::Generator::opRegType ov::intel_cpu::CPUGenerator::get_specific_op_reg_type(const std::shared_ptr<ov::Node>& op) const {
-    if (std::dynamic_pointer_cast<ov::intel_cpu::BrgemmCPU>(op) ||
-        std::dynamic_pointer_cast<ov::intel_cpu::BrgemmCopyB>(op))
+size_t intel_cpu::CompiledSnippetCPU::get_code_size() const {
+    return h_compiled->getSize();
+}
+
+bool intel_cpu::CompiledSnippetCPU::empty() const {
+    return get_code_size() == 0;
+}
+
+intel_cpu::CPUGenerator::CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa_) : Generator(std::make_shared<CPUTargetMachine>(isa_)) {
+}
+
+std::shared_ptr<snippets::Generator> intel_cpu::CPUGenerator::clone() const {
+    const auto& cpu_target_machine = std::dynamic_pointer_cast<CPUTargetMachine>(target);
+    OPENVINO_ASSERT(cpu_target_machine, "Failed to clone CPUGenerator: the instance contains incompatible TargetMachine type");
+    return std::make_shared<CPUGenerator>(cpu_target_machine->get_isa());
+}
+
+snippets::Generator::opRegType intel_cpu::CPUGenerator::get_specific_op_reg_type(const std::shared_ptr<ov::Node>& op) const {
+    if (std::dynamic_pointer_cast<intel_cpu::BrgemmCPU>(op) ||
+        std::dynamic_pointer_cast<intel_cpu::BrgemmCopyB>(op))
         return gpr2gpr;
     else if (
-        std::dynamic_pointer_cast<ov::intel_cpu::FusedMulAdd>(op) ||
-        std::dynamic_pointer_cast<ov::intel_cpu::SwishNode>(op))
+        std::dynamic_pointer_cast<intel_cpu::FusedMulAdd>(op) ||
+        std::dynamic_pointer_cast<intel_cpu::SwishNode>(op))
         return vec2vec;
     else
OPENVINO_THROW("Register type of the operation " + std::string(op->get_type_name()) + " isn't determined!"); } +bool intel_cpu::CPUGenerator::uses_precompiled_kernel(const std::shared_ptr& e) const { + return std::dynamic_pointer_cast(e) || + std::dynamic_pointer_cast(e); +} +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp index 96ccbb4b0db97f..fa3528df6c9e6d 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp @@ -13,13 +13,23 @@ namespace ov { namespace intel_cpu { +class CompiledSnippetCPU : public snippets::CompiledSnippet { + const std::unique_ptr h_compiled; +public: + const uint8_t* get_code() const override; + size_t get_code_size() const override; + bool empty() const override; + explicit CompiledSnippetCPU(std::unique_ptr h); +}; + class CPUTargetMachine : public snippets::TargetMachine { public: - CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa); + explicit CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa); bool is_supported() const override; - snippets::code get_snippet() const override; + snippets::CompiledSnippetPtr get_snippet() override; size_t get_lanes() const override; + dnnl::impl::cpu::x64::cpu_isa_t get_isa() const; private: std::unique_ptr h; @@ -29,8 +39,10 @@ class CPUTargetMachine : public snippets::TargetMachine { class CPUGenerator : public snippets::Generator { public: CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa); + std::shared_ptr clone() const override; protected: + bool uses_precompiled_kernel(const std::shared_ptr& emitter) const override; opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override; }; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 24493334f1d675..072c3f7edcf60b 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -11,6 +11,7 @@ #include "snippets/lowered/port_connector.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op//brgemm_cpu.hpp" +#include "snippets/op/rank_normalization.hpp" using namespace InferenceEngine; using namespace Xbyak; @@ -121,7 +122,12 @@ KernelEmitter::KernelEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt element::Type etype; switch (expr->get_type()) { case snippets::lowered::IOExpression::io_type::INPUT: { - desc = expr->get_output_port_descriptor(0); + const auto first_consumer = expr->get_output_port_connector(0)->get_consumers().begin()->get_expr(); + if (ov::is_type(first_consumer->get_node())) { + desc = first_consumer->get_output_port_descriptor(0); + } else { + desc = expr->get_output_port_descriptor(0); + } etype = expr->get_node()->get_output_element_type(0); num_inputs++; break; diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index 71c84045e12848..1da8e866f7c4a1 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -157,6 +157,7 @@ std::map Extension::getOpSets() { NGRAPH_OP(Store, ov::snippets::op) NGRAPH_OP(Subgraph, ov::snippets::op) NGRAPH_OP(VectorBuffer, ov::snippets::op) + NGRAPH_OP(RankNormalization, ov::snippets::op) NGRAPH_OP_X64(LoadConvertSaturation, ov::intel_cpu) 
NGRAPH_OP_X64(LoadConvertTruncation, ov::intel_cpu) NGRAPH_OP_X64(StoreConvertSaturation, ov::intel_cpu) diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index c20ecbea76cdca..58c6c4f595a95f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -13,7 +13,6 @@ #include #include -#include #include #include @@ -119,67 +118,36 @@ bool SnippetKey::operator==(const SnippetKey& rhs) const { return true; } -snippets::op::Subgraph::BlockedShapeVector getBlockedShapes(const std::vector>& memBlockedDims, - const std::vector>& memOrders, const std::vector& memPrecs) { - size_t numShapes = memBlockedDims.size(); - if (memOrders.size() != numShapes || memPrecs.size() != numShapes) - IE_THROW(Unexpected) << "Number of shapes is mismacthed for dimensions, orders and precisions"; - snippets::op::Subgraph::BlockedShapeVector blockedShapes(numShapes); - for (size_t i = 0; i < numShapes; i++) { - size_t dimSize = memBlockedDims[i].size(); - std::vector dims(dimSize); - for (size_t j = 0; j < dimSize; j++) { - dims[j] = memBlockedDims[i][j]; - } - ov::PartialShape shape(dims); - ov::AxisVector order(memOrders[i]); - ov::element::Type precision = InferenceEngine::details::convertPrecision(memPrecs[i]); - - blockedShapes[i] = snippets::op::Subgraph::BlockedShape{shape, order, precision}; - } - - return blockedShapes; -} } // namespace Snippet::Snippet(const std::shared_ptr& op, const GraphContext::CPtr& context) : Node(op, context, SnippetShapeInferFactory(op)) { host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) ? dnnl::impl::cpu::x64::avx512_core : dnnl::impl::cpu::x64::avx2; - original_snippet = ov::as_type_ptr(op); - if (!original_snippet) { - IE_THROW(NotImplemented) << "Node is not an instance of snippets::op::Subgraph"; - } - init_body_hash(); - is_dynamic = isDynamicNgraphNode(op); -} + const auto& tmp_snippet = ov::as_type_ptr(op); + OPENVINO_ASSERT(tmp_snippet, "Attempt to create Snippet node from an invalid op type"); + snippetAttrs.snippet = tmp_snippet->clone(); + snippetAttrs.bodyHash = get_body_hash(tmp_snippet); -void Snippet::copy_snippet() const { - ov::OutputVector subgraph_node_inputs; - for (const auto &input : original_snippet->input_values()) { - auto new_input = std::make_shared(input.get_element_type(), input.get_partial_shape()); - subgraph_node_inputs.push_back(new_input); - } - std::shared_ptr new_body = original_snippet->body_ptr()->clone(); - snippetAttrs.snippet = std::make_shared(subgraph_node_inputs, new_body); - ov::copy_runtime_info(original_snippet, snippetAttrs.snippet); - snippetAttrs.snippet->set_friendly_name(original_snippet->get_friendly_name()); #if defined(OPENVINO_ARCH_X86_64) snippetAttrs.snippet->set_generator(std::make_shared(host_isa)); #else - IE_THROW(NotImplemented) << "CPU plugin: code-generation is not supported on non-x64 platforms"; + OPENVINO_THROW("CPU plugin: Snippets code-generator is not supported on non-x64 platforms"); #endif // OPENVINO_ARCH_X86_64 + + // Note: we have to update shapeInfer, so it uses the per-thread op::Subgraph copy + shapeInference = SnippetShapeInferFactory(snippetAttrs.snippet).makeShapeInfer(); + is_dynamic = isDynamicNgraphNode(op); } -void Snippet::init_body_hash() { +uint64_t Snippet::get_body_hash(const std::shared_ptr& snippet) { uint64_t seed = 0; ov::snippets::pass::Hash hash_function(seed); - hash_function.run_on_model(original_snippet->body_ptr()); - snippetAttrs.bodyHash = seed; + 
hash_function.run_on_model(snippet->body_ptr()); + return seed; } void Snippet::initSupportedPrimitiveDescriptors() { - copy_snippet(); if (!supportedPrimitiveDescriptors.empty()) return; @@ -315,16 +283,29 @@ void Snippet::selectOptimalPrimitiveDescriptor() { } void Snippet::initOptimalPrimitiveDescriptor() { + const auto isPlanar = [](const VectorDims& order ) { + for (size_t i = 0; i < order.size(); ++i) + if (order[i] != i) + return false; + return true; + }; Node::initOptimalPrimitiveDescriptor(); // memory order and precision is determined now, there is no need to prepare for each dynamic shapes. const auto config = getSelectedPrimitiveDescriptor()->getConfig(); inputNum = config.inConfs.size(); + snippets::op::Subgraph::BlockedShapeVector in_blocked_shapes; snippetAttrs.inMemPrecs.resize(inputNum); snippetAttrs.inMemOrders.resize(inputNum); + in_blocked_shapes.reserve(inputNum); + snippetAttrs.has_non_planar_inputs = false; for (size_t i = 0; i < inputNum; i++) { const auto& memDesc = config.inConfs[i].getMemDesc(); snippetAttrs.inMemPrecs[i] = memDesc->getPrecision(); - snippetAttrs.inMemOrders[i] = memDesc->as()->getOrder(); + const auto& blockedDesc = memDesc->as(); + const auto& order = blockedDesc->getOrder(); + snippetAttrs.inMemOrders[i] = order; + snippetAttrs.has_non_planar_inputs |= !isPlanar(order); + in_blocked_shapes.emplace_back(blockedDesc->getBlockDims(), order); } outputNum = config.outConfs.size(); snippetAttrs.outMemPrecs.resize(outputNum); @@ -338,6 +319,52 @@ void Snippet::initOptimalPrimitiveDescriptor() { snippetAttrs.outMemBlockedDims.resize(outputNum); srcMemPtrs.resize(inputNum); dstMemPtrs.resize(outputNum); + + // here we should perform all shape-agnostic snippets passes + // * canonicalization (RankNormalization insert) + // * precision propagation & align element types + // * data flow optimizations + // The result of these transformations will be reused by all shapes + using Manager = snippets::pass::Manager; + std::vector backend_passes; +#if defined(OPENVINO_ARCH_X86_64) + using PassPosition = snippets::pass::Manager::PassPosition; + using Place = snippets::pass::Manager::PassPosition::Place; +# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) \ + backend_passes.emplace_back(PASS_POS, std::make_shared(__VA_ARGS__)) +#else +# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) 
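The SNIPPETS_REGISTER_PASS block above anchors backend passes at named positions in the common snippets pipeline: PipelineStart/PipelineEnd, or Before/After a named pass such as "MatMulToBrgemm". PassPosition and Place are the real snippets::pass::Manager types; the following stand-alone sketch only illustrates the insertion mechanics with stand-in types.

    #include <algorithm>
    #include <iterator>
    #include <string>
    #include <utility>
    #include <vector>

    struct Pass { std::string name; };
    enum class Place { PipelineStart, PipelineEnd, Before, After };

    // Insert `pass` relative to an anchor pass name, mirroring the intent of
    // PassPosition(Place::..., "anchor") without the real machinery.
    void register_pass(std::vector<Pass>& pipeline, Place place, Pass pass,
                       const std::string& anchor = "") {
        auto it = std::find_if(pipeline.begin(), pipeline.end(),
                               [&](const Pass& p) { return p.name == anchor; });
        switch (place) {
        case Place::PipelineStart: pipeline.insert(pipeline.begin(), std::move(pass)); break;
        case Place::PipelineEnd:   pipeline.push_back(std::move(pass));                break;
        case Place::Before:        pipeline.insert(it, std::move(pass));               break;
        case Place::After:
            pipeline.insert(it == pipeline.end() ? it : std::next(it), std::move(pass));
            break;
        }
    }

    int main() {
        std::vector<Pass> pipeline = {{"MatMulToBrgemm"}, {"PropagatePrecision"}};
        register_pass(pipeline, Place::After, {"EnforcePrecision"}, "MatMulToBrgemm");
        register_pass(pipeline, Place::Before, {"BrgemmToBrgemmCPU"}, "PropagatePrecision");
    }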
+#endif // OPENVINO_ARCH_X86_64 + + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ConvertToSwishCPU); + if (context->getConfig().inferencePrecision == ov::element::bf16 && snippetAttrs.snippet->has_domain_sensitive_ops()) { + // enforce BF16 precisions to supported operations + // MatMul has to be decomposed to Brgemm operations before enforcement + // Note, MatMul decomposition will be run later again for case if BF16 enforcement is not happened + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ov::snippets::pass::MatMulToBrgemm); + SNIPPETS_REGISTER_PASS(PassPosition(Place::After, "MatMulToBrgemm"), pass::EnforcePrecision, element::f32, element::bf16); + } + + SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::BrgemmToBrgemmCPU); + SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::SetBrgemmCPUBlockingParams); + + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::RemoveConverts); + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::MulAddToFMA); + +#undef SNIPPETS_REGISTER_PASS + + std::vector input_precisions; + std::vector output_precisions; + input_precisions.reserve(inputNum); + for (const auto& p : snippetAttrs.inMemPrecs) { + input_precisions.push_back(InferenceEngine::details::convertPrecision(p)); + } + output_precisions.reserve(outputNum); + for (const auto& p : snippetAttrs.outMemPrecs) + output_precisions.push_back(InferenceEngine::details::convertPrecision(p)); + + snippetAttrs.snippet->data_flow_transformations(in_blocked_shapes, input_precisions, output_precisions, backend_passes); + snippetAttrs.snippet->convert_body_to_linear_ir(std::make_shared()); } InferenceEngine::Precision Snippet::getRuntimePrecision() const { @@ -361,9 +388,8 @@ void Snippet::prepareParams() { SnippetKey key = {snippetAttrs}; auto builder = [this](const SnippetKey& key) -> std::shared_ptr { - std::shared_ptr executor = std::make_shared(key.attrs, is_canonicalized, - is_dynamic, context->getConfig().inferencePrecision == ov::element::bf16); - is_canonicalized = true; + std::shared_ptr executor = + std::make_shared(key.attrs, is_dynamic, context->getConfig().inferencePrecision == ov::element::bf16); return executor; }; @@ -426,15 +452,17 @@ void Snippet::executeDynamicImpl(dnnl::stream strm) { } void Snippet::SnippetJitExecutor::exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) { - if (schedule.ptr == nullptr) { + if (schedule.lowering_result.compiled_snippet->empty()) { IE_THROW() << "Snippet can't use Optimized implementation and can't fallback to reference"; } auto initStartMemoryOffsets = [this, &inMemPtrs, &outMemPtrs]() { for (size_t i = 0; i < numInput; i++) { - start_offset_in[i] = inMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i]; + start_offset_in[i] = + static_cast(inMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i]); } for (size_t i = 0; i < numOutput; i++) { - start_offset_out[i] = outMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i + numInput]; + start_offset_out[i] = + static_cast(outMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i + numInput]); } }; // initialize start offsets to src and dst memory @@ -465,13 +493,13 @@ void Snippet::SnippetJitExecutor::update_ptrs(jit_snippets_call_args& call_args, void Snippet::SnippetJitExecutor::schedule_6d(const std::vector& inMemPtrs, const std::vector& outMemPtrs) { const auto& dom = parallel_exec_domain; // 
< N, C, H, W > < 1, 1, N, C*H*W> + const auto& callable = schedule.get_callable(); parallel_for5d(dom[0], dom[1], dom[2], dom[3], dom[4], [&](int64_t d0, int64_t d1, int64_t d2, int64_t d3, int64_t d4) { int64_t indexes[] = {d0, d1, d2, d3, d4}; jit_snippets_call_args call_args; update_ptrs(call_args, inMemPtrs, outMemPtrs); - - schedule.get_callable()(indexes, &call_args); + callable(indexes, &call_args); }); } @@ -487,8 +515,8 @@ void Snippet::SnippetJitExecutor::schedule_nt(const std::vector& inMe std::vector indexes(work_size.size() - 1, 0); for (size_t iwork = start; iwork < end; ++iwork) { size_t tmp = iwork; - for (ptrdiff_t j = work_size.size() - 2; j >= 0; j--) { - indexes[j] = tmp % work_size[j]; + for (ptrdiff_t j = static_cast(work_size.size()) - 2; j >= 0; j--) { + indexes[j] = static_cast(tmp % work_size[j]); tmp /= work_size[j]; } @@ -497,49 +525,25 @@ void Snippet::SnippetJitExecutor::schedule_nt(const std::vector& inMe }); } -Snippet::SnippetExecutor::SnippetExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16) - : snippetAttrs(attrs), is_canonicalized(is_canonicalized), is_dynamic(is_dynamic), enforceBF16(enforceBF16) {} +Snippet::SnippetExecutor::SnippetExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16) + : snippetAttrs(std::move(attrs)), is_dynamic(is_dynamic), enforceBF16(enforceBF16) {} -Snippet::SnippetJitExecutor::SnippetJitExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16) : - SnippetExecutor(attrs, is_canonicalized, is_dynamic, enforceBF16) { +Snippet::SnippetJitExecutor::SnippetJitExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16) : + SnippetExecutor(std::move(attrs), is_dynamic, enforceBF16) { numInput = snippetAttrs.inMemBlockedDims.size(); numOutput = snippetAttrs.outMemBlockedDims.size(); start_offset_in.resize(numInput); start_offset_out.resize(numOutput); - auto local_copy = [this]() { - ov::OutputVector subgraph_node_inputs; - for (size_t i = 0; i < numInput; i++) { - const auto paramShape = snippetAttrs.snippet->body_ptr()->get_parameters()[i]->get_shape(); - const auto paramType = snippetAttrs.snippet->body_ptr()->get_parameters()[i]->get_element_type(); - auto new_input = std::make_shared(paramType, paramShape); - subgraph_node_inputs.push_back(new_input); - } - std::shared_ptr new_body = snippetAttrs.snippet->body_ptr()->clone(); - - snippet_for_generation = std::make_shared(subgraph_node_inputs, new_body); - ov::copy_runtime_info(snippetAttrs.snippet, snippet_for_generation); - snippet_for_generation->set_friendly_name(snippetAttrs.snippet->get_friendly_name()); -#if defined(OPENVINO_ARCH_X86_64) - auto host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) - ? dnnl::impl::cpu::x64::avx512_core - : dnnl::impl::cpu::x64::avx2; - snippet_for_generation->set_generator(std::make_shared(host_isa)); -#else - IE_THROW(NotImplemented) << "CPU plugin: code-generation is not supported on non-x64 platforms"; -#endif // OPENVINO_ARCH_X86_64 - }; - // is_canonicalized is ture means just reshape canonicalized graph with new input shapes, and get updated master shape, - // false means canonicalization, determine master_shape on snippetAttrs.snippet. - ov::PartialShape canonicalShape = canonicalizeBody(is_canonicalized); - - if (is_dynamic) { - // we need a local snippets for generation, which will be adjusted based on input shapes possibily. 
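The index loop in schedule_nt above peels a flat parallel work-item id into per-dimension indexes, leaving the innermost dimension to the JIT kernel itself. A worked stand-alone example of the same arithmetic:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        // Outer dims {2, 3, 4} are parallelized; the last dim stays inside the kernel.
        std::vector<size_t> work_size = {2, 3, 4, 16};
        size_t iwork = 9;  // flat id in [0, 2*3*4)
        std::vector<size_t> indexes(work_size.size() - 1, 0);
        size_t tmp = iwork;
        for (ptrdiff_t j = static_cast<ptrdiff_t>(work_size.size()) - 2; j >= 0; j--) {
            indexes[j] = tmp % work_size[j];  // innermost-varying-fastest decomposition
            tmp /= work_size[j];
        }
        std::printf("%zu %zu %zu\n", indexes[0], indexes[1], indexes[2]);  // 0 2 1
    }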
- // The adjustment may be not compatible with new input shape in dynamic node, such as broadcastMove inserted. - local_copy(); - } else { - snippet_for_generation = snippetAttrs.snippet; + // todo: snippets don't support backend-provided blocking, so we need to reshape body + // using blocked shapes first. This can be removed after [121670] + if (snippetAttrs.has_non_planar_inputs) { + std::vector in_shapes; + for (const auto& s : snippetAttrs.inMemBlockedDims) + in_shapes.emplace_back(s); + snippetAttrs.snippet->shape_infer(in_shapes); } + const VectorDims& canonicalShape = snippetAttrs.snippet->infer_master_shape(); // initialize by maximum output dimension. Dimensions of outputs should be broadcastable tensorRank = std::max(static_cast(rank6D), canonicalShape.size()); @@ -552,85 +556,39 @@ Snippet::SnippetJitExecutor::SnippetJitExecutor(const SnippetAttrs& attrs, bool }; initDataSizes(); - if (canonicalShape.is_dynamic()) + if (std::any_of(canonicalShape.begin(), canonicalShape.end(), + [](size_t x){return x == snippets::IShapeInferSnippets::DYNAMIC_DIMENSION;})) IE_THROW() << "Snippets: Canonicalization returned dynamic shape in static pipeline"; - snippet_for_generation->set_min_parallel_work_amount(static_cast(parallel_get_max_threads())); + snippetAttrs.snippet->set_min_parallel_work_amount(static_cast(parallel_get_max_threads())); // Note: minimal JIT work amount is a predefined value that describes the number of kernel iterations (work amount) // needed to cover kernel call overhead. It is used for balancing between parallel and JIT work amounts in domain optimization. - snippet_for_generation->set_min_jit_work_amount(256); + snippetAttrs.snippet->set_min_jit_work_amount(256); // generate jit_snippets_compile_args jcp; jcp.parallel_executor_ndims = tensorRank; generate(&jcp); - buffer_scratchpad_size = snippet_for_generation->get_buffer_scratchpad_size(); + buffer_scratchpad_size = schedule.lowering_result.buffer_scratchpad_size; buffer_scratchpad.resize(buffer_scratchpad_size * parallel_get_max_threads(), 0); parallel_exec_domain = schedule.parallel_exec_domain; harnessWorkAmount = std::accumulate(parallel_exec_domain.begin(), parallel_exec_domain.end(), 1, std::multiplies()); parallel_exec_domain = getNormalizedDimsBySize(parallel_exec_domain, tensorRank); } -ov::PartialShape Snippet::SnippetJitExecutor::canonicalizeBody(bool reshape) { - ov::snippets::op::Subgraph::BlockedShapeVector input_blocked_shapes = getBlockedShapes( - snippetAttrs.inMemBlockedDims, snippetAttrs.inMemOrders, snippetAttrs.inMemPrecs); - if (reshape) { - const auto& canonicalShape = snippetAttrs.snippet->canonicalized_body_shape_infer(input_blocked_shapes); - return canonicalShape; - } else { - ov::snippets::op::Subgraph::BlockedShapeVector output_blocked_shapes = getBlockedShapes( - snippetAttrs.outMemBlockedDims, snippetAttrs.outMemOrders, snippetAttrs.outMemPrecs); - - const auto& canonicalShape = snippetAttrs.snippet->canonicalize(output_blocked_shapes, input_blocked_shapes); - return canonicalShape; - } -} - void Snippet::SnippetJitExecutor::generate(const jit_snippets_compile_args* jcp) { - using Manager = snippets::pass::Manager; - std::vector backend_passes; -#if defined(OPENVINO_ARCH_X86_64) - using PassPosition = snippets::pass::Manager::PassPosition; - using Place = snippets::pass::Manager::PassPosition::Place; -# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) 
\ - backend_passes.emplace_back(PASS_POS, std::make_shared(__VA_ARGS__)) -#else -# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) -#endif // OPENVINO_ARCH_X86_64 - - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ConvertToSwishCPU); - if (enforceBF16 && snippet_for_generation->has_domain_sensitive_ops()) { - // enforce BF16 precisions to supported operations - // MatMul has to be decomposed to Brgemm operations before enforcement - // Note, MatMul decomposition will be run later again for case if BF16 enforcement is not happened - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ov::snippets::pass::MatMulToBrgemm); - SNIPPETS_REGISTER_PASS(PassPosition(Place::After, "MatMulToBrgemm"), pass::EnforcePrecision, element::f32, element::bf16); - } - - SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::BrgemmToBrgemmCPU); - SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::SetBrgemmCPUBlockingParams); - - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::RemoveConverts); - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::MulAddToFMA); - -#undef SNIPPETS_REGISTER_PASS - ov::snippets::lowered::pass::PassPipeline control_flow_markup_pipeline; CPU_REGISTER_PASS_X64(control_flow_markup_pipeline, ov::intel_cpu::pass::BrgemmBlocking) ov::snippets::lowered::pass::PassPipeline control_flow_pipeline; CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::FuseLoadStoreConvert) CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::SetBrgemmCopyBBuffersShape); - // Note: we need to pass valid shapeInfer factory to generate, so it can be used in OptimizeDomain pass - // in all other cases nGraph shape inference will be used until ticket # 113209 (PR 18563) is merged - schedule = snippet_for_generation->generate(backend_passes, - control_flow_markup_pipeline, - control_flow_pipeline, - std::make_shared(), - reinterpret_cast(jcp)); + schedule = snippetAttrs.snippet->generate_from_linear_ir(control_flow_markup_pipeline, + control_flow_pipeline, + reinterpret_cast(jcp)); } bool Snippet::SnippetJitExecutor::schedule_created() { - return schedule.ptr != nullptr; + return !schedule.lowering_result.compiled_snippet->empty(); } } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.h b/src/plugins/intel_cpu/src/nodes/subgraph.h index 086e84e15ba631..0979aeee807d8f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.h +++ b/src/plugins/intel_cpu/src/nodes/subgraph.h @@ -48,31 +48,24 @@ class Snippet : public Node { // Local copy of subgraph node for canonization & code generation std::shared_ptr snippet; uint64_t bodyHash; - std::vector> inMemBlockedDims; - std::vector> inMemOrders; + std::vector inMemBlockedDims; + std::vector inMemOrders; std::vector inMemPrecs; - std::vector> outMemBlockedDims; - std::vector> outMemOrders; + std::vector outMemBlockedDims; + std::vector outMemOrders; std::vector outMemPrecs; + // todo: used flag if we need extra shape infer, can be removed after [121670] + bool has_non_planar_inputs; }; private: - static const size_t rank6D {6}; - typedef void (*kernel)(const void *, const void *); - // Create a deep local copy of the input snippet to perform canonicalization & code generation - // TODO: Probably better to implement a proper copy constructor - void copy_snippet() const; - void init_body_hash(); + static uint64_t get_body_hash(const std::shared_ptr& snippet); size_t 
inputNum = 0; size_t outputNum = 0; - // Original subgraph node - std::shared_ptr original_snippet; - mutable std::shared_ptr local_snippet; - // Holds ISA version used is codeGeneration target dnnl::impl::cpu::x64::cpu_isa_t host_isa; @@ -80,18 +73,17 @@ class Snippet : public Node { std::vector dstMemPtrs = {}; mutable SnippetAttrs snippetAttrs; - mutable bool is_canonicalized = false; bool is_dynamic = false; class SnippetExecutor { public: - SnippetExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16); + SnippetExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16); virtual void exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) = 0; virtual ~SnippetExecutor() = default; + std::shared_ptr shapeInference = nullptr; protected: SnippetAttrs snippetAttrs; - bool is_canonicalized = false; bool is_dynamic = false; bool enforceBF16 = false; }; @@ -100,7 +92,7 @@ class Snippet : public Node { class SnippetJitExecutor : public SnippetExecutor { public: - SnippetJitExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16); + SnippetJitExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16); void exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) override; bool schedule_created(); @@ -113,16 +105,12 @@ class Snippet : public Node { size_t numInput = 0; size_t numOutput = 0; - ov::PartialShape canonicalizeBody(bool reshape); - void generate(const jit_snippets_compile_args*); inline void update_ptrs(jit_snippets_call_args&, const std::vector& inMemPtrs, const std::vector& outMemPtrs); // Evaluates generated snippet using parallel backend void schedule_6d(const std::vector& inMemPtrs, const std::vector& outMemPtrs); void schedule_nt(const std::vector& inMemPtrs, const std::vector& outMemPtrs); - std::shared_ptr snippet_for_generation; - // Holds generated snippet with information about how to schedule it snippets::Schedule schedule; diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index b38ae2fde7e7db..b9b7345b37f493 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -513,7 +513,7 @@ void TensorIterator::createPrimitive() { lastUsedCond = initial_cond_check->getStatus(); } - if (isDynamicNode()) + if (runAsDynamic()) prepareDynamicBuffers(); Node::createPrimitive(); @@ -556,7 +556,7 @@ void TensorIterator::prepareParams() { prepareContinueCond(); prepareLoopBodyCurrentIteration(); - if (!isDynamicNode()) { + if (!runAsDynamic()) { prepareOutputPorts(); prepareBackEdges(); } @@ -568,6 +568,12 @@ void TensorIterator::prepareParams() { } void TensorIterator::execute(dnnl::stream strm) { + //Special case, the subgraph is dynamic while the node has all static shapes + if (runAsDynamic()) { + executeDynamicImpl(strm); + return; + } + sub_graph.ResetInferCount(); bool continue_cond = initial_cond_check->getStatus(); @@ -872,6 +878,10 @@ int TensorIterator::getNumIteration(const std::vector& inputPortMap, co return numIterations; } +bool TensorIterator::runAsDynamic() const { + return isDynamicNode() || Graph::Status::ReadyDynamic == sub_graph.getStatus(); +} + bool TensorIterator::created() const { return getType() == Type::TensorIterator; } diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.h b/src/plugins/intel_cpu/src/nodes/tensoriterator.h index 8633be5c28df61..104ee077f9a163 100644 --- 
a/src/plugins/intel_cpu/src/nodes/tensoriterator.h +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.h @@ -138,6 +138,7 @@ class TensorIterator : public Node { void reshapeAndFillOutput(dnnl::stream strm); bool checkForInputAndBodyShapesInequality() const; int getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const; + bool runAsDynamic() const; ExtensionManager::Ptr ext_mng; Graph sub_graph; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp index 64db193d7773f1..bae9b2b1dd43cc 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp @@ -43,3 +43,8 @@ void FusedMulAdd::validate_and_infer_types() { } set_output_type(0, element_type, pshape); } + +const ov::op::AutoBroadcastSpec& FusedMulAdd::get_autob() const { + static ov::op::AutoBroadcastSpec autob_spec(ov::op::AutoBroadcastType::NUMPY); + return autob_spec; +} diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp index e55741986a2473..68a730c95aabd2 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp @@ -24,6 +24,7 @@ class FusedMulAdd : public ngraph::op::Op { bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; void validate_and_infer_types() override; + const ov::op::AutoBroadcastSpec& get_autob() const override; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp index dfae3a030abdeb..fee91b5c5a38d3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp @@ -34,6 +34,8 @@ void core_configuration(ov::test::SubgraphBaseTest* test) { test->configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()}); } #endif + // todo: issue: 123320 + test->convert_precisions = {{ ov::element::bf16, ov::element::f32 }, { ov::element::f16, ov::element::f32 }}; } } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 4eb40365fa95d7..e942043dd3fbf2 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -230,6 +230,8 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); + // Issue: 123064 + retVector.emplace_back(R"(smoke_TestsROIPooling_.*/ROIPoolingLayerTest.*modelType=f16.*)"); #endif #endif diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp 
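FusedMulAdd::get_autob() above pins the op to NUMPY auto-broadcasting: shapes are aligned at their trailing axes and size-1 axes stretch. That is also what the new canonicalization test shapes below (e.g. {2, 17, 3, 4} against {1, 4}) exercise. A stand-alone recomputation of the rule, skipping validation of incompatible dimensions:

    #include <cstdio>
    #include <vector>

    std::vector<size_t> numpy_broadcast(std::vector<size_t> a, std::vector<size_t> b) {
        if (a.size() < b.size())
            a.swap(b);                                // make `a` the longer shape
        b.insert(b.begin(), a.size() - b.size(), 1);  // left-pad the shorter with 1s
        std::vector<size_t> out(a.size());
        for (size_t i = 0; i < a.size(); ++i)
            out[i] = (a[i] == 1) ? b[i] : a[i];       // a size-1 axis stretches
        return out;
    }

    int main() {
        const auto shape = numpy_broadcast({2, 17, 3, 4}, {1, 4});
        for (const auto d : shape)
            std::printf("%zu ", d);                   // prints: 2 17 3 4
    }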
index 45aaf9f4eeafdf..bd2f1aad832550 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp @@ -68,6 +68,9 @@ std::vector> inShapesAddPair { {{{}, {{1, 128, 9, 30}}}, {{}, {{1, 128, 1, 30}}}}, {{{}, {{1, 128, 9, 1}}}, {{}, {{1, 128, 1, 30}}}}, {{{}, {{1, 128, 9, 16}}}, {{}, {{1, 128, 9, 1}}}}, + // Test Canonicalization and Dimension collapsing + {{{}, {{2, 17, 3, 4}}}, {{}, {{1, 3, 4}}}}, + {{{}, {{2, 17, 3, 4}}}, {{}, {{1, 4}}}}, // DS {{{1, -1, {1, 10}, {1, 33}}, {{1, 128, 1, 1}, {1, 128, 1, 9}, {1, 128, 1, 17}, {1, 128, 1, 29}, {1, 128, 9, 1}, {1, 128, 1, 1}}}, {{{1, 1}, {128, 128}, {1, 10}, {1, 33}}, {{1, 128, 1, 1}, {1, 128, 1, 9}, {1, 128, 1, 17}, {1, 128, 1, 29}, {1, 128, 1, 30}, {1, 128, 1, 1}}}}, diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..f84c9844db6c10 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/transpose_matmul_fusion.hpp" + +using namespace ov::test; + +namespace { +INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion, + ::testing::Values(ov::test::utils::DEVICE_CPU), + TransposeMatMulFusion::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp index 1efb20ffc1d8d2..8cab3926b72a5f 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp @@ -91,6 +91,10 @@ class AdaPoolLayerCPUTest : public testing::WithParamInterfaceget_parameters().size() == 2) { + generatePooledVector(); + functionRefs = createFunction(true); + } } void generatePooledVector() { @@ -124,14 +128,6 @@ class AdaPoolLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 2) { - generatePooledVector(); - funcRef = createFunction(true); - } - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp index 836931ec465669..99367ef14e8ba9 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp @@ -9,14 +9,12 @@ using namespace InferenceEngine; using namespace CPUTestUtils; -using namespace ngraph::opset3; using namespace ov::test; namespace CPULayerTestsDefinitions { namespace { std::vector blockShape, cropsBegin, cropsEnd; - ngraph::Shape paramShape; } // namespace using BatchToSpaceLayerTestCPUParams = std::tuple< @@ -24,7 +22,7 @@ using BatchToSpaceLayerTestCPUParams = std::tuple< std::vector, // block shape std::vector, // crops begin std::vector, // crops end - 
Precision , // Network precision + ov::element::Type, // Network precision CPUSpecificParams>; class BatchToSpaceCPULayerTest : public testing::WithParamInterface, @@ -32,9 +30,9 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface &obj) { std::vector inputShapes; - Precision netPrecision; + ov::element::Type model_type; CPUSpecificParams cpuParams; - std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = obj.param; + std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, model_type, cpuParams) = obj.param; std::ostringstream result; if (inputShapes.front().first.size() != 0) { result << "IS=("; @@ -53,39 +51,40 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override { inputs.clear(); - const auto& funcInputs = function->inputs(); - for (size_t i = 0; i < funcInputs.size(); i++) { - const auto& funcInput = funcInputs[i]; + const auto& parameters = function->get_parameters(); + for (size_t i = 0; i < parameters.size(); i++) { + const auto& parameter = parameters[i]; ov::Tensor tensor; - if (i == 0U) { - tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); - } else if (i == 1U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < blockShape.size(); j++) { - dataPtr[j] = blockShape[j]; + const auto& param_type = parameter->get_output_element_type(0); + const auto& static_shape = targetInputStaticShapes[i]; + switch (i) { + case 0: { + tensor = ov::test::utils::create_and_fill_tensor(param_type, static_shape, 2560, 0, 256); + break; } - } else if (i == 2U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < cropsBegin.size(); j++) { - dataPtr[j] = cropsBegin[j]; + case 1: { + ASSERT_EQ(ov::shape_size(static_shape), blockShape.size()); + tensor = ov::Tensor(param_type, static_shape, blockShape.data()); + break; } - } else if (i == 3U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < cropsEnd.size(); j++) { - dataPtr[j] = cropsEnd[j]; + case 2: + case 3: { + ASSERT_EQ(ov::shape_size(static_shape), cropsEnd.size()); + tensor = ov::Tensor(param_type, static_shape, cropsEnd.data()); + break; + } + default: { + throw std::runtime_error("Incorrect parameter number!"); } } - inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + inputs.insert({parameter, tensor}); } } @@ -94,55 +93,51 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface inputShapes; - Precision netPrecision; + ov::element::Type model_type; CPUSpecificParams cpuParams; - std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = this->GetParam(); + std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, model_type, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + init_input_shapes(inputShapes); - auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - - const std::vector inputShapesVec{inputShapes}; - init_input_shapes(inputShapesVec); - - if (strcmp(netPrecision.name(), "U8") == 0) + if (model_type == ov::element::Type_t::u8) { selectedType = std::string("ref_any_") + "I8"; - else - selectedType = std::string("ref_any_") + netPrecision.name(); - - ov::ParameterVector params{std::make_shared(ngPrec, inputDynamicShapes.front())}; - auto 
paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - paramShape = {paramOuts[0].get_partial_shape().size()}; - - std::shared_ptr in2, in3, in4; - auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); - in2 = blockShapeParam; - params.push_back(blockShapeParam); - auto cropsBeginParam = std::make_shared(ngraph::element::i64, paramShape); - params.push_back(cropsBeginParam); - in3 = cropsBeginParam; - auto cropsEndParam = std::make_shared(ngraph::element::i64, paramShape); - params.push_back(cropsEndParam); - in4 = cropsEndParam; - auto btsNode = std::make_shared(paramOuts[0], in2, in3, in4); + } else { + std::string type_name = model_type.get_type_name(); + if (type_name == "f16") + type_name = "fp16"; + if (type_name == "f32") + type_name = "fp32"; + if (type_name == "f64") + type_name = "fp64"; + std::transform(type_name.begin(), type_name.end(), type_name.begin(), ::toupper); + selectedType = std::string("ref_any_") + type_name; + } + + std::shared_ptr in0, in1, in2, in3; + in0 = std::make_shared(model_type, inputDynamicShapes.front()); + in1 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[1]); + in2 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[2]); + in3 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[3]); + auto btsNode = std::make_shared(in0, in1, in2, in3); btsNode->get_rt_info() = getCPUInfo(); - ngraph::ResultVector results{std::make_shared(btsNode)}; - function = std::make_shared(results, params, "BatchToSpace"); + ngraph::ResultVector results{std::make_shared(btsNode)}; + function = std::make_shared(results, ov::ParameterVector{in0, in1, in2, in3}, "BatchToSpace"); } }; TEST_P(BatchToSpaceCPULayerTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(compiledModel, "BatchToSpace"); + // CheckPluginRelatedResults(compiledModel, "BatchToSpace"); }; namespace { -const std::vector netPrecision = { - Precision::U8, - Precision::I8, - Precision::I32, - Precision::FP32, - Precision::BF16 +const std::vector model_types = { + ov::element::Type_t::u8, + ov::element::Type_t::i8, + ov::element::Type_t::i32, + ov::element::Type_t::f32, + ov::element::Type_t::bf16 }; const std::vector> blockShape4D1 = {{1, 1, 1, 2}, {1, 2, 2, 1}}; @@ -226,7 +221,7 @@ const auto staticBatchToSpaceParamsSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto dynamicBatchToSpaceParamsSet4D1 = ::testing::Combine( @@ -234,7 +229,7 @@ const auto dynamicBatchToSpaceParamsSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_4D)); const auto dynamicBatchToSpaceParamsWithBlockedSet4D1 = ::testing::Combine( @@ -242,7 +237,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto staticBatchToSpaceParamsSet4D2 = ::testing::Combine( @@ -250,7 +245,7 @@ const auto staticBatchToSpaceParamsSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), 
::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto dynamicBatchToSpaceParamsSet4D2 = ::testing::Combine( @@ -258,7 +253,7 @@ const auto dynamicBatchToSpaceParamsSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), ::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_4D)); const auto dynamicBatchToSpaceParamsWithBlockedSet4D2 = ::testing::Combine( @@ -266,7 +261,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), ::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); INSTANTIATE_TEST_SUITE_P(smoke_StaticBatchToSpaceCPULayerTestCase1_4D, BatchToSpaceCPULayerTest, @@ -381,7 +376,7 @@ const auto staticBatchToSpaceParamsSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto dynamicBatchToSpaceParamsSet5D1 = ::testing::Combine( @@ -389,7 +384,7 @@ const auto dynamicBatchToSpaceParamsSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_5D)); const auto dynamicBatchToSpaceParamsWithBlockedSet5D1 = ::testing::Combine( @@ -397,7 +392,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto staticBatchToSpaceParamsSet5D2 = ::testing::Combine( @@ -405,7 +400,7 @@ const auto staticBatchToSpaceParamsSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto dynamicBatchToSpaceParamsSet5D2 = ::testing::Combine( @@ -413,7 +408,7 @@ const auto dynamicBatchToSpaceParamsSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_5D)); const auto dynamicBatchToSpaceParamsWithBlockedSet5D2 = ::testing::Combine( @@ -421,7 +416,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); INSTANTIATE_TEST_SUITE_P(smoke_StaticBatchToSpaceCPULayerTestCase1_5D, BatchToSpaceCPULayerTest, diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp index 
2f9706e7d2562e..282ebef47ba9bb 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp @@ -122,6 +122,14 @@ void RandomUniformLayerTestCPU::SetUp() { const ov::ResultVector results{std::make_shared(rnd_op)}; function = std::make_shared(results, in_params, "RandomUniformLayerTestCPU"); + + // todo: issue: 123320 + if (!InferenceEngine::with_cpu_x86_avx512_core()) { + convert_precisions.insert({ ov::element::bf16, ov::element::f32 }); + } + if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { + convert_precisions.insert({ ov::element::f16, ov::element::f32 }); + } } template @@ -206,19 +214,6 @@ void RandomUniformLayerTestCPU::compare(const std::vector& expected, #undef CASE } -precisions_map RandomUniformLayerTestCPU::get_ref_precisions_convert_map() { - precisions_map precisions; - - if (!InferenceEngine::with_cpu_x86_avx512_core()) { - precisions.insert({ ov::element::bf16, ov::element::f32 }); - } - if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { - precisions.insert({ ov::element::f16, ov::element::f32 }); - } - - return precisions; -} - inline double less_or_equal(double a, double b) { return (b - a) >= (std::fmax(std::fabs(a), std::fabs(b)) * std::numeric_limits::epsilon()); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp index 1cb9f5fccc451a..8e071439bc8577 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp @@ -35,8 +35,6 @@ class RandomUniformLayerTestCPU : public testing::WithParamInterface& expected, const std::vector& actual) override; - precisions_map get_ref_precisions_convert_map() override; - template void rndUCompare(const ov::Tensor& expected, const ov::Tensor& actual); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp index 455c78a8c09ed0..a602d3cbac45a8 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp @@ -94,6 +94,13 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -111,38 +118,6 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to 
build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - - void validate() override { - auto actualOutputs = get_plugin_outputs(); - if (function->get_parameters().size() == 2) { - auto pos = std::find_if(inputs.begin(), inputs.end(), - [](const std::pair, ov::Tensor> ¶ms) { - return params.first->get_friendly_name() == "param_1"; - }); - IE_ASSERT(pos != inputs.end()); - inputs.erase(pos); - } - auto expectedOutputs = calculate_refs(); - if (expectedOutputs.empty()) { - return; - } - ASSERT_EQ(actualOutputs.size(), expectedOutputs.size()) - << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size(); - - compare(expectedOutputs, actualOutputs); - } - void configure_model() override { ov::preprocess::PrePostProcessor p(function); { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp index 03f1f707254bc5..96a295830079ed 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp @@ -93,6 +93,13 @@ class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -110,18 +117,6 @@ class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp index b92646e458130e..cda499b042fb4e 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp @@ -371,6 +371,65 @@ class LoopForConcatLayerCPUTest : public LoopLayerCPUTest { } }; +class StaticLoopDynamicSubgraphCPUTest : public SubgraphBaseTest { + void SetUp() override { + InputShape input_shape = {{25, 1, 1}, {{25, 1, 1}}}; + InputShape input_exec_flag_shape = {{1}, {{1}}}; + targetDevice = ov::test::utils::DEVICE_CPU; + ElementType netType = ov::element::f32; + 
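The deconvolution workaround described in the comments above (replace the 3rd input parameter with a constant so the reference graph gets static output shapes) boils down to a parameter-to-constant substitution on an ov::Model. Below is a hedged sketch using the core OpenVINO graph API, under the assumption that the shape input is the model's last parameter; the tests' actual createGraph helper achieves the equivalent with its own plumbing, and freeze_shape_input is a hypothetical name.

    #include "openvino/core/graph_util.hpp"  // ov::replace_node
    #include "openvino/core/model.hpp"
    #include "openvino/op/constant.hpp"

    // Bake a runtime-provided output_shape into the graph: swap the trailing
    // Parameter for a Constant and drop it from the model signature, so static
    // shape inference can run end to end.
    void freeze_shape_input(const std::shared_ptr<ov::Model>& model,
                            const std::vector<int64_t>& output_shape) {
        auto param = model->get_parameters().back();  // assumed: the shape input
        auto constant = ov::op::v0::Constant::create(
            ov::element::i64, ov::Shape{output_shape.size()}, output_shape);
        ov::replace_node(param, constant);
        model->remove_parameter(param);
        model->validate_nodes_and_infer_types();
    }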
init_input_shapes({input_shape, input_exec_flag_shape}); + + ov::ParameterVector params; + params.push_back(std::make_shared(netType, inputDynamicShapes[0])); + + // exec_condition + params.push_back(std::make_shared(ov::element::boolean, inputDynamicShapes[1])); + + auto trip_count_input = std::make_shared(ov::element::i64, ov::Shape{1}, 2); + auto body_condition_const = std::make_shared(ov::element::boolean, ov::Shape{1}, true); + + // Body parameters + ov::ParameterVector body_params = {std::make_shared(netType, ov::PartialShape{25, 1, -1})}; + + // Body + auto broadcast_target_shape = std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{25, 1, 256}); + auto broadcast_axis_mapping = std::make_shared(ov::element::i64, ov::Shape{1}, 0); + auto broadcast = std::make_shared(body_params[0], broadcast_target_shape); + auto body = std::make_shared(ov::OutputVector{body_condition_const, broadcast}, body_params); + + auto loop = std::make_shared(trip_count_input, params[1]); + loop->set_function(body); + loop->set_special_body_ports(ov::op::v5::Loop::SpecialBodyPorts{-1, 0}); + + loop->set_merged_input(body_params.front(), params.front(), broadcast); + + auto out0 = loop->get_iter_value(body_condition_const, -1); + auto out1 = loop->get_iter_value(broadcast, -1); + + auto result0 = std::make_shared(out0); + auto result1 = std::make_shared(out1); + function = std::make_shared(ov::ResultVector{result0, result1}, params, "loop"); + } + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + + if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto* dataPtr = tensor.data(); + *dataPtr = true; + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } +}; + + TEST_P(LoopLayerCPUTest, CompareWithRefs) { run(); } @@ -387,6 +446,10 @@ TEST_P(LoopForConcatLayerCPUTest, CompareWithRefs) { run(); } +TEST_F(StaticLoopDynamicSubgraphCPUTest, smoke_StaticLoopWithDynSubgraph) { + run(); +} + namespace { const std::vector inputPrecisions = { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp index 8eda5f4221e77e..84f8c4b4740b22 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp @@ -101,13 +101,10 @@ class OneHotLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { if (function->get_parameters().size() == 2) { generateDepth(); - funcRef = createFunction(true); + functionRefs = createFunction(true); } - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); } void validate() override { auto actualOutputs = get_plugin_outputs(); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp new file mode 100644 index 00000000000000..1385313ce88d41 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 
+// + +#include +#include "test_utils/cpu_test_utils.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ov_models/utils/ov_helpers.hpp" +#include "ov_models/builders.hpp" + +using namespace CPUTestUtils; +using namespace InferenceEngine; + +namespace SubgraphTestsDefinitions { +// If a node (CumSum) with constant parents has several non-constant consumer nodes (Eltwises), then the edge is broken. +// The fix is to check the node type - it should be Input. +// Subgraph: +/* + * Constant Constant + * \ / + * \ / + * CumSum + * Parameter / \ Parameter + * \ / \ / + * \ / \ / + * Eltwise Eltwise + * \ / + * Eltwise + * | + * Result + */ + +using namespace ov::test; + +class NonInputInPlaceTest : public testing::WithParamInterface<ElementType>, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo<ElementType> obj) { + std::ostringstream result; + result << "NonInputInPlaceTest_inPrc=outPrc=" << obj.param; + return result.str(); + } + + void SetUp() override { + targetDevice = utils::DEVICE_CPU; + configuration.insert({ov::hint::inference_precision.name(), ov::element::f16.to_string()}); + const std::vector<size_t> inputShape = {1, 11, 3, 3}; + targetStaticShapes = {{inputShape, inputShape}}; + ElementType prc = this->GetParam(); + + ov::ParameterVector inputParams {std::make_shared<ov::op::v0::Parameter>(prc, ov::Shape(inputShape)), + std::make_shared<ov::op::v0::Parameter>(prc, ov::Shape(inputShape))}; + + auto cumsum_tensor = ngraph::opset8::Constant::create(prc, inputShape, {10.0f}); + auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i32, {}, {0}); + const auto cumsum = std::make_shared<ngraph::opset8::CumSum>(cumsum_tensor, axis_node); + + auto eltwiseMul = ngraph::builder::makeEltwise(inputParams[0], cumsum, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto eltwiseAdd1 = ngraph::builder::makeEltwise(inputParams[1], cumsum, ngraph::helpers::EltwiseTypes::ADD); + auto eltwiseAdd2 = ngraph::builder::makeEltwise(eltwiseAdd1, eltwiseMul, ngraph::helpers::EltwiseTypes::ADD); + + ngraph::ResultVector results{std::make_shared<ngraph::opset8::Result>(eltwiseAdd2)}; + function = std::make_shared<ngraph::Function>(results, inputParams, "NonInputInPlaceT"); + } +}; + +namespace { + TEST_P(NonInputInPlaceTest, CompareWithRefs) { + run(); + } + +INSTANTIATE_TEST_SUITE_P(smoke_NonInputInPlaceTest_CPU, NonInputInPlaceTest, + testing::Values(ngraph::element::f32, ngraph::element::f16), + NonInputInPlaceTest::getTestCaseName); + +} // namespace +} // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp index 612006be75dc2b..1a55b7204045d6 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp @@ -61,7 +61,6 @@ class InputOutputTensorReuse : public SubgraphBaseTest { TEST_F(InputOutputTensorReuse, smoke_Input_Output_Binding) { compile_model(); std::vector<ov::Shape> inputShapes = {{1, 32, 5, 16}, {1, 32, 1, 16}}; - init_ref_function(functionRefs, inputShapes); generate_inputs(inputShapes); validate(); @@ -69,7 +68,6 @@ TEST_F(InputOutputTensorReuse, smoke_Input_Output_Binding) { for (size_t i = 0; i < num_iter; i++) { auto outputTensor = inferRequest.get_output_tensor(0); inputShapes.back() = outputTensor.get_shape(); - init_ref_function(functionRefs, inputShapes); auto itr = std::find_if(inputs.begin(), inputs.end(), [](const std::pair<std::shared_ptr<ov::Node>, ov::Tensor>& item) { return 
item.first->get_friendly_name() == "Param_1"; }); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp index 7a6ef6e67c1d21..3a760050d0159a 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp @@ -6,8 +6,10 @@ #include #include #include +#include #include "snippets/op/scalar.hpp" #include "lowering_utils.hpp" +#include "common_test_utils/common_utils.hpp" #include "snippets/pass_manager.hpp" namespace ov { @@ -61,7 +63,7 @@ class EltwiseWithMulAddFunction : public SnippetsFunctionBase { ParameterVector parameters{data0, data1}; std::shared_ptr data2; if (scalar_input) { - data2 = std::make_shared(precision, Shape{}, 2.f); + data2 = std::make_shared(precision, Shape{1}, 2.f); } else { auto parameter = std::make_shared(precision, input_shapes[2]); parameters.push_back(parameter); @@ -110,8 +112,8 @@ class MulAddToFMATests : public LoweringTests, public testing::WithParamInterfac std::ostringstream result; for (size_t i = 0; i < inputShapes.size(); i++) - result << "IS[" << i << "]=" << inputShapes[i] << "_"; - result << "MS=" << master_shape << "_"; + result << "IS[" << i << "]=" << ov::test::utils::partialShape2str({inputShapes[i]}) << "_"; + result << "MS=" << ov::test::utils::partialShape2str({master_shape}) << "_"; result << "add_input_idx=" << add_input_idx; return result.str(); } @@ -146,7 +148,8 @@ TEST_P(MulAddToFMATests, MulAddToFMATests) { backend_passes, {}, {}, - generator); + generator, + std::make_shared()); model = subgraph->body_ptr(); model_ref = snippets_model->getLowered(); } diff --git a/src/plugins/intel_cpu/thirdparty/mlas b/src/plugins/intel_cpu/thirdparty/mlas index c7c8a631315000..f6425b13943348 160000 --- a/src/plugins/intel_cpu/thirdparty/mlas +++ b/src/plugins/intel_cpu/thirdparty/mlas @@ -1 +1 @@ -Subproject commit c7c8a631315000f17c650af34431009d2f22129c +Subproject commit f6425b1394334822390fcd9da12788c9cd0d11da diff --git a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp index 32903887e81181..b1eb1e0539e9a0 100644 --- a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp +++ b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp @@ -51,6 +51,7 @@ #include "legacy/ngraph_ops/selu_ie.hpp" #include "legacy/ngraph_ops/tile_ie.hpp" #include "legacy/ngraph_ops/topk_ie.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" #include "transformations/utils/utils.hpp" @@ -475,6 +476,11 @@ void CNNLayerCreator::on_adapter(const std::string& name, ::ngraph::ValueAccesso const auto data_beg = static_cast(a->get()->get_ptr()); params[name] = std::string(data_beg, a->get()->size()); } + } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter>>(&adapter)) { + if (std::string(node->get_type_name()) != "Constant") { + const auto data_beg = static_cast(a->get()->get_ptr()); + params[name] = std::string(data_beg, a->get()->size()); + } } else if (const auto& a = ngraph::as_type>(&adapter)) { const auto& attrs = a->get(); params[name] = details::joinVec(attrs); diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp 
b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp index 106b7696ccec40..05fac378c01874 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -102,5 +102,7 @@ std::vector<std::string> disabledTestPatterns() { R"(.*SplitConvTest.CompareWithRefImpl.*IS=\(1.(128|256)\).*IC=4.*OC=4.*configItem=GNA_DEVICE_MODE_GNA_SW_FP32)", // TODO: Issue: 114149 R"(.*smoke_Decompose2DConv.*)", + // TODO: Issue: 123306 + R"(smoke_convert_matmul_to_fc/ConvertMatmulToFcWithTransposesPass.CompareWithRefImpl/netPRC=FP(32|16)_targetDevice=GNA__configItem=GNA_COMPACT_MODE_NO_configItem=GNA_DEVICE_MODE_GNA_SW_(FP32|EXACT)_IS=\(8.*)", }; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp new file mode 100644 index 00000000000000..a6b7432ac28c51 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { +/// \brief Operator performing Root Mean Square Normalization +/// +/// \note Normalizes the summed input according to its RMS statistics, making the result invariant to re-scaling of the input +class RMS : public ov::op::Op { +public: + OPENVINO_OP("RMS", "gpu_opset"); + + RMS() = default; + /// \brief Constructs an RMS operation. + /// + /// \param data Input tensor with data + /// \param gamma Gamma values for weight + /// \param epsilon Epsilon for avoiding division by zero while normalizing the value + /// \param output_type Output element type + RMS(const Output<Node>& data, + const Output<Node>& gamma, + double epsilon, + const ov::element::Type output_type = ov::element::undefined); + + bool visit_attributes(ov::AttributeVisitor& visitor) override; + + void validate_and_infer_types() override; + + std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + double get_epsilon() const { return m_epsilon; } + + void set_epsilon(double epsilon) { m_epsilon = epsilon; } + +private: + double m_epsilon{0}; + ov::element::Type m_output_type; +}; + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 17e62ca926397b..ceba5be5a5dd53 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -268,3 +268,4 @@ REGISTER_FACTORY(internal, NmsStaticShapeIE8); REGISTER_FACTORY(internal, MulticlassNmsIEInternal); REGISTER_FACTORY(internal, FullyConnected); REGISTER_FACTORY(internal, FullyConnectedCompressed); +REGISTER_FACTORY(internal, RMS); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp index b9614cd47258c0..a0c5c7138764bf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp @@ -17,6 +17,12 @@ namespace cldnn { struct non_max_suppression : public primitive_base<non_max_suppression> { CLDNN_DECLARE_PRIMITIVE(non_max_suppression) + enum Rotation { + NONE, + CLOCKWISE, + COUNTERCLOCKWISE + }; + non_max_suppression() : primitive_base("", 
{}), selected_indices_num(0), center_point_box(false), @@ -68,6 +74,7 @@ struct non_max_suppression : public primitive_base { primitive_id soft_nms_sigma; primitive_id second_output; primitive_id third_output; + Rotation rotation{Rotation::NONE}; size_t hash() const override { size_t seed = primitive::hash(); @@ -79,6 +86,7 @@ struct non_max_suppression : public primitive_base { seed = hash_combine(seed, soft_nms_sigma.empty()); seed = hash_combine(seed, second_output.empty()); seed = hash_combine(seed, third_output.empty()); + seed = hash_combine(seed, rotation); return seed; } @@ -97,7 +105,8 @@ struct non_max_suppression : public primitive_base { cmp_fields(score_threshold.empty()) && cmp_fields(soft_nms_sigma.empty()) && cmp_fields(second_output.empty()) && - cmp_fields(third_output.empty()); + cmp_fields(third_output.empty()) && + cmp_fields(rotation); #undef cmp_fields } @@ -130,6 +139,7 @@ struct non_max_suppression : public primitive_base { ob << soft_nms_sigma; ob << second_output; ob << third_output; + ob << make_data(&rotation, sizeof(rotation)); } void load(BinaryInputBuffer& ib) override { @@ -143,6 +153,7 @@ struct non_max_suppression : public primitive_base { ib >> soft_nms_sigma; ib >> second_output; ib >> third_output; + ib >> make_data(&rotation, sizeof(rotation)); } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp new file mode 100644 index 00000000000000..cf2ad7af502da9 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "primitive.hpp" + +namespace cldnn { + +/// @brief Root Mean Square Normalization primitive +/// @details Performs re-scaling invariance and regularizes the summed input according to RMS statistics +struct rms : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(rms); + + rms() : primitive_base("", {}) {} + + /// @brief Constructs rms primitive + /// @param id This primitive id + /// @param input Input primitive id + /// @param gamma Gamma values for weight + /// @param epsilon Epsilon for not dividing by zero while normalizing + rms(const primitive_id& id, + const input_info& input, + const input_info& gamma, + const float epsilon, + const padding& output_padding = padding()) + : primitive_base(id, {input, gamma}, {output_padding}), + epsilon(epsilon) {} + + /// @brief Epsilon for not dividing by zero while normalizing + float epsilon; + + size_t hash() const override { + size_t seed = primitive::hash(); + seed = hash_combine(seed, epsilon); + return seed; + } + + bool operator==(const primitive& rhs) const override { + if (!compare_common_params(rhs)) + return false; + + auto rhs_casted = downcast(rhs); + + return epsilon == rhs_casted.epsilon; + } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << epsilon; + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> epsilon; + } +}; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 45739c78a1e36a..cc9d8602316fa5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -1221,6 +1221,8 @@ void 
prepare_primitive_fusing::fuse_constant_transposes(program& p) { return format::find_format(new_order, fmt.block_sizes()); }; + std::vector> to_replace_nodes; + auto& proc_order = p.get_processing_order(); auto itr = proc_order.begin(); while (itr != proc_order.end()) { @@ -1285,9 +1287,7 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { auto new_reorder = std::make_shared(next_node->id() + "_reorder_fmt", new_const_node.id(), reorder_layout); auto& new_reorder_node = p.get_or_create(new_reorder); - p.replace(*next_node, new_reorder_node); - new_reorder_node.recalc_output_layout(false); - itr = std::find(proc_order.begin(), proc_order.end(), &new_reorder_node); + to_replace_nodes.emplace_back(std::make_pair(next_node, &new_reorder_node)); } else { layout reorder_layout = new_const_node.get_output_layout(); reorder_layout.format = format::bfyx; @@ -1299,6 +1299,11 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { } } } + + for (auto& nodes : to_replace_nodes) { + p.replace(*nodes.first, *nodes.second); + nodes.second->recalc_output_layout(false); + } } void prepare_primitive_fusing::optimize_fused_ops(program& p) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp index 7405729120bfbd..f89980a3f936d4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp @@ -143,6 +143,17 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl params.sort_result_descending = primitive->sort_result_descending; params.box_encoding = primitive->center_point_box ? kernel_selector::BoxEncodingType::BOX_ENCODING_CENTER : kernel_selector::BoxEncodingType::BOX_ENCODING_CORNER; + switch (primitive->rotation) { + case non_max_suppression::Rotation::CLOCKWISE: + params.rotation = kernel_selector::NMSRotationType::CLOCKWISE; + break; + case non_max_suppression::Rotation::COUNTERCLOCKWISE: + params.rotation = kernel_selector::NMSRotationType::COUNTERCLOCKWISE; + break; + default: + params.rotation = kernel_selector::NMSRotationType::NONE; + } + if (impl_param.get_program().get_node(primitive->id).is_dynamic()) { params.reuse_internal_buffer = true; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 6b35b9cdfb16ce..3cc96ee00735ce 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -65,6 +65,7 @@ void register_implementations() { REGISTER_OCL(reshape); REGISTER_OCL(reverse); REGISTER_OCL(reverse_sequence); + REGISTER_OCL(rms); REGISTER_OCL(roi_align); REGISTER_OCL(roi_pooling); REGISTER_OCL(roll); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index 45f4018bf90dac..d591499148e77a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -59,6 +59,7 @@ #include "intel_gpu/primitives/resample.hpp" #include "intel_gpu/primitives/reshape.hpp" #include "intel_gpu/primitives/reverse_sequence.hpp" +#include "intel_gpu/primitives/rms.hpp" #include "intel_gpu/primitives/roi_align.hpp" #include "intel_gpu/primitives/roi_pooling.hpp" #include "intel_gpu/primitives/roll.hpp" @@ -146,6 +147,7 @@ REGISTER_OCL(reorg_yolo); REGISTER_OCL(reshape); REGISTER_OCL(reverse); 
REGISTER_OCL(reverse_sequence); +REGISTER_OCL(rms); REGISTER_OCL(roi_align); REGISTER_OCL(roi_pooling); REGISTER_OCL(roll); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp new file mode 100644 index 00000000000000..71f44e685157b0 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp @@ -0,0 +1,65 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "primitive_base.hpp" + +#include "rms_inst.h" +#include "rms/rms_kernel_selector.h" +#include "rms/rms_kernel_ref.h" + +namespace cldnn { +namespace ocl { + +struct rms_impl : typed_primitive_impl_ocl { + using parent = typed_primitive_impl_ocl; + using parent::parent; + using kernel_selector_t = kernel_selector::rms_kernel_selector; + using kernel_params_t = std::pair; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::rms_impl); + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { + const auto& primitive = impl_param.typed_desc(); + auto params = get_default_params(impl_param, is_shape_agnostic); + auto optional_params = get_default_optional_params(impl_param.get_program()); + + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); + params.epsilon = primitive->epsilon; + return {params, optional_params}; + } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_kernel_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + } +}; + +namespace detail { + +attach_rms_impl::attach_rms_impl() { + auto types = { + data_types::f32, + data_types::f16, + data_types::i32 + }; + + auto formats = { + format::bfyx, + format::bfzyx + }; + + implementation_map::add(impl_types::ocl, + shape_types::any, + typed_primitive_impl_ocl::create, + types, + formats); +} + +} // namespace detail +} // namespace ocl +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/rms_inst.h b/src/plugins/intel_gpu/src/graph/include/rms_inst.h new file mode 100644 index 00000000000000..a7800249f40421 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/rms_inst.h @@ -0,0 +1,44 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "intel_gpu/primitives/rms.hpp" +#include "primitive_inst.h" + +#include + +namespace cldnn { + +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {}; } +}; + +using rms_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + +public: + template + static std::vector calc_output_layouts(rms_node const& /*node*/, const kernel_impl_params& impl_params) { + return forward_input0_shape(impl_params); + } + static layout calc_output_layout(rms_node const& node, kernel_impl_params const& impl_params); + static std::string to_string(rms_node const& node); + + typed_primitive_inst(network& network, rms_node const& node); +}; + +using rms_inst = typed_primitive_inst; + +} // namespace cldnn diff --git 
a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 69b1e12fa3b4ae..ca4569a7df7099 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1484,17 +1484,21 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format if (blocked_formats.find(node.get_input_layout(0).format) != blocked_formats.end()) { preferred_impl = impl_types::ocl; } else { - auto& nms_node = node.as(); - auto scores_layout = nms_node.input_scores().get_output_layout(); - if (scores_layout.is_dynamic()) { + const auto& nms_node = node.as(); + if (nms_node.get_primitive()->rotation != non_max_suppression::Rotation::NONE) { preferred_impl = impl_types::ocl; } else { - const size_t kBatchNum = scores_layout.batch(); - const size_t kClassNum = scores_layout.feature(); - const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); - const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; - preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu; + const auto scores_layout = nms_node.input_scores().get_output_layout(); + if (scores_layout.is_dynamic()) { + preferred_impl = impl_types::ocl; + } else { + const size_t kBatchNum = scores_layout.batch(); + const size_t kClassNum = scores_layout.feature(); + const size_t kNStreams = + static_cast(node.get_program().get_config().get_property(ov::streams::num)); + const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; + preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu; + } } } } else if (node.is_type()) { diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 6c1e88de349115..92f9f60743b9f5 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -242,6 +242,7 @@ event::ptr primitive_inst::set_output_memory(memory::ptr mem_new, bool check, si } void primitive_inst::update_shape() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_shape: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::shape_inference); if (update_shape_done_by_other) { update_shape_done_by_other = false; // reset @@ -341,6 +342,7 @@ void primitive_inst::update_shape() { } if (has_runtime_deps) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_shape_sync: " + id())); if (!dependencies_events.empty() && queue_type == QueueTypes::out_of_order) { _network.get_stream().wait_for_events(dependencies_events); } else if (queue_type == QueueTypes::in_order) { @@ -380,6 +382,7 @@ void primitive_inst::update_shape() { } event::ptr primitive_inst::realloc_if_needed() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::memory_allocation); @@ -493,6 +496,7 @@ bool primitive_inst::use_async_compilation() { } bool primitive_inst::update_impl() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_impl: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_implementation); auto prev_impl_str = _impl != nullptr ? 
_impl->get_kernel_name() : "nullptr"; @@ -656,6 +660,7 @@ bool primitive_inst::update_impl() { } void primitive_inst::do_runtime_skip_reorder() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_skip_reorder: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->disable_runtime_skip_reorder) { return; @@ -713,6 +718,7 @@ void primitive_inst::do_runtime_skip_reorder() { } void primitive_inst::do_runtime_in_place_concat() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { return; @@ -780,6 +786,7 @@ bool primitive_inst::has_inner_networks() const { } event::ptr primitive_inst::execute(const std::vector& events) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("primitive_inst::execute: " + id())); const auto primitive_id = id(); OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input"); GPU_DEBUG_GET_INSTANCE(debug_config); @@ -802,6 +809,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } if (!is_valid_fusion()) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("unfused_subgraph_exec: " + id())); auto subgraph = get_unfused_subgraph(); for (auto& d : _deps) { @@ -859,16 +867,16 @@ event::ptr primitive_inst::execute(const std::vector& events) { GPU_DEBUG_TRACE << id() << ": execute " << _impl->get_kernel_name() << " (is_dynamic=" << _impl->is_dynamic() << ", " << "can_be_optimized=" << can_be_optimized() << ")" << std::endl; + const bool out_of_order_queue = get_network().get_stream().get_queue_type() == QueueTypes::out_of_order; if (_exec_deps.empty() && dependencies.empty()) { dependencies = events; } else { - auto queue_type = get_network().get_stream().get_queue_type(); // Prepare dependencies events in case of OOO queue, CPU implementation, // or optimized_out impl which has CPU users (needs_completion_event() && !is_output() condition) - if (queue_type == QueueTypes::out_of_order || _impl->is_cpu() || (can_be_optimized() && needs_completion_event() && !is_output())) { + if (out_of_order_queue || _impl->is_cpu() || (can_be_optimized() && needs_completion_event() && !is_output())) { dependencies.reserve(dependencies.size() + _exec_deps.size()); for (auto& input : _exec_deps) { - if (input->is_input() && queue_type != QueueTypes::out_of_order) + if (input->is_input() && !out_of_order_queue) continue; auto id = input->id(); try { @@ -883,6 +891,13 @@ event::ptr primitive_inst::execute(const std::vector& events) { } } + // Replace multiple events with single grouped event in case of barriers synchronization to prevent `_last_barrier_ev` usage as a dependency + // event of optimized_out instance's users, which may lead to unwanted extra synchronization of CPU impls with GPU kernels + if (_node && _node->is_in_shape_of_subgraph() && can_be_optimized() && dependencies.size() > 1 && out_of_order_queue) { + auto grouped_ev = get_network().get_stream().group_events(dependencies); + dependencies = {grouped_ev}; + } + { GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::inference); auto ev = _impl->execute(dependencies, *this); @@ -905,6 +920,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } void primitive_inst::set_arguments() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, 
openvino::itt::handle("set_arguments: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::set_arguments); OPENVINO_ASSERT(_has_valid_input, id(), " has invalid/unset input"); _impl->set_arguments(*this); @@ -1138,6 +1154,7 @@ void primitive_inst::allocate_internal_buffers(bool reset) { } event::ptr primitive_inst::update_weights() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_weights: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_weights); if (!_impl) return nullptr; diff --git a/src/plugins/intel_gpu/src/graph/rms.cpp b/src/plugins/intel_gpu/src/graph/rms.cpp new file mode 100644 index 00000000000000..5002417df57394 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/rms.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_inst.h" + +#include "primitive_type_base.h" +#include "json_object.h" +#include + +namespace cldnn { +GPU_DEFINE_PRIMITIVE_TYPE_ID(rms); + +layout rms_inst::calc_output_layout(rms_node const& node, kernel_impl_params const& impl_param) { + auto desc = impl_param.typed_desc(); + auto input_layout = impl_param.get_input_layout(); + auto output_type = desc->output_data_types[0].value_or(input_layout.data_type); + auto output_format = input_layout.format; + + return layout(output_type, output_format, input_layout.get_tensor()); +} + +std::string rms_inst::to_string(rms_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + + std::stringstream primitive_description; + + json_composite rms_info; + rms_info.add("input_id", node.input(0).id()); + rms_info.add("epsilon", desc->epsilon); + + node_info->add("rms_info", rms_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +rms_inst::typed_primitive_inst(network& network, rms_node const& node) : parent(network, node) {} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl index 36651d8773fe6c..cf26d0cbc276c0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl @@ -62,7 +62,7 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const boxes[INPUT0_GET_INDEX(batch, boxId, 2, 0)], boxes[INPUT0_GET_INDEX(batch, boxId, 3, 0)]); -#if BOX_ENCODING == 0 +#if !defined(ROTATION) && BOX_ENCODING == 0 const COORD_TYPE ax1 = min(coords[1], coords[3]); const COORD_TYPE ax2 = max(coords[1], coords[3]); const COORD_TYPE ay1 = min(coords[0], coords[2]); @@ -76,9 +76,331 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const return coords; } +#ifdef ROTATION + +typedef struct { + float x, y; +} FUNC(Point2D); +#define POINT_2D FUNC(Point2D) + +inline void FUNC(getRotatedVertices)(const COORD_TYPE_4 box, const INPUT0_TYPE angle, POINT_2D* pts) { + const float theta = angle + #if ROTATION == 2 + * -1.0f + #endif + ; + float cosTheta2 = cos(theta) * 0.5f; + float sinTheta2 = sin(theta) * 0.5f; + + // y: top --> down; x: left --> right + // Left-Down + pts[0].x = box[0]/*.x_ctr*/ - sinTheta2 * box[3]/*.h*/ - cosTheta2 * box[2]/*.w*/; + pts[0].y = box[1]/*.y_ctr*/ + cosTheta2 * box[3]/*.h*/ - sinTheta2 * box[2]/*.w*/; + // Left-Top + pts[1].x = box[0]/*.x_ctr*/ + 
sinTheta2 * box[3]/*.h*/ - cosTheta2 * box[2]/*.w*/; + pts[1].y = box[1]/*.y_ctr*/ - cosTheta2 * box[3]/*.h*/ - sinTheta2 * box[2]/*.w*/; + // Right-Top + pts[2].x = 2 * box[0]/*.x_ctr*/ - pts[0].x; + pts[2].y = 2 * box[1]/*.y_ctr*/ - pts[0].y; + // Right-Down + pts[3].x = 2 * box[0]/*.x_ctr*/ - pts[1].x; + pts[3].y = 2 * box[1]/*.y_ctr*/ - pts[1].y; +} + +inline float FUNC(dot2D)(const POINT_2D A, const POINT_2D B) { + return A.x * B.x + A.y * B.y; +} + +inline float FUNC(cross2D)(const POINT_2D A, const POINT_2D B) { + return A.x * B.y - B.x * A.y; +} + +inline int FUNC(getIntersectionPoints)(const POINT_2D* pts1, const POINT_2D* pts2, POINT_2D* intersections) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + POINT_2D vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i].x = pts1[(i + 1) % 4].x - pts1[i].x; + vec1[i].y = pts1[(i + 1) % 4].y - pts1[i].y; + vec2[i].x = pts2[(i + 1) % 4].x - pts2[i].x; + vec2[i].y = pts2[(i + 1) % 4].y - pts2[i].y; + } + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + float det = FUNC_CALL(cross2D)(vec2[j], vec1[i]); + // This takes care of parallel lines + if (fabs(det) <= 1e-14f) { + continue; + } + + POINT_2D vec12; + vec12.x= pts2[j].x - pts1[i].x; + vec12.y= pts2[j].y - pts1[i].y; + + float t1 = FUNC_CALL(cross2D)(vec2[j], vec12) / det; + float t2 = FUNC_CALL(cross2D)(vec1[i], vec12) / det; + + if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { + intersections[num].x = pts1[i].x + vec1[i].x * t1; + intersections[num].y = pts1[i].y + vec1[i].y * t1; + ++num; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const POINT_2D AB = vec2[0]; + const POINT_2D DA = vec2[3]; + float ABdotAB = FUNC_CALL(dot2D)(AB, AB); + float ADdotAD = FUNC_CALL(dot2D)(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. 
P's projection on AB lies within AB + // and P's projection on AD lies within AD + + POINT_2D AP; + AP.x = pts1[i].x - pts2[0].x; + AP.y = pts1[i].y - pts2[0].y; + + float APdotAB = FUNC_CALL(dot2D)(AP, AB); + float APdotAD = -FUNC_CALL(dot2D)(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num].x = pts1[i].x; + intersections[num].y = pts1[i].y; + ++num; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const POINT_2D AB = vec1[0]; + const POINT_2D DA = vec1[3]; + float ABdotAB = FUNC_CALL(dot2D)(AB, AB); + float ADdotAD = FUNC_CALL(dot2D)(DA, DA); + for (int i = 0; i < 4; i++) { + POINT_2D AP; + AP.x = pts2[i].x - pts1[0].x; + AP.y = pts2[i].y - pts1[0].y; + + float APdotAB = FUNC_CALL(dot2D)(AP, AB); + float APdotAD = -FUNC_CALL(dot2D)(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num].x = pts2[i].x; + intersections[num].y = pts2[i].y; + ++num; + } + } + } + + return num; +} + +inline void FUNC(swapPoints)(POINT_2D* a, POINT_2D* b) +{ + POINT_2D temp = *a; + *a = *b; + *b = temp; +} + +inline void FUNC(sortPoints)(POINT_2D* arr, int l, int h) +{ + for (int i = 0; i < h-l; i++) { + bool swapped = false; + + for (int j = l; j < h-i; j++) { + bool is_less = false; + const float temp = FUNC_CALL(cross2D)(arr[j], arr[j+1]); + if (fabs(temp) < 1e-6f) { + is_less = FUNC_CALL(dot2D)(arr[j], arr[j]) < FUNC_CALL(dot2D)(arr[j+1], arr[j+1]); + } else { + is_less = temp > 0; + } + + if (is_less) { + continue; + } + + FUNC_CALL(swapPoints)(&arr[j], &arr[j+1]); + swapped = true; + } + + if (!swapped) { + break; + } + } +} + +inline int FUNC(convex_hull_graham)(const POINT_2D* p, const int num_in, POINT_2D* q, bool shift_to_zero) { + if (num_in < 2) { + return -1; + } + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. + int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + const POINT_2D start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i].x = p[i].x - start.x; + q[i].y = p[i].y - start.y; + } + + // Swap the starting point to position 0 + FUNC_CALL(swapPoints)(&q[t], &q[0]); + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + float dist[24]; + for (int i = 0; i < num_in; i++) { + dist[i] = FUNC_CALL(dot2D)(q[i], q[i]); + } + + FUNC_CALL(sortPoints)(q, 1, num_in - 1); + + // compute distance to origin after sort, since the points are now different. + for (int i = 0; i < num_in; i++) { + dist[i] = FUNC_CALL(dot2D)(q[i], q[i]); + } + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8f) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0].x = p[t].x; + q[0].y = p[t].y; + return 1; + } + + q[1].x = q[k].x; + q[1].y = q[k].y; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. 
+ // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + POINT_2D diff1, diff2; + diff1.x = q[i].x - q[m - 2].x; + diff1.y = q[i].y - q[m - 2].y; + diff2.x = q[m - 1].x - q[m - 2].x; + diff2.y = q[m - 1].y - q[m - 2].y; + + float cross2d_diff = FUNC_CALL(cross2D)(diff1, diff2); + + while (m > 1 && cross2d_diff >= 0) { + m--; + } + q[m].x = q[i].x; + q[m].y = q[i].y; + ++m; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. + if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i].x += start.x; + q[i].y += start.y; + } + } + + return m; +} + +inline float FUNC(polygon_area)(const POINT_2D* q, const int m) { + if (m <= 2) { + return 0.f; + } + + float area = 0.f; + for (int i = 1; i < m - 1; i++) { + POINT_2D diff1, diff2; + diff1.x = q[i].x - q[0].x; + diff1.y = q[i].y - q[0].y; + diff2.x = q[i + 1].x - q[0].x; + diff2.y = q[i + 1].y - q[0].y; + float cross_result = FUNC_CALL(cross2D)(diff1, diff2); + + area += fabs(cross_result); + } + + return area / 2.0f; +} + +inline float FUNC(rotatedBoxesIntersection)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA, + const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from get_intersection_points + POINT_2D intersectPts[24], orderedPts[24]; + POINT_2D pts1[4]; + POINT_2D pts2[4]; + FUNC_CALL(getRotatedVertices)(boxA, angleA, pts1); + FUNC_CALL(getRotatedVertices)(boxB, angleB, pts2); + // Find points defining area of the boxes intersection + int num = FUNC_CALL(getIntersectionPoints)(pts1, pts2, intersectPts); + + if (num <= 2) { + return 0.f; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. 
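+ // convex_hull_graham starts from the lowest point, sorts the rest around it, and
+ // prunes concave turns; polygon_area then accumulates fabs(cross2D)/2 over the
+ // triangle fan rooted at the first hull vertex.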
+ int num_convex = FUNC_CALL(convex_hull_graham)(intersectPts, num, orderedPts, true); + return FUNC_CALL(polygon_area)(orderedPts, num_convex); +} + + +inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA, + const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB) +{ + const float areaA = convert_float(boxA[3]) * convert_float(boxA[2]); + const float areaB = convert_float(boxB[3]) * convert_float(boxB[2]); + + if (areaA <= 0.0f || areaB <= 0.0f) + return 0.0f; + + const float intersection_area = FUNC_CALL(rotatedBoxesIntersection)(boxA, angleA, boxB, angleB); + const float union_area = areaA + areaB - intersection_area; + return intersection_area / union_area; +} + +#else + inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TYPE_4 boxB) { -#if BOX_ENCODING == 0 +#if !defined(ROTATION) && BOX_ENCODING == 0 /// CORNER const float areaA = convert_float(boxA[3] - boxA[1]) * convert_float(boxA[2] - boxA[0]); const float areaB = convert_float(boxB[3] - boxB[1]) * convert_float(boxB[2] - boxB[0]); @@ -110,6 +432,7 @@ inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TY const float union_area = areaA + areaB - intersection_area; return intersection_area / union_area; } +#endif // ROTATION inline float FUNC(scaleIOU)(float iou, float iou_threshold, float scale) { @@ -240,6 +563,16 @@ inline void FUNC(swap)(__global BOX_INFO* a, __global BOX_INFO* b) *b = temp; } +#ifdef ROTATION +inline void FUNC(reverseOutputBoxList)(__global BOX_INFO *outBoxes, int boxNum) +{ + for (int i = 0; i < boxNum / 2; ++i) { + FUNC_CALL(swap)(&outBoxes[i], &outBoxes[boxNum - 1 - i]); + } +} + +#else + inline void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNum) { for (int i = 0; i < boxNum - 1; ++i) { @@ -261,6 +594,7 @@ inline void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNu break; } } +#endif // ROTATION #ifdef NMS_STAGE_0 @@ -427,9 +761,11 @@ KERNEL (non_max_suppression_ref_stage_2)( const ushort classId = get_global_id(1); float scale = 0.0f; + #ifndef ROTATION if (SOFT_NMS_SIGMA_VAL > 0.0f) { scale = -0.5f / SOFT_NMS_SIGMA_VAL; } + #endif __global SBOX_INFO *sortedBoxList = (__global SBOX_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE]; const int kSortedBoxNum = buffer2[batchId * NUM_CLASSES + classId]; @@ -442,12 +778,22 @@ KERNEL (non_max_suppression_ref_stage_2)( SBOX_INFO next_candidate = sortedBoxList[i]; INPUT1_TYPE original_score = next_candidate.score; const COORD_TYPE_4 next_candidate_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, next_candidate.boxId); + #ifdef ROTATION + const INPUT0_TYPE next_candidate_angle = boxes[INPUT0_GET_INDEX(batchId, next_candidate.boxId, 4, 0)]; + #endif + ++i; bool should_hard_suppress = false; for (int j = selectedBoxNum - 1; j >= next_candidate.suppress_begin_index; --j) { const COORD_TYPE_4 selected_box_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, selectedBoxList[j].boxId); + #ifdef ROTATION + const INPUT0_TYPE selected_box_angle = boxes[INPUT0_GET_INDEX(batchId, selectedBoxList[j].boxId, 4, 0)]; + const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, next_candidate_angle, + selected_box_coord, selected_box_angle); + #else const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, selected_box_coord); + #endif next_candidate.score *= FUNC_CALL(scaleIOU)(iou, IOU_THRESHOLD_VAL, scale); if (iou >= IOU_THRESHOLD_VAL && !(SOFT_NMS_SIGMA_VAL > 0.0f)) { @@ -531,7 +877,11 @@ KERNEL 
(non_max_suppression_ref_stage_3)( } #if SORT_RESULT_DESCENDING == 1 +#ifdef ROTATION + FUNC_CALL(reverseOutputBoxList)(sortedBoxList, outputIdx); +#else FUNC_CALL(sortOutputBoxList)(sortedBoxList, outputIdx); +#endif #endif unroll_for (int i = 0; i < outputIdx; i++) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl new file mode 100644 index 00000000000000..77d16d06405aa5 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl @@ -0,0 +1,72 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/batch_headers/fetch_data.cl" + +KERNEL(rms_gpu_bfyx_opt)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + const __global INPUT1_TYPE* gamma, + __global OUTPUT_TYPE* output) +{ + const uint in_data_idx = get_global_id(0); + const uint data_idx = get_global_id(1); + const uint lws_size = LWS; + const uint items_num = VEC_SIZE; + const uint data_size = DATA_SIZE; + const uint total_items_num = lws_size * items_num; +#if !IS_DYNAMIC + const uint leftovers = LEFTOVERS; +#else + const uint leftovers = data_size % items_num; +#endif + + const uint data_offset = data_idx * data_size; + const uint in_data_offset = data_offset + in_data_idx * items_num; + const uint gamma_offset = in_data_idx * items_num; + + ACCUMULATOR_TYPE rms = ACCUMULATOR_VAL_ZERO; + + __local ACCUMULATOR_TYPE slm_buf[SLM_SIZE]; + + INPUT_VEC_TYPE inputs = AS_INPUT_VEC_TYPE(VLOAD(0, input + in_data_offset)); + ACCUMULATOR_VEC_TYPE square = native_powr(TO_ACCUMULATOR_VEC_TYPE(inputs), (ACCUMULATOR_VEC_TYPE)(2)); + unroll_for (uint i = 0; i < VEC_SIZE; ++i) { + rms += square[i]; + } + + if (in_data_idx < leftovers) { + const uint input_idx = data_offset + total_items_num + in_data_idx; + rms += native_powr(TO_ACCUMULATOR_TYPE(input[input_idx]), 2); + } + + slm_buf[in_data_idx] = rms; + + barrier(CLK_LOCAL_MEM_FENCE); + if (in_data_idx == 0) { +#if !IS_DYNAMIC + unroll_for (uint i = 1; i < LWS; ++i) +#else + for (uint i = 1; i < lws_size; ++i) +#endif + rms += slm_buf[i]; + + rms = rms / data_size; + slm_buf[0] = native_powr(sqrt(rms + TO_ACCUMULATOR_TYPE(EPSILON)), -1); + } + barrier(CLK_LOCAL_MEM_FENCE); + + rms = slm_buf[0]; + + OUTPUT_VEC_TYPE results = TO_OUTPUT_VEC_TYPE((ACCUMULATOR_VEC_TYPE)(rms) * TO_ACCUMULATOR_VEC_TYPE(inputs) * AS_ACCUMULATOR_VEC_TYPE(VLOAD(0, gamma + gamma_offset))); + VSTORE(results, 0, output + in_data_offset); + + if (in_data_idx < leftovers) { + const uint input_idx = data_offset + total_items_num + in_data_idx; + const uint output_idx = data_offset + total_items_num + in_data_idx; + const uint gamma_idx = total_items_num + in_data_idx; + OUTPUT_TYPE result = TO_OUTPUT_TYPE(rms * TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(gamma[gamma_idx])); + output[output_idx] = result; + } +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl new file mode 100644 index 00000000000000..88c5eb520d33e3 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl @@ -0,0 +1,45 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/fetch_utils.cl" + +KERNEL(rms_gpu_ref)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + const __global INPUT1_TYPE* gamma, + __global OUTPUT_TYPE* output) +{ + const 
uint b = get_global_id(0); + const uint f = get_global_id(1); + const uint w = 0; + + ACCUMULATOR_TYPE rms = ACCUMULATOR_VAL_ZERO; + for (uint z = 0; z < INPUT0_SIZE_Z; z++) { + for (uint y = 0; y < INPUT0_SIZE_Y; y++) { + for (uint x = 0; x < INPUT0_SIZE_X; x++) { + const uint input_idx = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); + rms += pow(TO_ACCUMULATOR_TYPE(input[input_idx]), 2); + } + } + } + + rms /= INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z; + rms = pow(sqrt(rms + TO_ACCUMULATOR_TYPE(EPSILON)), -1); + + for (uint z = 0; z < INPUT0_SIZE_Z; z++) { + for (uint y = 0; y < INPUT0_SIZE_Y; y++) { + for (uint x = 0; x < INPUT0_SIZE_X; x++) { + const uint input_idx = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); + const uint output_idx = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); +#if INPUT0_DIMS == 4 + const uint gamma_idx = y; +#elif INPUT0_DIMS == 5 + const uint gamma_idx = z; +#endif + OUTPUT_TYPE result = TO_OUTPUT_TYPE(rms) * TO_OUTPUT_TYPE(input[input_idx]) * TO_OUTPUT_TYPE(gamma[gamma_idx]); + output[output_idx] = result; + } + } + } +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 7706da6003fe74..1acc0aa89e6af6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -97,6 +97,7 @@ enum class KernelType { MULTICLASS_NMS, UNIQUE_COUNT, UNIQUE_GATHER, + RMS, }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -569,6 +570,15 @@ enum class BoxEncodingType { BOX_ENCODING_CENTER, }; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// NMSRotationType +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +enum class NMSRotationType { + NONE, + CLOCKWISE, + COUNTERCLOCKWISE +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // ConvertColor //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp index 28a6b2fa9e0bb6..fc85b23005ec84 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp @@ -149,11 +149,17 @@ JitConstants NonMaxSuppressionKernelRef::GetJitConstants(const non_max_suppressi jit.AddConstant(MakeJitConstant("SCORE_THRESHOLD_VAL", params.score_threshold)); } - if (params.soft_nms_sigma_type == base_params::ArgType::Input) { - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma()))); - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])")); + if (params.rotation == NMSRotationType::NONE) { + if (params.soft_nms_sigma_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma()))); + 
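// the kernel then reads sigma at execution time: SOFT_NMS_SIGMA_VAL expands to the runtime input element below + 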
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])")); + } else { + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma)); + } } else { - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma)); + jit.AddConstant(MakeJitConstant("ROTATION", static_cast(params.rotation))); + // for NMSRotated it is always zero + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", 0.0f)); } if (params.has_second_output) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h index 5ace6fbebffac3..8fc2dc2724a9bd 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h @@ -36,6 +36,7 @@ struct non_max_suppression_params : public base_params { bool has_third_output; bool use_multiple_outputs; bool reuse_internal_buffer = false; + NMSRotationType rotation = NMSRotationType::NONE; uint32_t GetIndexNumSelectPerClass() const { uint32_t input_idx = 2; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp new file mode 100644 index 00000000000000..f93cee2876de93 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_kernel_base.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { +bool RMSKernelBase::Validate(const Params& p, const optional_params& o) const { + if (!KernelBaseOpenCL::Validate(p, o)) + return false; + + const rms_params& params = static_cast(p); + auto supported_dyn_layouts = { DataLayout::bfyx, DataLayout::bfzyx }; + if (params.has_dynamic_tensors() && (!layout_is_one_of(params.inputs, supported_dyn_layouts) || !layout_is_one_of(params.outputs, supported_dyn_layouts))) + return false; + + return true; +} + +JitConstants RMSKernelBase::GetJitConstants(const rms_params& params, RMSKernelBase::DispatchData) const { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstant(MakeJitConstant("EPSILON", params.epsilon)); + jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); + + return jit; +} + +RMSKernelBase::DispatchData RMSKernelBase::SetDefault(const rms_params& params) const { + DispatchData dispatchData; + const auto& output = params.outputs[0]; + + dispatchData.gws = {output.Batch().v, output.Feature().v, 1}; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + + return dispatchData; +} + +KernelsData RMSKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const { + assert(params.GetType() == KernelType::RMS); + + if (!Validate(params, options)) + return {}; + + const rms_params& orgParams = static_cast(params); + auto dispatchData = SetDefault(orgParams); + + KernelData kd = KernelData::Default(params); + + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); + auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) { + const 
auto& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + kd.kernels[0].params.workGroups.local = dispatchData.lws; + kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params); + }; + + auto& kernel = kd.kernels[0]; + FillCLKernelData(kernel, + dispatchData, + params.engineInfo, + kernelName, + jit, + entry_point, + EXE_MODE_DEFAULT, + false, + false, + 2, + GetFusedPrimitiveInputsCount(params), + 1, + orgParams.outputs[0].is_dynamic()); + + return {kd}; +} + +Datatype RMSKernelBase::GetAccumulatorType(const rms_params& params) const { + const auto& input_dt = params.inputs[0].GetDType(); + + switch (input_dt) { + case Datatype::F32: + case Datatype::F16: + return Datatype::F32; + case Datatype::INT8: return Datatype::INT32; + case Datatype::UINT8: return Datatype::INT32; + default: return Datatype::F32; + } +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h new file mode 100644 index 00000000000000..546c209bf03d77 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h @@ -0,0 +1,50 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_base_opencl.h" + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// rms_params +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct rms_params : public base_params { + rms_params() : base_params(KernelType::RMS) {} + float epsilon = 0.0f; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// rms_optional_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct rms_optional_params : optional_params { + rms_optional_params() : optional_params(KernelType::RMS) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// RMSKernelBase +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class RMSKernelBase : public KernelBaseOpenCL { +public: + using KernelBaseOpenCL::KernelBaseOpenCL; + virtual ~RMSKernelBase() {} + + struct DispatchData : public CommonDispatchData { + size_t dataSize; + size_t dataCount; + size_t slmSize; + size_t maxSlmSize; + size_t leftovers; + + DispatchData() : dataSize(0), dataCount(0), slmSize(0), maxSlmSize(0), leftovers(0) {} + }; + +protected: + bool Validate(const Params&, const optional_params&) const override; + virtual JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const; + virtual DispatchData SetDefault(const rms_params& params) const; + KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const; + Datatype GetAccumulatorType(const rms_params& params) const; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp 
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp
new file mode 100644
index 00000000000000..ad49fd86370e0a
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp
@@ -0,0 +1,120 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_kernel_bfyx_opt.h"
+#include "kernel_selector_utils.h"
+#include <string>
+
+namespace kernel_selector {
+ParamsKey RMSKernelBfyxOpt::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBatching();
+    k.EnableDifferentTypes();
+    k.EnableDynamicShapesSupport();
+    return k;
+}
+
+JitConstants RMSKernelBfyxOpt::GetJitConstants(const rms_params& params, DispatchData dispatchData) const {
+    auto jit = Parent::GetJitConstants(params, dispatchData);
+
+    if (params.has_dynamic_tensors()) {
+        const auto& input = params.inputs[0];
+        DimensionAccessHelper dims(input);
+        const std::string data_size = toVectorMulString({dims.x(), dims.y(), dims.z()});
+        const std::string lws_0 = "get_local_size(0)";
+        jit.AddConstants({
+            MakeJitConstant("DATA_SIZE", data_size),
+            MakeJitConstant("LWS", lws_0),
+            MakeJitConstant("SLM_SIZE", dispatchData.maxSlmSize)
+        });
+    } else {
+        jit.AddConstants({
+            MakeJitConstant("DATA_SIZE", dispatchData.dataSize),
+            MakeJitConstant("LWS", dispatchData.slmSize),
+            MakeJitConstant("SLM_SIZE", dispatchData.slmSize),
+            MakeJitConstant("LEFTOVERS", dispatchData.leftovers)
+        });
+    }
+    jit.AddConstants({
+        MakeJitConstant("VEC_SIZE", 8),
+        MakeJitConstant("VLOAD", "CAT(vload, VEC_SIZE)"),
+        MakeJitConstant("VSTORE", "CAT(vstore, VEC_SIZE)"),
+        MakeJitConstant("INPUT_VEC_TYPE", "MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE)"),
+        MakeJitConstant("ACCUMULATOR_VEC_TYPE", "MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, VEC_SIZE)"),
+        MakeJitConstant("OUTPUT_VEC_TYPE", "MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)"),
+        MakeJitConstant("AS_INPUT_VEC_TYPE", "CAT(as_, INPUT_VEC_TYPE)"),
+        MakeJitConstant("AS_ACCUMULATOR_VEC_TYPE", "CAT(as_, ACCUMULATOR_VEC_TYPE)"),
+        MakeJitConstant("TO_ACCUMULATOR_VEC_TYPE", "CAT(convert_, ACCUMULATOR_VEC_TYPE)"),
+        MakeJitConstant("TO_OUTPUT_VEC_TYPE", "CAT(convert_, OUTPUT_VEC_TYPE)"),
+    });
+
+    return jit;
+}
+
+RMSKernelBase::DispatchData RMSKernelBfyxOpt::SetDefault(const rms_params& params) const {
+    DispatchData dispatchData;
+    const auto& input = params.inputs[0];
+
+    auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType());
+    auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi);
+    dispatchData.maxSlmSize = max_lws;
+
+    if (!params.has_dynamic_tensors()) {
+        dispatchData.dataSize = input.X().v * input.Y().v * input.Z().v;
+        dispatchData.dataCount = input.Batch().v * input.Feature().v;
+        dispatchData.slmSize = dispatchData.dataSize / 8;
+        dispatchData.leftovers = dispatchData.dataSize % 8;
+
+        dispatchData.gws[0] = dispatchData.slmSize;
+        dispatchData.gws[1] = dispatchData.dataCount;
+        dispatchData.gws[2] = 1;
+
+        dispatchData.lws[0] = dispatchData.slmSize;
+        dispatchData.lws[1] = 1;
+        dispatchData.lws[2] = 1;
+    }
+    return dispatchData;
+}
+
+bool RMSKernelBfyxOpt::Validate(const Params& p, const optional_params& o) const {
+    if (!Parent::Validate(p, o))
+        return false;
+
+    const rms_params& params = static_cast<const rms_params&>(p);
+    const auto& gamma = params.inputs[1];
+
+    if (!gamma.is_dynamic()) {
+        size_t data_size = gamma.LogicalSize();
+        if (data_size < 8) {
+            return false;
+        }
+        auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType());
+        auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi);
+        auto slm_size = data_size / 8;
+        if (slm_size > max_lws) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+KernelsData RMSKernelBfyxOpt::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetCommonKernelsData(params, options);
+}
+
+KernelsPriority RMSKernelBfyxOpt::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
+    return FORCE_PRIORITY_7;
+}
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h
new file mode 100644
index 00000000000000..a9b49c4c1cc654
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h
@@ -0,0 +1,25 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "rms_kernel_base.h"
+
+namespace kernel_selector {
+class RMSKernelBfyxOpt : public RMSKernelBase {
+public:
+    using Parent = RMSKernelBase;
+    RMSKernelBfyxOpt() : RMSKernelBase("rms_gpu_bfyx_opt") {}
+    virtual ~RMSKernelBfyxOpt() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    bool Validate(const Params&, const optional_params&) const override;
+    DispatchData SetDefault(const rms_params& params) const override;
+    JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const override;
+};
+}  // namespace kernel_selector
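The Validate override above encodes the dispatch scheme of the optimized kernel: with VEC_SIZE 8, each work-item handles one 8-wide vector, so the local work-group size is data_size / 8 and must respect both the device work-group limit and the local memory budget. A standalone restatement of that feasibility check (hypothetical helper; assumes f16 inputs, i.e. 2 bytes per element, matching BytesPerElement in the patch):

#include <algorithm>
#include <cstddef>

// Sketch of the bfyx_opt feasibility check, mirroring the derivation in
// RMSKernelBfyxOpt::Validate above.
bool fits_bfyx_opt(size_t data_size, size_t max_work_group_size, size_t max_local_mem_size) {
    if (data_size < 8)                      // need at least one full 8-wide vector
        return false;
    const size_t local_mem_per_wi = 2 * 2;  // two f16 staging values per work-item
    const size_t max_lws = std::min(max_work_group_size, max_local_mem_size / local_mem_per_wi);
    return data_size / 8 <= max_lws;        // one work-item per 8-element chunk
}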
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp
new file mode 100644
index 00000000000000..9dbdf30154aea9
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp
@@ -0,0 +1,35 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_kernel_ref.h"
+#include "kernel_selector_utils.h"
+#include <string>
+
+namespace kernel_selector {
+ParamsKey RMSKernelRef::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBatching();
+    k.EnableDifferentTypes();
+    k.EnableDynamicShapesSupport();
+    return k;
+}
+
+KernelsData RMSKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetCommonKernelsData(params, options);
+}
+
+KernelsPriority RMSKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
+    return FORCE_PRIORITY_9;
+}
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h
new file mode 100644
index 00000000000000..7c2e3dd512e8f7
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h
@@ -0,0 +1,20 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "rms_kernel_base.h"
+
+namespace kernel_selector {
+class RMSKernelRef : public RMSKernelBase {
+public:
+    using Parent = RMSKernelBase;
+    RMSKernelRef() : RMSKernelBase("rms_gpu_ref") {}
+    virtual ~RMSKernelRef() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+};
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp
new file mode 100644
index 00000000000000..13cabf77011d48
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp
@@ -0,0 +1,18 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_kernel_selector.h"
+#include "rms_kernel_ref.h"
+#include "rms_kernel_bfyx_opt.h"
+
+namespace kernel_selector {
+rms_kernel_selector::rms_kernel_selector() {
+    Attach<RMSKernelRef>();
+    Attach<RMSKernelBfyxOpt>();
+}
+
+KernelsData rms_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
+    return GetNaiveBestKernel(params, options, KernelType::RMS);
+}
+}  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h
new file mode 100644
index 00000000000000..f951264c7f5c6b
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h
@@ -0,0 +1,23 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector {
+class rms_kernel_selector : public kernel_selector_base {
+public:
+    static rms_kernel_selector& Instance() {
+        static rms_kernel_selector instance_;
+        return instance_;
+    }
+
+    rms_kernel_selector();
+
+    virtual ~rms_kernel_selector() {}
+
+    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+};
+}  // namespace kernel_selector
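Both implementations register with static priorities: FORCE_PRIORITY_7 for rms_gpu_bfyx_opt and FORCE_PRIORITY_9 for rms_gpu_ref. In the kernel selector a lower value wins, so the optimized kernel is chosen whenever its Validate passes and the reference kernel serves as the fallback. A toy model of that choice (illustrative only; the real GetNaiveBestKernel operates on KernelsData, not plain structs):

#include <vector>

// Toy restatement of the naive selection rule: among candidates that pass
// Validate, the smallest priority value wins.
struct Candidate { const char* name; int priority; bool valid; };

const char* pick(const std::vector<Candidate>& kernels) {
    const Candidate* best = nullptr;
    for (const auto& k : kernels)
        if (k.valid && (!best || k.priority < best->priority))
            best = &k;
    return best ? best->name : "none";
}
// pick({{"rms_gpu_bfyx_opt", 7, true}, {"rms_gpu_ref", 9, true}}) yields "rms_gpu_bfyx_opt".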
diff --git a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp
index fd36533e2a5d47..6e91cc7db9fe2f 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp
@@ -17,7 +17,18 @@
 namespace ov {
 namespace intel_gpu {
 
 static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::shared_ptr<ov::op::internal::NonMaxSuppressionIEInternal>& op) {
-    validate_inputs_count(op, {2, 3, 4, 5, 6});
+    cldnn::non_max_suppression::Rotation rotation = cldnn::non_max_suppression::Rotation::NONE;
+    const bool is_nms_rotated = op->m_rotation != ov::op::internal::NonMaxSuppressionIEInternal::Rotation_None;
+    if (is_nms_rotated) {
+        // For NMSRotated threshold inputs are mandatory, and soft_nms_sigma input is absent
+        validate_inputs_count(op, {5});
+
+        rotation = op->m_rotation == ov::op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise ?
+                        cldnn::non_max_suppression::Rotation::CLOCKWISE
+                        : cldnn::non_max_suppression::Rotation::COUNTERCLOCKWISE;
+    } else {
+        validate_inputs_count(op, {2, 3, 4, 5, 6});
+    }
     auto inputs = p.GetInputInfo(op);
     std::vector<cldnn::input_info> reordered_inputs;
     reordered_inputs.resize(inputs.size());
@@ -75,6 +86,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh
         prim.output_paddings = get_output_paddings();
         prim.output_data_types = get_output_data_types();
+        prim.rotation = rotation;
 
         switch (reordered_inputs.size()) {
         case 6:
             prim.soft_nms_sigma = reordered_inputs[5].pid;
@@ -142,6 +154,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh
                    "", "", "", "", "", "");
         prim.output_data_types = get_output_data_types();
+        prim.rotation = rotation;
 
         switch (reordered_inputs.size()) {
         case 6:
             prim.soft_nms_sigma = reordered_inputs[5].pid;
diff --git a/src/plugins/intel_gpu/src/plugin/ops/rms.cpp b/src/plugins/intel_gpu/src/plugin/ops/rms.cpp
new file mode 100644
index 00000000000000..01289bd5022d6d
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/ops/rms.cpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/op/rms.hpp"
+#include "intel_gpu/plugin/program_builder.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
+#include "intel_gpu/primitives/rms.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+using RMS = ov::intel_gpu::op::RMS;
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
+
+namespace ov {
+namespace intel_gpu {
+
+static void CreateRMSOp(ProgramBuilder& p, const std::shared_ptr<ov::op::internal::RMS>& op) {
+    validate_inputs_count(op, {2});
+    auto inputs = p.GetInputInfo(op);
+    std::string primitive_name = layer_type_name_ID(op);
+
+    auto get_output_data_types = [&]() {
+        std::vector<cldnn::data_types> output_data_types;
+        auto type = op->get_output_element_type(0);
+        output_data_types.push_back(cldnn::element_type_to_data_type(type));
+        return output_data_types;
+    };
+    auto rms = cldnn::rms(primitive_name,
+                          inputs[0],
+                          inputs[1],
+                          op->get_epsilon());
+    rms.output_data_types = get_output_data_types();
+    p.add_primitive(*op, rms);
+}
+
+REGISTER_FACTORY_IMPL(internal, RMS);
+
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp
new file mode 100644
index 00000000000000..5dcd12071d1712
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/op/rms.hpp"
+
+namespace ov {
+namespace intel_gpu {
+namespace op {
+
+RMS::RMS(const Output<Node>& data,
+         const Output<Node>& gamma,
+         double epsilon,
+         const ov::element::Type output_type)
+    : Op({data, gamma}), m_epsilon(epsilon), m_output_type(output_type) {
+    validate_and_infer_types();
+}
+
+bool RMS::visit_attributes(ov::AttributeVisitor& visitor) {
+    visitor.on_attribute("epsilon", m_epsilon);
+    visitor.on_attribute("output_type", m_output_type);
+    return true;
+}
+
+void RMS::validate_and_infer_types() {
+    auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type;
+    set_output_type(0, output_type, get_input_partial_shape(0));
+}
+
+std::shared_ptr<Node> RMS::clone_with_new_inputs(const ov::OutputVector& new_args) const {
+    check_new_args_count(this, new_args);
+    return std::make_shared<RMS>(new_args.at(0),
+                                 new_args.at(1),
+                                 m_epsilon,
+                                 m_output_type);
+}
+
+}  // namespace op
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp
new file mode 100644
index 00000000000000..bcd192454f3d3a
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp
@@ -0,0 +1,103 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "rms_fusion.hpp"
+
+#include "intel_gpu/op/rms.hpp"
+
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/divide.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/power.hpp"
+#include "openvino/op/reduce_mean.hpp"
+#include "openvino/op/sqrt.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "transformations/utils/utils.hpp"
+
+namespace ov {
+namespace intel_gpu {
+
+static std::function<bool(ov::Output<ov::Node>)> constant_value(const float target_value) {
+    return [=](const ov::Output<ov::Node>& output) -> bool {
+        auto node = std::dynamic_pointer_cast<ov::op::v0::Constant>(output.get_node_shared_ptr());
+        if (!node) {
+            return false;
+        }
+        float value;
+        if (!ov::op::util::get_single_value(node, value)) {
+            return false;
+        }
+        return value == target_value;
+    };
+}
+
+RMSFusion::RMSFusion() {
+    using namespace ov::pass::pattern;
+
+    // Detect RMS decomposition pattern
+    //  x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma
+    auto x = any_input();
+
+    // x^2
+    auto const_power = wrap_type<ov::op::v0::Constant>(constant_value(2));
+    auto power = wrap_type<ov::op::v1::Power>({x, const_power});
+
+    // ReduceMean(x^2,axes)
+    auto mean_axes = wrap_type<ov::op::v0::Constant>(constant_value(-1));
+    auto mean = wrap_type<ov::op::v1::ReduceMean>({power, mean_axes});
+
+    // ReduceMean(x^2,axes)+eps
+    auto eps = wrap_type<ov::op::v0::Constant>();
+    auto add_eps = wrap_type<ov::op::v1::Add>({mean, eps});
+
+    // Sqrt(ReduceMean(x^2,axes)+eps)
+    auto sqrt = wrap_type<ov::op::v0::Sqrt>({add_eps});
+
+    // 1/Sqrt(ReduceMean(x^2,axes)+eps)
+    auto const_div = wrap_type<ov::op::v0::Constant>(constant_value(-1));
+    auto div = wrap_type<ov::op::v1::Power>({sqrt, const_div});
+
+    // x * 1/Sqrt(ReduceMean(x^2,axes)+eps)
+    auto mul1 = wrap_type<ov::op::v1::Multiply>({x, div});
+
+    // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma
+    auto gamma = wrap_type<ov::op::v0::Constant>(type_matches(element::f32));
+    auto mul2 = wrap_type<ov::op::v1::Multiply>({gamma, mul1});
+
+    // compress RMS result
+    auto comp = wrap_type<ov::op::v0::Convert>({mul2});
+
+    ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        auto x_output = pattern_map.at(x);
+
+        auto const_eps_node =
+            std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(eps).get_node_shared_ptr());
+        float eps_value;
+        if (!ov::op::util::get_single_value(const_eps_node, eps_value)) {
+            return false;
+        }
+
+        const auto& gamma_node = pattern_map.at(gamma).get_node_shared_ptr();
+        auto output_type = m.get_match_root()->get_output_element_type(0);
+
+        auto rms = std::make_shared<op::RMS>(x_output,
+                                             gamma_node,
+                                             eps_value,
+                                             output_type);
+        rms->set_friendly_name(m.get_match_root()->get_friendly_name());
+        ov::copy_runtime_info(m.get_matched_nodes(), rms);
+        ov::replace_node(m.get_match_root(), rms);
+
+        return true;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(comp, "RMSFusion");
+    this->register_matcher(m, callback);
+}
+
+}  // namespace intel_gpu
+}  // namespace ov
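For reference, the subgraph matched above is the standard RMS normalization. With eps and gamma bound as in the pattern, the fused op computes

\[ \mathrm{RMSNorm}(x)_i = \frac{x_i}{\sqrt{\tfrac{1}{n}\sum_{j=1}^{n} x_j^{2} + \epsilon}} \cdot \gamma_i \]

where the mean runs over the last axis (the ReduceMean axes constant is -1). The Power node with exponent -1 supplies the reciprocal of the square root, and the trailing Convert is the optional down-cast of the fused result to the target output precision.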
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp
new file mode 100644
index 00000000000000..66f236f3f26c38
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+
+namespace ov {
+namespace intel_gpu {
+
+class RMSFusion : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("RMSFusion", "0");
+    RMSFusion();
+};
+
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 10275dae95d729..ac567cd998f9a2 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -88,6 +88,7 @@
 #include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
 #include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp"
 #include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp"
+#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp"
 #include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp"
 #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
 #include "transformations/op_conversions/convert_gather_downgrade.hpp"
@@ -113,6 +114,7 @@
 #include "plugin/transformations/convert_matmul_to_fc.hpp"
 #include "plugin/transformations/move_fc_reshape_to_weights.hpp"
 #include "plugin/transformations/convert_fc_to_compressed.hpp"
+#include "plugin/transformations/rms_fusion.hpp"
 
 #include "transformations/low_precision/mark_dequantization_subgraph.hpp"
 #include "low_precision/pull_reshape_through_dequantization.hpp"
@@ -271,6 +273,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
+        manager.register_pass<ov::pass::ConvertNMSRotatedToNMSIEInternal>();
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
@@ -642,6 +645,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
+        manager.register_pass<RMSFusion>();
 
         manager.run_passes(func);
     }
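(The unchanged register_pass context lines above lost their template arguments in extraction and are left as-is rather than guessed.) Registered at the end of the pipeline, the fusion runs over the whole model like any other matcher pass. A minimal sketch of invoking it in isolation, e.g. from a unit test — the include path is illustrative and fuse_rms_only is a hypothetical helper:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "rms_fusion.hpp"  // illustrative path to the header added above

// Runs only RMSFusion on a model; any x * 1/sqrt(mean(x^2) + eps) * gamma
// subgraph is replaced with a single ov::intel_gpu::op::RMS node.
void fuse_rms_only(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::intel_gpu::RMSFusion>();
    manager.run_passes(model);
}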
diff --git a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
index 964e6dcad3dcc5..9f4c18ef5d2ce4 100644
--- a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
+++ b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
@@ -7,6 +7,7 @@
 #include "common_test_utils/common_utils.hpp"
 #include "common_test_utils/file_utils.hpp"
 #include "functional_test_utils/skip_tests_config.hpp"
+#include "functional_test_utils/ov_plugin_cache.hpp"
 #include "ov_models/subgraph_builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
@@ -59,8 +60,11 @@ class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchShapeParams>,
         std::tie(inputShape, netPrecision, configuration) = this->GetParam();
 
         init_input_shapes(inputShape);
@@ -73,6 +77,7 @@ class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchShapeParams>,
         src_func->reshape(dynShape);
     }
 
+    std::shared_ptr<ov::Model> src_func;
     // std::map<std::string, std::string> configuration;
     std::vector<ov::test::InputShape> inputShape;
@@ -81,7 +86,6 @@ class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchShapeParams>,
-    core = std::make_shared<ov::Core>();
     run();
 }
@@ -97,12 +101,12 @@ TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound_cached) {
     ov::test::utils::removeFilesWithExt(cacheFolderName, "cl_cache");
     ov::test::utils::removeDir(cacheFolderName);
-    core = std::make_shared<ov::Core>();
     core->set_property(ov::cache_dir(cacheFolderName));
     run();
     }
     {
-    core = std::make_shared<ov::Core>();
+    core.reset();
+    core = ov::test::utils::PluginCache::get().core();
     core->set_property(ov::cache_dir(cacheFolderName));
     run();
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp
index 63a57786fb3b79..5f74409c75d2f9 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp
@@ -4,12 +4,11 @@
 
 #include <vector>
 
-#include "single_layer_tests/convert_color_nv12.hpp"
+#include "single_op_tests/convert_color_nv12.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::ConvertColorNV12LayerTest;
 
 const std::vector<ov::Shape> inShapes_nhwc = {
     {1, 10, 10, 1}
@@ -20,27 +19,60 @@ const std::vector<ov::element::Type> inTypes = {
     ov::element::f32
 };
 
-INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12,
+auto generate_input_static_shapes = [] (const std::vector<ov::Shape>& original_shapes, bool single_plane) {
+    std::vector<std::vector<ov::Shape>> result_shapes;
+    for (const auto& original_shape : original_shapes) {
+        std::vector<ov::Shape> one_result_shapes;
+        if (single_plane) {
+            auto shape = original_shape;
+            shape[1] = shape[1] * 3 / 2;
+            one_result_shapes.push_back(shape);
+        } else {
+            auto shape = original_shape;
+            one_result_shapes.push_back(shape);
+            auto uvShape = ov::Shape{shape[0], shape[1] / 2, shape[2] / 2, 2};
+            one_result_shapes.push_back(uvShape);
+        }
+        result_shapes.push_back(one_result_shapes);
+    }
+    return result_shapes;
+};
+
+auto in_shapes_single_plane_static = generate_input_static_shapes(inShapes_nhwc, true);
+auto in_shapes_two_planes_static = generate_input_static_shapes(inShapes_nhwc, false);
+
+INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12SinglePlane,
                          ConvertColorNV12LayerTest,
-                         ::testing::Combine(::testing::ValuesIn(inShapes_nhwc),
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_single_plane_static)),
                                             ::testing::ValuesIn(inTypes),
                                             ::testing::Bool(),
+                                            ::testing::Values(true),
+                                            ::testing::Values(ov::test::utils::DEVICE_GPU)),
+                         ConvertColorNV12LayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12TwoPlane,
+                         ConvertColorNV12LayerTest,
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_two_planes_static)),
+                                            ::testing::ValuesIn(inTypes),
                                             ::testing::Bool(),
+                                            ::testing::Values(false),
                                             ::testing::Values(ov::test::utils::DEVICE_GPU)),
                          ConvertColorNV12LayerTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12_acc,
-                         ConvertColorNV12AccuracyTest,
-                         ::testing::Combine(::testing::Values(ov::Shape{1, 16 * 6, 16, 1}),
+INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12SinglePlane_acc,
+                         ConvertColorNV12LayerTest,
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+                                                generate_input_static_shapes({{1, 16 * 6, 16, 1}}, true))),
                                             ::testing::Values(ov::element::u8),
-                                            ::testing::Bool(),
-                                            ::testing::Bool(),
+                                            ::testing::Values(false),
+                                            ::testing::Values(true),
                                             ::testing::Values(ov::test::utils::DEVICE_GPU)),
                          ConvertColorNV12LayerTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorNV12_acc,
-                         ConvertColorNV12AccuracyTest,
-                         ::testing::Combine(::testing::Values(ov::Shape{1, 256 * 256, 256, 1}),
+INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorNV12SinglePlane_acc,
+                         ConvertColorNV12LayerTest,
+                         ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+                                                generate_input_static_shapes({{1, 256 * 256, 256, 1}}, true))),
                                             ::testing::Values(ov::element::u8),
                                             ::testing::Values(false),
                                             ::testing::Values(true),
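A quick worked example of generate_input_static_shapes above, read straight off the code: for the base NHWC shape {1, 10, 10, 1}, single-plane mode stacks Y and interleaved UV along the height axis, producing one input of {1, 15, 10, 1} (H * 3/2); two-plane mode keeps the Y plane {1, 10, 10, 1} and adds a half-resolution UV plane {1, 5, 5, 2}.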
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp
index 13293d3f6dc42a..2f00b4e38e7090 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp
@@ -4,15 +4,14 @@
 
 #include <vector>
 
-#include "single_layer_tests/convolution.hpp"
+#include "single_op_tests/convolution.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+using ov::test::ConvolutionLayerTest;
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
+    ov::element::f16
 };
 
 /* ============= 2D Convolution ============= */
@@ -27,9 +26,9 @@ const std::vector<std::vector<size_t>> padEnds = {{0, 0},
 const std::vector<std::vector<size_t>> dilations = {{1, 1}, {3, 1}};
 const std::vector<size_t> numOutChannels = {1, 5};
-const std::vector<ngraph::op::PadType> padTypes = {
-    ngraph::op::PadType::EXPLICIT,
-    ngraph::op::PadType::VALID
+const std::vector<ov::op::PadType> padTypes = {
+    ov::op::PadType::EXPLICIT,
+    ov::op::PadType::VALID
 };
 const auto conv2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(kernels),
@@ -38,7 +37,7 @@ const auto conv2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(padEnds),
     ::testing::ValuesIn(dilations),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 const auto conv2DParams_AutoPadValid = ::testing::Combine(
     ::testing::ValuesIn(kernels),
@@ -54,11 +53,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_ExplicitPadding, ConvolutionLayerTe
         ::testing::Combine(
             conv2DParams_ExplicitPadding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
+            ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionLayerTest::getTestCaseName);
 
@@ -66,11 +61,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_AutoPadValid, ConvolutionLayerTest,
         ::testing::Combine(
             conv2DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
+            ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionLayerTest::getTestCaseName);
 
/* ============= 3D Convolution ============= */
@@ -94,18 +85,14 @@ const auto conv3DParams = ::testing::Combine(
     ::testing::ValuesIn(paddings3d),
     ::testing::ValuesIn(dilations3d),
     ::testing::ValuesIn(numOutChannels3d),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_Convolution3D_Basic1, ConvolutionLayerTest,
         ::testing::Combine(
             conv3DParams,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(std::vector<size_t>({1, 3, 10, 10, 10})),
+            ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 10, 10, 10}})),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionLayerTest::getTestCaseName);
 }  // namespace
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp
index f80df8897d6e84..1488f7cbf6358b 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp
@@ -4,32 +4,32 @@
 
 #include <vector>
 
-#include "single_layer_tests/convolution_backprop_data.hpp"
+#include "single_op_tests/convolution_backprop_data.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::ConvolutionBackpropDataLayerTest;
+using ov::test::convBackpropDataLayerTestParamsSet;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
+    ov::element::f16
 };
 
 const std::vector<size_t> numOutChannels = {1, 5, 16};
-const std::vector<std::vector<size_t>> emptyOutputShape = {{}};
+const std::vector<ov::Shape> emptyOutputShape = {{}};
 const std::vector<std::vector<ptrdiff_t>> emptyOutputPadding = {{}};
 
 /* ============= 2D ConvolutionBackpropData ============= */
-const std::vector<InferenceEngine::Precision> netPrecisions2D = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+const std::vector<ov::element::Type> netPrecisions2D = {
+    ov::element::f32,
+    ov::element::f16
 };
 
-const std::vector<std::vector<size_t>> inputShapes2D = {{1, 3, 30, 30},
-                                                        {1, 16, 10, 10},
-                                                        {1, 32, 10, 10}};
-const std::vector<std::vector<size_t>> kernels2D = {{1, 1}, {3, 3}, {3, 5}};
+const std::vector<std::vector<ov::Shape>> inputShapes2D = {{{1, 3, 30, 30}},
+                                                           {{1, 16, 10, 10}},
+                                                           {{1, 32, 10, 10}}};
+const std::vector<std::vector<size_t>> kernels2D = {/*{1, 1},*/ {3, 3}, {3, 5}};
 const std::vector<std::vector<size_t>> strides2D = {{1, 3}};
 const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
 const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}, {1, 1}};
@@ -42,7 +42,7 @@ const auto conv2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(padEnds2D),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 const auto conv2DParams_AutoPadValid = ::testing::Combine(
@@ -52,7 +52,7 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine(
     ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 
@@ -60,11 +60,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, Convol
         ::testing::Combine(
             conv2DParams_ExplicitPadding,
             ::testing::ValuesIn(netPrecisions2D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -73,11 +69,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, Convoluti
         ::testing::Combine(
             conv2DParams_AutoPadValid,
             ::testing::ValuesIn(netPrecisions2D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -92,7 +84,7 @@ const auto conv2DParams_ExplicitPadding_output_padding = ::testing::Combine(
     ::testing::ValuesIn(padEnds2D),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(outputPadding2D)
 );
 const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine(
@@ -102,7 +94,7 @@ const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine(
     ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(outputPadding2D)
 );
 
@@ -110,11 +102,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputP
         ::testing::Combine(
             conv2DParams_AutoPadValid_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -123,27 +111,24 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddi
         ::testing::Combine(
             conv2DParams_ExplicitPadding_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes2D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
 /* ============= 3D ConvolutionBackpropData ============= */
-const std::vector<InferenceEngine::Precision> netPrecisions3D = {
-    InferenceEngine::Precision::FP32,
+
+const std::vector<ov::element::Type> netPrecisions3D = {
+    ov::element::f32,
 };
 
-const std::vector<std::vector<size_t>> inputShapes3D = {{1, 3, 10, 10, 10},
-                                                        {1, 16, 5, 5, 5},
-                                                        {1, 32, 5, 5, 5}};
-const std::vector<std::vector<size_t>> kernels3D = {{1, 1, 1}, {3, 3, 3}};
-const std::vector<std::vector<size_t>> strides3D = {{1, 1, 1}};
+const std::vector<std::vector<ov::Shape>> inputShapes3D = {{{1, 3, 10, 10, 10}},
+                                                           {{1, 16, 5, 5, 5}},
+                                                           {{1, 32, 5, 5, 5}}};
+const std::vector<std::vector<size_t>> kernels3D = {/*{1, 1, 1}, */{3, 3, 3}};
+const std::vector<std::vector<size_t>> strides3D = {{1, 1, 1}};
 const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
 const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}, {1, 1, 1}};
-const std::vector<std::vector<size_t>> dilations3D = {{1, 1, 1}};
+const std::vector<std::vector<size_t>> dilations3D = {{1, 1, 1}};
 
 const auto conv3DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(kernels3D),
@@ -170,11 +155,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, Convol
         ::testing::Combine(
             conv3DParams_ExplicitPadding,
             ::testing::ValuesIn(netPrecisions3D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -183,11 +164,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, Convoluti
         ::testing::Combine(
             conv3DParams_AutoPadValid,
             ::testing::ValuesIn(netPrecisions3D),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -202,7 +179,7 @@ const auto conv3DParams_ExplicitPadding_output_padding = ::testing::Combine(
     ::testing::ValuesIn(padEnds3D),
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(outputPadding3D)
 );
 const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine(
@@ -212,7 +189,7 @@ const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine(
     ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(outputPadding3D)
 );
 
@@ -220,11 +197,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputP
         ::testing::Combine(
             conv3DParams_AutoPadValid_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
 
@@ -233,11 +206,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddi
         ::testing::Combine(
             conv3DParams_ExplicitPadding_output_padding,
             ::testing::ValuesIn(netPrecisions),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::Values(InferenceEngine::Layout::ANY),
-            ::testing::ValuesIn(inputShapes3D),
+            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
             ::testing::ValuesIn(emptyOutputShape),
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         ConvolutionBackpropDataLayerTest::getTestCaseName);
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp
index 9d3c5b80145d0f..2990b49ee6e44e 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp
@@ -3,30 +3,28 @@
 //
 
 #include <vector>
-#include "single_layer_tests/grn.hpp"
+#include "single_op_tests/grn.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-using namespace ngraph::helpers;
-
 namespace {
-    // Common params
-    const std::vector<InferenceEngine::Precision> netPrecisions = {
-        InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::FP16
-    };
+using ov::test::GrnLayerTest;
+// Common params
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
+    ov::element::f16
+};
+
+std::vector<std::vector<ov::Shape>> input_shapes_static = {
+    {{1, 3, 30, 30}},
+    {{2, 16, 15, 20}}
+};
 
-    INSTANTIATE_TEST_SUITE_P(smoke_Grn_Basic,
-                             GrnLayerTest,
-                             ::testing::Combine(::testing::ValuesIn(netPrecisions),
-                                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                                ::testing::Values(InferenceEngine::Layout::ANY),
-                                                ::testing::Values(InferenceEngine::Layout::ANY),
-                                                ::testing::Values(std::vector<size_t>({1, 3, 30, 30}),
-                                                                  std::vector<size_t>({2, 16, 15, 20})),
-                                                ::testing::Values(0.33f, 1.1f),
-                                                ::testing::Values(ov::test::utils::DEVICE_GPU)),
-                             GrnLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_Grn_Basic,
+                         GrnLayerTest,
+                         ::testing::Combine(::testing::ValuesIn(netPrecisions),
+                                            ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)),
+                                            ::testing::ValuesIn({0.33f, 1.1f}),
+                                            ::testing::Values(ov::test::utils::DEVICE_GPU)),
+                         GrnLayerTest::getTestCaseName);
 }  // namespace
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp
index 446b0bc46336d0..e643f5716e5696 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp
@@ -4,16 +4,16 @@
 
 #include <vector>
 
-#include "single_layer_tests/group_convolution.hpp"
+#include "single_op_tests/group_convolution.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::GroupConvolutionLayerTest;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32
 };
+
 /* ============= 1D GroupConvolution ============= */
 // 1D group convolution is not working correctly
 const std::vector<std::vector<size_t>> kernels1D = {{3}};
@@ -33,18 +33,15 @@ const auto groupConv1DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations1D),
     ::testing::ValuesIn(numOutChannels1D),
     ::testing::ValuesIn(numGroups1D),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution1D_ExplicitPadding_Disabled, GroupConvolutionLayerTest,
        ::testing::Combine(
            groupConv1DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -56,18 +53,15 @@ const auto dwConv1DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations1D),
     ::testing::ValuesIn(numOutChannels1D),
     ::testing::ValuesIn(numDWGroups1D),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT)
+    ::testing::Values(ov::op::PadType::EXPLICIT)
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_DwGroupConvolution1D_ExplicitPadding, GroupConvolutionLayerTest,
        ::testing::Combine(
            dwConv1DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -105,11 +99,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution2D_ExplicitPadding, GroupConvolut
        ::testing::Combine(
            groupConv2DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -117,11 +108,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution2D_AutoPadValid, GroupConvolution
        ::testing::Combine(
            groupConv2DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 16, 30, 30})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 16, 30, 30}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -157,11 +145,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution3D_ExplicitPadding, GroupConvolut
        ::testing::Combine(
            groupConv3DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 4, 10, 10, 10})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 4, 10, 10, 10}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
 
@@ -169,11 +154,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution3D_AutoPadValid, GroupConvolution
        ::testing::Combine(
            groupConv3DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(std::vector<size_t>({1, 4, 10, 10, 10})),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(
+               std::vector<std::vector<ov::Shape>>({{{1, 4, 10, 10, 10}}}))),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvolutionLayerTest::getTestCaseName);
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
index d7baff2c2e0d7f..40d364385fe7d9 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
@@ -4,26 +4,28 @@
 
 #include <vector>
 
-#include "single_layer_tests/group_convolution_backprop_data.hpp"
+#include "single_op_tests/group_convolution_backprop_data.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::GroupConvBackpropLayerTest;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32
 };
 
-const std::vector<std::vector<size_t>> emptyOutputShape = {{}};
+const std::vector<ov::Shape> emptyOutputShape = {{}};
 const std::vector<std::vector<ptrdiff_t>> emptyOutputPadding = {{}};
 
 const std::vector<size_t> numOutChannels = {16, 32};
 const std::vector<size_t> numGroups = {2, 8, 16};
 
 /* ============= 2D GroupConvolution ============= */
-const std::vector<std::vector<size_t>> inputShapes2D = {{1, 16, 10, 10},
-                                                        {1, 32, 10, 10}};
+const std::vector<std::vector<ov::Shape>> inputShapes2D = {
+    {{1, 16, 10, 10}},
+    {{1, 32, 10, 10}}
+};
+
 const std::vector<std::vector<size_t>> kernels2D = {{1, 1}, {3, 3}};
 const std::vector<std::vector<size_t>> strides2D = {{1, 1}};
 const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
@@ -38,7 +40,7 @@ const auto groupConvBackpropData2DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine(
@@ -49,7 +51,7 @@ const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine(
     ::testing::ValuesIn(dilations2D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 
@@ -57,11 +59,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData2D_ExplicitPadding, GroupCon
        ::testing::Combine(
            groupConvBackpropData2DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes2D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
 
@@ -70,18 +68,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData2D_AutoPadValid, GroupConvBa
        ::testing::Combine(
            groupConvBackpropData2DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes2D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
 
 /* ============= 3D GroupConvolution ============= */
-const std::vector<std::vector<size_t>> inputShapes3D = {{1, 16, 5, 5, 5},
-                                                        {1, 32, 5, 5, 5}};
+const std::vector<std::vector<ov::Shape>> inputShapes3D = {
+    {{1, 16, 5, 5, 5}},
+    {{1, 32, 5, 5, 5}}
+};
+
 const std::vector<std::vector<size_t>> kernels3D = {{1, 1, 1}, {3, 3, 3}};
 const std::vector<std::vector<size_t>> strides3D = {{1, 1, 1}};
 const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
@@ -96,7 +93,7 @@ const auto groupConvBackpropData3DParams_ExplicitPadding = ::testing::Combine(
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::EXPLICIT),
+    ::testing::Values(ov::op::PadType::EXPLICIT),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine(
@@ -107,7 +104,7 @@ const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine(
     ::testing::ValuesIn(dilations3D),
     ::testing::ValuesIn(numOutChannels),
     ::testing::ValuesIn(numGroups),
-    ::testing::Values(ngraph::op::PadType::VALID),
+    ::testing::Values(ov::op::PadType::VALID),
     ::testing::ValuesIn(emptyOutputPadding)
 );
 
@@ -115,11 +112,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData3D_ExplicitPadding, GroupCon
        ::testing::Combine(
            groupConvBackpropData3DParams_ExplicitPadding,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes3D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
 
@@ -128,11 +121,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData3D_AutoPadValid, GroupConvBa
        ::testing::Combine(
            groupConvBackpropData3DParams_AutoPadValid,
            ::testing::ValuesIn(netPrecisions),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::Values(InferenceEngine::Layout::ANY),
-           ::testing::ValuesIn(inputShapes3D),
+           ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(ov::test::utils::DEVICE_GPU)),
        GroupConvBackpropLayerTest::getTestCaseName);
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp
index df58f1314d6346..70fe6f0d8fd995 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp
@@ -4,28 +4,34 @@
 
 #include <vector>
 
-#include "single_layer_tests/gru_cell.hpp"
+#include "single_op_tests/gru_cell.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
-    std::vector<bool> should_decompose{false, true};
-    std::vector<size_t> batch{5};
-    std::vector<size_t> hidden_size{1, 10};
-    std::vector<size_t> input_size{1, 30};
-    std::vector<std::vector<std::string>> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"},
-                                                         {"tanh", "relu"}};
-    std::vector<float> clip = {0.0f, 0.7f};
-    std::vector<bool> linear_before_reset = {true, false};
-    std::vector<ngraph::helpers::InputLayerType> layer_types = {
-        ngraph::helpers::InputLayerType::CONSTANT,
-        ngraph::helpers::InputLayerType::PARAMETER
-    };
-    std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
-                                                             InferenceEngine::Precision::FP16};
-
-    INSTANTIATE_TEST_SUITE_P(GRUCellCommon, GRUCellTest,
+using ov::test::GRUCellTest;
+
+std::vector<bool> should_decompose{false, true};
+std::vector<size_t> batch{5};
+std::vector<size_t> hidden_size{1, 10};
+std::vector<size_t> input_size{1, 30};
+std::vector<std::vector<std::string>> activations = {
+    {"relu", "tanh"},
+    {"tanh", "sigmoid"},
+    {"sigmoid", "tanh"},
+    {"tanh", "relu"}
+};
+
+std::vector<float> clip = {0.0f, 0.7f};
+std::vector<bool> linear_before_reset = {true, false};
+std::vector<ov::test::utils::InputLayerType> layer_types = {
+    ov::test::utils::InputLayerType::CONSTANT,
+    ov::test::utils::InputLayerType::PARAMETER
+};
+
+std::vector<ov::element::Type> netPrecisions = {ov::element::f32,
+                                                ov::element::f16};
+
+INSTANTIATE_TEST_SUITE_P(GRUCellCommon, GRUCellTest,
        ::testing::Combine(
            ::testing::ValuesIn(should_decompose),
            ::testing::ValuesIn(batch),
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp
index 6614ab44b7e294..5f4e9c3f1acd84 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp
@@ -2,17 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "shared_test_classes/single_layer/is_inf.hpp"
-
-#include <array>
-#include <vector>
-
-using namespace ov::test;
-using namespace ov::test::subgraph;
+#include "single_op_tests/is_inf.hpp"
 
 namespace {
+using ov::test::IsInfLayerTest;
 
-const std::vector<std::vector<InputShape>> inShapesStatic = {
+const std::vector<std::vector<ov::test::InputShape>> inShapesStatic = {
     {{{}, {{2}}}},
     {{{}, {{10, 200}}}},
     {{{}, {{4, 4, 16}}}},
@@ -21,7 +16,7 @@ const std::vector<std::vector<ov::test::InputShape>> inShapesStatic = {
     {{{}, {{16, 16, 16, 16, 16, 16}}}},
 };
 
-constexpr std::array netPrecisions = {ov::element::f32, ov::element::f16};
+constexpr std::array netPrecisions = {ov::element::f32, ov::element::f16};
 
 constexpr std::array detectNegative = {true, false};
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp
index 1f27e97f8f97c3..57359e6d928776 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp
@@ -4,21 +4,20 @@
 
 #include <vector>
 
-#include "single_layer_tests/log_softmax.hpp"
+#include "single_op_tests/log_softmax.hpp"
 #include "common_test_utils/test_constants.hpp"
 
-using namespace LayerTestsDefinitions;
-
 namespace {
+using ov::test::LogSoftmaxLayerTest;
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
+const std::vector<ov::element::Type> netPrecisions = {
+    ov::element::f32,
 };
 
-const std::vector<InferenceEngine::SizeVector> inputShapes2D = {
-    InferenceEngine::SizeVector {1, 100},
-    InferenceEngine::SizeVector {100, 1},
-    InferenceEngine::SizeVector {10, 10},
+const std::vector<std::vector<ov::Shape>> inputShapes2D = {
+    {{1, 100}},
+    {{100, 1}},
+    {{10, 10}},
 };
 
 const std::vector<int64_t> axis2D = {
@@ -28,20 +27,15 @@ const std::vector<int64_t> axis2D = {
 INSTANTIATE_TEST_SUITE_P(smoke_LogSoftmax2D,
                          LogSoftmaxLayerTest,
                          testing::Combine(testing::ValuesIn(netPrecisions),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::ValuesIn(inputShapes2D),
+                                          testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)),
                                           testing::ValuesIn(axis2D),
-                                          testing::Values(ov::test::utils::DEVICE_GPU),
-                                          testing::Values(std::map<std::string, std::string>())),
+                                          testing::Values(ov::test::utils::DEVICE_GPU)),
                          LogSoftmaxLayerTest::getTestCaseName);
 
-const std::vector<InferenceEngine::SizeVector> inputShapes4D = {
-    InferenceEngine::SizeVector {1, 100, 1, 1},
-    InferenceEngine::SizeVector {1, 3, 4, 3},
-    InferenceEngine::SizeVector {2, 3, 4, 5},
+const std::vector<std::vector<ov::Shape>> inputShapes4D = {
+    {{1, 100, 1, 1}},
+    {{1, 3, 4, 3}},
+    {{2, 3, 4, 5}},
 };
 
 const std::vector<int64_t> axis4D = {
@@ -51,14 +45,9 @@ const std::vector<int64_t> axis4D = {
 INSTANTIATE_TEST_SUITE_P(smoke_LogSoftmax4D,
                          LogSoftmaxLayerTest,
                          testing::Combine(testing::ValuesIn(netPrecisions),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::Values(InferenceEngine::Layout::ANY),
-                                          testing::ValuesIn(inputShapes4D),
+                                          testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes4D)),
                                           testing::ValuesIn(axis4D),
-                                          testing::Values(ov::test::utils::DEVICE_GPU),
-                                          testing::Values(std::map<std::string, std::string>())),
+                                          testing::Values(ov::test::utils::DEVICE_GPU)),
                          LogSoftmaxLayerTest::getTestCaseName);
 
 }  // namespace
-using namespace LayerTestsDefinitions; -using namespace LayerTestsDefinitions::LogicalParams; - namespace { +using ov::test::LogicalLayerTest; + +std::vector> combine_shapes(const std::map>& input_shapes_static) { + std::vector> result; + for (const auto& input_shape : input_shapes_static) { + for (auto& item : input_shape.second) { + result.push_back({input_shape.first, item}); + } + + if (input_shape.second.empty()) { + result.push_back({input_shape.first, {}}); + } + } + return result; +} -std::map, std::vector>> inputShapes = { +std::map> inputShapes = { {{1}, {{1}, {17}, {1, 1}, {2, 18}, {1, 1, 2}, {2, 2, 3}, {1, 1, 2, 3}}}, {{5}, {{1}, {1, 1}, {2, 5}, {1, 1, 1}, {2, 2, 5}}}, {{2, 200}, {{1}, {200}, {1, 200}, {2, 200}, {2, 2, 200}}}, @@ -20,7 +32,7 @@ std::map, std::vector>> inputShapes = { {{2, 1, 1, 3, 1}, {{1}, {1, 3, 4}, {2, 1, 3, 4}, {1, 1, 1, 1, 1}}}, }; -std::map, std::vector>> inputShapesNot = { +std::map> inputShapesNot = { {{1}, {}}, {{5}, {}}, {{2, 200}, {}}, @@ -29,51 +41,39 @@ std::map, std::vector>> inputShapesNot {{2, 1, 1, 3, 1}, {}}, }; -std::vector inputsPrecisions = { - InferenceEngine::Precision::BOOL, -}; - -std::vector logicalOpTypes = { - ngraph::helpers::LogicalTypes::LOGICAL_AND, - ngraph::helpers::LogicalTypes::LOGICAL_OR, - ngraph::helpers::LogicalTypes::LOGICAL_XOR, +std::vector logicalOpTypes = { + ov::test::utils::LogicalTypes::LOGICAL_AND, + ov::test::utils::LogicalTypes::LOGICAL_OR, + ov::test::utils::LogicalTypes::LOGICAL_XOR, }; -std::vector secondInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +std::vector secondInputTypes = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER, }; -std::vector netPrecisions = { - InferenceEngine::Precision::FP32, +std::vector netPrecisions = { + ov::element::boolean, }; std::map additional_config = {}; INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, LogicalLayerTest, - ::testing::Combine(::testing::ValuesIn(LogicalLayerTest::combineShapes(inputShapes)), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(combine_shapes(inputShapes))), ::testing::ValuesIn(logicalOpTypes), ::testing::ValuesIn(secondInputTypes), ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(inputsPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), LogicalLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNot, LogicalLayerTest, - ::testing::Combine(::testing::ValuesIn(LogicalLayerTest::combineShapes(inputShapesNot)), - ::testing::Values(ngraph::helpers::LogicalTypes::LOGICAL_NOT), - ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(combine_shapes(inputShapesNot))), + ::testing::Values(ov::test::utils::LogicalTypes::LOGICAL_NOT), + ::testing::Values(ov::test::utils::InputLayerType::CONSTANT), ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(inputsPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), LogicalLayerTest::getTestCaseName); diff --git 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp index 916690bc246a1e..776095cb34a4cd 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/lrn.hpp" +#include "single_op_tests/lrn.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -const std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; +using ov::test::LrnLayerTest; + +const std::vector netPrecisions = {ov::element::f32, + ov::element::f16}; const std::vector> axes = {{1}, {2, 3}}; @@ -28,9 +28,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_LrnCheck, LrnLayerTest, ::testing::Values(size), ::testing::ValuesIn(axes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(std::vector({10, 10, 3, 2})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(std::vector>( + {{{10, 10, 3, 2}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), LrnLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp index 99bd6279d2f141..37db834db0e465 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp @@ -4,12 +4,12 @@ #include -#include "single_layer_tests/lstm_cell.hpp" +#include "single_op_tests/lstm_cell.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::LSTMCellTest; + std::vector should_decompose{false, true}; std::vector batch{5}; std::vector hidden_size{1, 10}; @@ -20,12 +20,12 @@ std::vector> activations = {{"relu", "sigmoid", "tanh"} {"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"}, {"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}}; std::vector clip{0.f, 0.7f}; -std::vector layer_types = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER +std::vector layer_types = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER }; -std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; +std::vector netPrecisions = {ov::element::f32, + ov::element::f16}; INSTANTIATE_TEST_SUITE_P(LSTMCellCommon, LSTMCellTest, ::testing::Combine( diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp index 026a97154cee4a..95f9f58da6fcd8 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp @@ -4,65 +4,102 @@ #include -#include "single_layer_tests/mat_mul.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/mat_mul.hpp" namespace { 
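The mat_mul hunk that follows splits the single flag-annotated shapeRelatedParams table into four shape vectors, one per (transpose_a, transpose_b) combination, each paired with a fixed std::make_pair(...) value in its own instantiation. A standalone sketch, not part of the suite, of why for example the first_transpose entry { {2, 1, 2, 3}, {3, 2, 4} } is well-formed: MatMul's transpose flags swap the two innermost dimensions of the corresponding input before the (m x k) * (k x n) contraction, and the leading batch dimensions broadcast.

#include <iostream>
#include <memory>
#include "openvino/op/matmul.hpp"
#include "openvino/op/parameter.hpp"

int main() {
    auto a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 1, 2, 3});
    auto b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{3, 2, 4});
    // transpose_a=true: A contributes (3 x 2) matrices; its batch dims {2, 1}
    // broadcast against B's batch dim {3}, so the result batch is {2, 3}.
    auto matmul = std::make_shared<ov::op::v0::MatMul>(a, b, /*transpose_a=*/true, /*transpose_b=*/false);
    std::cout << matmul->get_output_partial_shape(0) << std::endl;  // prints [2,3,3,4]
    return 0;
}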
+using ov::test::MatMulLayerTest; +using ov::test::utils::InputLayerType; + +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::f16, +}; + +std::vector> no_transpose_shapeRelatedParams = { + { {2, 1, 1, 5, 6}, {1, 1, 6, 4} }, + { {2, 1, 2, 3, 5, 6}, {1, 1, 6, 4} }, + { {1, 4, 5, 6}, {1, 4, 6, 4} }, + { {4, 5, 6}, {6, 3} }, + { {9, 9, 9}, {9, 9} }, + { {1, 2, 3}, {1, 1, 3, 2} }, + { {1, 3, 2, 4}, {2, 1, 4, 2} }, + { {2, 1, 2, 4}, {1, 3, 4, 2} }, + { {3, 2, 4}, {2, 1, 4, 2} }, + { {2, 1, 4, 2}, {3, 2, 4} }, + { {3}, {2, 2, 3, 1} }, + { {2, 2, 1, 3}, {3} }, + { {1, 5}, {5, 1} }, + { {1, 5}, {5} }, + { {5}, {5, 1} }, + { {5}, {5} }, +}; + +std::vector> first_transpose_shapeRelatedParams = { + { {2, 1, 2, 3}, {3, 2, 4} }, + { {100, 65}, {100, 73} }, + { {5, 1}, {5, 1} }, +}; -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +std::vector> second_transpose_shapeRelatedParams = { + { {1, 16, 128}, {1, 64, 128} }, + { {2, 1, 3, 2}, {3, 4, 2} }, + { {1, 64, 80}, {1, 77, 80} }, + { {65, 100}, {73, 100} }, + { {1, 5}, {1, 5} }, }; -const std::vector shapeRelatedParams = { - { { {2, 1, 1, 5, 6}, false }, { {1, 1, 6, 4}, false } }, - { { {2, 2, 4, 16}, true }, { {1, 1, 1, 4}, true } }, - { { {2, 1, 2, 3, 5, 6}, false }, { {1, 1, 6, 4}, false } }, - { { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } }, - { { {1, 16, 128}, false }, { {1, 64, 128}, true } }, - { { {4, 5, 6}, false }, { {6, 3}, false } }, - { { {9, 9, 9}, false }, { {9, 9}, false } }, - { { {1, 2, 3}, false }, { {1, 1, 3, 2}, false } }, - { { {1, 3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, - { { {2, 1, 2, 4}, false }, { {1, 3, 4, 2}, false } }, - { { {3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, - { { {2, 1, 4, 2}, false }, { {3, 2, 4}, false } }, - { { {2, 1, 2, 3}, true }, { {3, 2, 4}, false } }, - { { {2, 1, 3, 2}, false }, { {3, 4, 2}, true } }, - { { {2, 1, 2, 3}, true }, { {3, 4, 2}, true } }, - { { {1, 64, 80}, false }, { {1, 77, 80}, true } }, - { { {3}, false }, { {2, 2, 3, 1}, false } }, - { { {2, 2, 1, 3}, false }, { {3}, false } }, - { { {65, 100}, false }, { {73, 100}, true } }, - { { {100, 65}, true }, { {100, 73}, false } }, - { { {100, 65}, true }, { {73, 100}, true } }, - { { {1, 5}, false }, { {5, 1}, false } }, - { { {5, 1}, true }, { {5, 1}, false } }, - { { {1, 5}, false }, { {1, 5}, true } }, - { { {1, 5}, false }, { {5}, false } }, - { { {5}, false }, { {5, 1}, false } }, - { { {5}, false }, { {5}, false } }, - { { {5}, true }, { {5}, true } } +std::vector> both_transpose_shapeRelatedParams = { + { {2, 2, 4, 16}, {1, 1, 1, 4} }, + { {2, 1, 2, 3}, {3, 4, 2} }, + { {100, 65}, {73, 100} }, + { {5}, {5} }, }; -std::vector secondaryInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +std::vector secondaryInputTypes = { + InputLayerType::CONSTANT, + InputLayerType::PARAMETER, }; std::map additional_config = {}; -INSTANTIATE_TEST_SUITE_P(smoke_MatMul, MatMulTest, +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_NoTranspose, MatMulLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(no_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(false, false)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_FirstTranspose, MatMulLayerTest, + 
::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(first_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(true, false)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_SecondTranspose, MatMulLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(second_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(false, true)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_BothTranspose, MatMulLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapeRelatedParams), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(both_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(true, true)), ::testing::ValuesIn(inputPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(secondaryInputTypes), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), - MatMulTest::getTestCaseName); + MatMulLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp new file mode 100644 index 00000000000000..80224b57ebcff7 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/nms_rotated.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace InferenceEngine; +using namespace ngraph; + +const std::vector inShapeParams = { + InputShapeParams{2, 50, 50}, + InputShapeParams {9, 10, 10} +}; + +const std::vector maxOutBoxPerClass = {5, 20}; +const std::vector threshold = {0.3f, 0.7f}; +const std::vector sortResDesc = {true, false}; +const std::vector outType = {element::i32, element::i64}; +const std::vector clockwise = {true, false}; + +const std::vector inputPrecisions = {Precision::FP32, Precision::FP16}; + +INSTANTIATE_TEST_SUITE_P(smoke_NmsRotatedLayerTest, + NmsRotatedLayerTest, + ::testing::Combine(::testing::ValuesIn(inShapeParams), + ::testing::Combine(::testing::ValuesIn(inputPrecisions), + ::testing::Values(Precision::I32), + ::testing::Values(Precision::FP32)), + ::testing::ValuesIn(maxOutBoxPerClass), + ::testing::ValuesIn(threshold), + ::testing::ValuesIn(threshold), + ::testing::ValuesIn(sortResDesc), + ::testing::ValuesIn(outType), + ::testing::ValuesIn(clockwise), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + NmsRotatedLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 798282680dbccd..25b679cf22cc82 100644 --- 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -115,5 +115,7 @@ std::vector disabledTestPatterns() { R"(.*smoke_LPT.*ElementwiseBranchSelectionTransformation.*)", // Dynamic state unsupported for now R"(.*MemoryDynamicBatch.*)", + // Issue: 123493 + R"(.*GroupNormalizationTest.*CompareWithRefs.*NetType=f16.*)", }; } diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..6e95d1e29a15af --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/transpose_matmul_fusion.hpp" + +using namespace ov::test; + +namespace { +INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion, + ::testing::Values(ov::test::utils::DEVICE_GPU), + TransposeMatMulFusion::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp index d394eb7d05de17..a36426cd84c373 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp @@ -89,6 +89,13 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -106,18 +113,6 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp index fb955a63a837de..531c97fa218a16 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp +++ 
b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp @@ -90,6 +90,13 @@ class GroupDeconvolutionLayerGPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -107,18 +114,6 @@ class GroupDeconvolutionLayerGPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp index 440924fd0a541e..755371e1b0a548 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp @@ -140,12 +140,6 @@ class RandomUnifromDynamicGPUTest : public testing::WithParamInterface(results, params, "random_uniform_test"); } - precisions_map get_ref_precisions_convert_map() override { - // Do not convert reference function from FP16 to FP32 precision, since in case of RandomUniform operation - // data type is matter - return {}; - } - private: std::pair min_max_values; }; diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp new file mode 100644 index 00000000000000..2ea4fc415b52f0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_models/builders.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +using namespace ngraph; +using namespace ov::test; + +namespace SubgraphTestsDefinitions { +/* + * Input(F32) Const(F32) + * | \ / + * | Power(F32) Const(I64) + * | \ / + * | ReduceMean(F32) + * | | Const(F32) + * | | / + * | Add(F32) + * | | + * | Sqrt(F32) Const(F32) + * | | / + * | Divide(F32) + * | / + * Const(F32) Multiply(F32) + * \ | + * Multiply(F32) + * | + * Convert(F16) + */ +using RMSNormDecompositionParams = std::tuple, // input shapes + ov::test::ElementType, // input precision + std::map>; // additional config + +class RMSNormDecomposition : public testing::WithParamInterface, 
public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::vector input_shapes; + ElementType input_precision; + std::map additional_config; + + std::tie(input_shapes, input_precision, additional_config) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (const auto& shape : input_shapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + result << ")_TS="; + for (const auto& shape : input_shapes) { + result << "("; + if (!shape.second.empty()) { + auto itr = shape.second.begin(); + do { + result << ov::test::utils::vec2str(*itr); + } while (++itr != shape.second.end() && result << "_"); + } + result << ")_"; + } + result << "input_precision=" << input_precision << "_"; + + result << "config=("; + for (const auto& configEntry : additional_config) { + result << configEntry.first << ", " << configEntry.second << ":"; + } + result << ")"; + + return result.str(); + } + +protected: + std::shared_ptr init_subgraph(std::vector& input_shapes, + const ov::Shape& target_shape, + const ov::element::Type input_precision) { + ov::ParameterVector params{std::make_shared(input_precision, input_shapes[0])}; + + // x^2 + auto power_const = ov::opset10::Constant::create(input_precision, {}, {2.f}); + auto power = std::make_shared(params[0], power_const); + + // ReduceMean(x^2,axes) + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + + // ReduceMean(x^2,axes)+eps + auto eps = ov::opset10::Constant::create(input_precision, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + + // Sqrt(ReduceMean(x^2,axes)+eps) + auto sqrt = std::make_shared(add_eps); + + // 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto div_const = ov::opset10::Constant::create(input_precision, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto mul1 = std::make_shared(params[0], div); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma + auto dim = *target_shape.rbegin(); + auto gamma = ngraph::builder::makeConstant(input_precision, ov::Shape{dim}, std::vector{}, true); + auto mul2 = std::make_shared(gamma, mul1); + + auto comp = std::make_shared(mul2, ov::element::f16); + + return std::make_shared(NodeVector{comp}, params, "RMSNormDecomposition"); + } + + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector input_shapes; + ElementType input_precision; + std::map additional_config; + + std::tie(input_shapes, input_precision, additional_config) = GetParam(); + + configuration.insert(additional_config.begin(), additional_config.end()); + init_input_shapes(input_shapes); + + inType = outType = input_precision; + + function = init_subgraph(inputDynamicShapes, targetStaticShapes.front().front(), input_precision); + } +}; + +TEST_P(RMSNormDecomposition, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +namespace { + +const std::vector input_precisions = {ov::element::f32, ov::element::f16}; + +const std::vector> input_shapes_basic = { + {{{-1, -1, 96}, {{1, 4, 96}}}}, + {{{-1, -1, -1}, {{1, 2, 16}}}}, + {{{}, {{1, 2, 6}}}}, + {{{}, {{1, 2, 18}}}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_RMSNormDecomposition_basic, + RMSNormDecomposition, + ::testing::Combine(::testing::ValuesIn(input_shapes_basic), + ::testing::ValuesIn(input_precisions), + ::testing::Values(std::map())), + RMSNormDecomposition::getTestCaseName); +} // namespace + +} // namespace 
SubgraphTestsDefinitions
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp
index 909149b05e32fa..d14c0cab8d69a3 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp
@@ -709,3 +709,250 @@ TYPED_TEST(non_max_suppression_basic, soft_nms_sigma_cached) {
 TYPED_TEST(non_max_suppression_basic, multiple_outputs_cached) {
     this->test_multiple_outputs(true);
 }
+
+namespace {
+template<typename T, typename T_IND>
+struct NmsRotatedParams {
+    std::string test_name;
+    int num_batches;
+    int num_boxes;
+    int num_classes;
+    std::vector<T> boxes;
+    std::vector<T> scores;
+    int max_output_boxes_per_class;
+    float iou_threshold;
+    float score_threshold;
+    bool sort_result_descending;
+    bool clockwise;
+    std::vector<T_IND> expected_indices;
+    std::vector<T> expected_scores;
+};
+
+template<typename T> float getError();
+
+template<>
+float getError<float>() {
+    return 0.001;
+}
+
+template<>
+float getError<ov::float16>() {
+    return 0.1;
+}
+
+template<typename T, typename T_IND>
+struct nms_rotated_test : public ::testing::TestWithParam<NmsRotatedParams<T, T_IND>> {
+public:
+    void test(bool is_caching_test = false) {
+        const NmsRotatedParams<T, T_IND> param = testing::TestWithParam<NmsRotatedParams<T, T_IND>>::GetParam();
+        const auto data_type = ov::element::from<T>();
+
+        auto& engine = tests::get_test_engine();
+
+        const auto boxes_layout = layout(ov::PartialShape{param.num_batches, param.num_boxes, 5}, data_type,
+                                         format::bfyx);
+        const auto scores_layout = layout(ov::PartialShape{param.num_batches, param.num_classes, param.num_boxes},
+                                          data_type, format::bfyx);
+
+        const int selected_indices_num = param.num_batches * param.num_classes * param.num_boxes;
+        const auto selected_scores_layout = layout(ov::PartialShape{selected_indices_num/*expected_indices_count*/, 3},
+                                                   data_type, format::bfyx);
+        const auto valid_outputs_layout = layout(ov::PartialShape{1}, cldnn::data_types::i32, format::bfyx);
+
+        const auto boxes_mem = engine.allocate_memory(boxes_layout);
+        tests::set_values(boxes_mem, param.boxes);
+
+        const auto scores_mem = engine.allocate_memory(scores_layout);
+        tests::set_values(scores_mem, param.scores);
+
+        const auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
+        tests::set_values(num_per_class_mem, {1.f * param.max_output_boxes_per_class});
+
+        const auto iou_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
+        tests::set_values(iou_threshold_mem, {param.iou_threshold});
+
+        const auto score_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
+        tests::set_values(score_threshold_mem, {param.score_threshold});
+
+        const auto selected_scores_mem = engine.allocate_memory(selected_scores_layout);
+        const auto valid_outputs_mem = engine.allocate_memory(valid_outputs_layout);
+
+        topology topo;
+        topo.add(input_layout("boxes", boxes_layout));
+        topo.add(input_layout("scores", scores_layout));
+        topo.add(data("num_per_class", num_per_class_mem));
+        topo.add(data("iou_threshold", iou_threshold_mem));
+        topo.add(data("score_threshold", score_threshold_mem));
+        topo.add(mutable_data("selected_scores", selected_scores_mem));
+        topo.add(mutable_data("valid_outputs", valid_outputs_mem));
+        auto nms = non_max_suppression("nms",
+                                       input_info("boxes"),
+                                       input_info("scores"),
+                                       selected_indices_num,
+                                       false,
+                                       param.sort_result_descending,
+                                       "num_per_class",
+                                       "iou_threshold",
+                                       "score_threshold",
+                                       "",
+                                       "selected_scores",
+                                       "valid_outputs");
+        nms.rotation = param.clockwise ? non_max_suppression::Rotation::CLOCKWISE :
+                                         non_max_suppression::Rotation::COUNTERCLOCKWISE;
+
+        topo.add(nms);
+
+        ExecutionConfig config = get_test_default_config(engine);
+        config.set_property(ov::intel_gpu::optimize_data(true));
+
+        cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
+        net->set_input_data("boxes", boxes_mem);
+        net->set_input_data("scores", scores_mem);
+        const auto result = net->execute();
+        const auto indices_mem = result.at("nms").get_memory();
+        const cldnn::mem_lock<T_IND> indices_ptr(indices_mem, get_test_stream());
+        const cldnn::mem_lock<T> selected_scores_ptr(selected_scores_mem, get_test_stream());
+        const cldnn::mem_lock<int> valid_outputs_ptr(valid_outputs_mem, get_test_stream());
+
+        const auto expected_valid_outputs = param.expected_indices.size() / 3;
+        const size_t num_valid_outputs = static_cast<size_t>(valid_outputs_ptr[0]);
+
+        EXPECT_EQ(num_valid_outputs, expected_valid_outputs);
+        ASSERT_GE(indices_ptr.size(), param.expected_indices.size());
+        ASSERT_GE(selected_scores_ptr.size(), param.expected_scores.size());
+
+        for (size_t i = 0; i < indices_ptr.size(); ++i) {
+            if (i < num_valid_outputs * 3) {
+                EXPECT_EQ(param.expected_indices[i], indices_ptr[i]) << "at i = " << i;
+                EXPECT_NEAR(param.expected_scores[i], selected_scores_ptr[i], getError<T>()) << "at i = " << i;
+            } else {
+                EXPECT_EQ(indices_ptr[i], -1) << "at i = " << i;
+                EXPECT_NEAR(selected_scores_ptr[i], -1, getError<T>()) << "at i = " << i;
+            }
+        }
+    }
+};
+
+
+struct PrintToStringParamName {
+    template<typename T, typename T_IND>
+    std::string operator()(const testing::TestParamInfo<NmsRotatedParams<T, T_IND>>& info) {
+        const auto& p = info.param;
+        std::ostringstream result;
+        result << p.test_name << "_";
+        result << "DataType=" << ov::element::Type(ov::element::from<T>());
+        result << "_IndexType=" << ov::element::Type(ov::element::from<T_IND>());
+        return result.str();
+    }
+};
+
+
+using nms_rotated_test_f32_i32 = nms_rotated_test<float, int32_t>;
+using nms_rotated_test_f16_i32 = nms_rotated_test<ov::float16, int32_t>;
+
+TEST_P(nms_rotated_test_f32_i32, basic) {
+    ASSERT_NO_FATAL_FAILURE(test());
+}
+
+TEST_P(nms_rotated_test_f16_i32, basic) {
+    ASSERT_NO_FATAL_FAILURE(test());
+}
+
+template<typename T, typename T_IND>
+std::vector<NmsRotatedParams<T, T_IND>> getNmsRotatedParams() {
+    const std::vector<NmsRotatedParams<T, T_IND>> params = {
+        {"basic",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         5000, 0.5f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 1, 0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7, 0.0, 0.0, 0.65},
+        },
+        {"max_out_2",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         2, 0.5f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7},
+        },
+        {"score_threshold",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         5000, 0.5f, 0.67f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7},
+        },
+        {"iou_threshold_2",
+         1, 4, 1,
+         std::vector<T>{
+             7.0, 4.0, 8.0, 7.0, 0.5,
+             4.0, 7.0, 9.0, 11.0, 0.6,
+             4.0, 8.0, 10.0, 12.0, 0.3,
+             2.0, 5.0, 13.0, 7.0, 0.6},
+         std::vector<T>{0.65, 0.7, 0.55, 0.96},
+         5000, 0.3f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 3, 0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.65},
+        },
+        {"negative_cw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 0, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.8, 0.0, 0.0, 0.7}
+        },
+        {"negative_ccw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, false,
+         std::vector<T_IND>{0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.8}
+        },
+        {"positive_ccw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, false,
+         std::vector<T_IND>{0, 0, 0, 0, 0, 1},
+         std::vector<T>{0.0, 0.0, 0.8, 0.0, 0.0, 0.7}
+        },
+        {"positive_cw",
+         1, 2, 1,
+         std::vector<T>{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0},
+         std::vector<T>{0.8, 0.7},
+         5000, 0.1f, 0.0f, false, true,
+         std::vector<T_IND>{0, 0, 0},
+         std::vector<T>{0.0, 0.0, 0.8}
+        }
+    };
+
+    return params;
+}
+INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
+                         nms_rotated_test_f32_i32,
+                         ::testing::ValuesIn(getNmsRotatedParams<float, int32_t>()),
+                         PrintToStringParamName());
+
+INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
+                         nms_rotated_test_f16_i32,
+                         ::testing::ValuesIn(getNmsRotatedParams<ov::float16, int32_t>()),
+                         PrintToStringParamName());
+} // namespace
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp
new file mode 100644
index 00000000000000..deee8418e23fae
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp
@@ -0,0 +1,184 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+
+#include
+#include
+#include "rms_inst.h"
+
+using namespace cldnn;
+using namespace ::tests;
+
+class rms_gpu_test : public ::testing::TestWithParam {};
+
+template<typename T>
+void rms_ref(const memory::ptr input, const memory::ptr gamma, memory::ptr output, float epsilon) {
+    auto input_layout = input->get_layout();
+    auto gamma_layout = gamma->get_layout();
+
+    uint32_t batch_size = input_layout.batch();
+    uint32_t feature_size = input_layout.feature();
+    uint32_t y_size = input_layout.spatial(1);
+    uint32_t x_size = input_layout.spatial(0);
+
+    cldnn::mem_lock<T> src(input, get_test_stream());
+    cldnn::mem_lock<T> weight(gamma, get_test_stream());
+    cldnn::mem_lock<T> dst(output, get_test_stream());
+
+    for (uint32_t b = 0; b < batch_size; ++b) {
+        for (uint32_t f = 0; f < feature_size; ++f) {
+            float rms = 0.f;
+            for (uint32_t y = 0; y < y_size; ++y) {
+                for (uint32_t x = 0; x < x_size; ++x) {
+                    auto tensor_src = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    size_t src_offset = input_layout.get_linear_offset(tensor_src);
+                    rms += std::pow(static_cast<float>(src[src_offset]), 2);
+                }
+            }
+            rms /= y_size * x_size;
+            rms += epsilon;
+            rms = std::pow(std::sqrt(rms), -1);
+
+            for (uint32_t y = 0; y < y_size; ++y) {
+                for (uint32_t x = 0; x < x_size; ++x) {
+                    auto tensor_src = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    auto tensor_weight = tensor(batch(b), feature(0), spatial(x, y, 0, 0));
+                    auto tensor_dst = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    size_t src_offset = input_layout.get_linear_offset(tensor_src);
+                    size_t weight_offset = input_layout.get_linear_offset(tensor_weight);
+                    size_t dst_offset = input_layout.get_linear_offset(tensor_dst);
+                    float result = rms * static_cast<float>(src[src_offset]) * static_cast<float>(weight[weight_offset]);
+                    dst[dst_offset] = static_cast<T>(result);
+                }
+            }
+        }
+    }
+}
+
+TEST(rms_gpu_test,
rms_test_bfyx_ref) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx}); + auto gamma = engine.allocate_memory({ov::PartialShape{1, 6}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, + 0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} + +TEST(rms_gpu_test, rms_test_bfyx_opt) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx}); + auto gamma = engine.allocate_memory({ov::PartialShape{1, 16}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, -0.005402f, -0.002945f, + 0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f, 0.000568f, 0.002579f, + 0.003098f, -0.006989f, -0.000244f, 0.010193f, 0.002899f, -0.005798f, -0.026978f, 0.008789f, + 0.002258f, 0.006500f, 0.003159f, -0.012329f, 0.026245f, -0.001839f, 0.000259f, 0.002670f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f, 0.008362f, 0.008179f, + 0.018188f, 0.021973f, 0.005249f, 0.004639f, 0.004272f, 0.020264f, 0.013489f, 0.008789f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} + +TEST(rms_gpu_test, rms_test_bfyx_opt_leftovers) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 18}, data_types::f32, format::bfyx}); + auto gamma = 
engine.allocate_memory({ov::PartialShape{1, 18}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 18}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, -0.005402f, -0.002945f, 0.006744f, + -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f, 0.000568f, 0.002579f, 0.003098f, -0.006989f, + -0.000244f, 0.010193f, 0.002899f, -0.005798f, -0.026978f, 0.008789f, 0.002258f, 0.006500f, 0.003159f, + -0.012329f, 0.026245f, -0.001839f, 0.000259f, 0.002670f, 0.001419f, 0.001617f,-0.006622f, 0.010864f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f, 0.008362f, 0.008179f, 0.018188f, + 0.021973f, 0.005249f, 0.004639f, 0.004272f, 0.020264f, 0.013489f, 0.008789f, 0.006653f, 0.010315f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} diff --git a/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp new file mode 100644 index 00000000000000..26d8638d2b904e --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp @@ -0,0 +1,144 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +TEST_F(TransformationTestsF, RMSNormFusionTest1) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {-1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto rms_const = 
ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto rms = std::make_shared(input, rms_const, 1e-5f, ov::element::f16); + + model_ref = std::make_shared(ov::NodeVector{rms}, ov::ParameterVector{input}); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest2) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest3) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest4) { + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + 
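Every fixture in this file builds a variant of the same decomposed RMS-norm expression (the reciprocal of the square root is taken either through a division or through a power of -1; the exact op types are elided in the make_shared calls above), and the pass registered via register_pass is expected to collapse the matching variants into a single rms node. In math form, with the mean taken over the last axis as the {-1} reduction axes above:

\mathrm{RMSNorm}(x) = \gamma \odot \frac{x}{\sqrt{\tfrac{1}{n}\sum_{i=1}^{n} x_i^{2} + \varepsilon}}, \qquad \varepsilon = 10^{-5}

Fixtures that construct a model_ref assert that the fused rms op replaces this whole subgraph; fixtures without one rely on the TransformationTestsF default of comparing against the unmodified graph, i.e. they pin down variants that are expected to stay untouched.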
+TEST_F(TransformationTestsF, RMSNormFusionTest5) { + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {-1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto rms_const = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto rms = std::make_shared(input, rms_const, 1e-5f, ov::element::f16); + + model_ref = std::make_shared(ov::NodeVector{rms}, ov::ParameterVector{input}); + } +} diff --git a/src/plugins/template/backend/ops/bitwise_and.cpp b/src/plugins/template/backend/ops/bitwise_and.cpp index d0e5d05b11360d..b6686175377aac 100644 --- a/src/plugins/template/backend/ops/bitwise_and.cpp +++ b/src/plugins/template/backend/ops/bitwise_and.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_and(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/backend/ops/bitwise_or.cpp b/src/plugins/template/backend/ops/bitwise_or.cpp index fe163edeccb3a1..69f45d2916731d 100644 --- a/src/plugins/template/backend/ops/bitwise_or.cpp +++ b/src/plugins/template/backend/ops/bitwise_or.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_or(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/backend/ops/bitwise_xor.cpp b/src/plugins/template/backend/ops/bitwise_xor.cpp index 3fa98775a05e18..43a15c60b5e0a8 100644 --- a/src/plugins/template/backend/ops/bitwise_xor.cpp +++ b/src/plugins/template/backend/ops/bitwise_xor.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), 
inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_xor(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/tests/functional/op_reference/convert.cpp b/src/plugins/template/tests/functional/op_reference/convert.cpp index 0f3e47148790be..b6195744c9c6f3 100644 --- a/src/plugins/template/tests/functional/op_reference/convert.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert.cpp @@ -103,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -134,7 +134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -245,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -261,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -269,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -277,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -285,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -301,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -309,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -317,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -325,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -333,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -341,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -349,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -364,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( 
ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -395,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -452,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -483,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -540,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -571,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -628,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -659,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -718,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -758,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -825,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -841,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -849,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -857,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -865,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -881,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -889,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 
0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -897,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -905,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -913,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -921,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -929,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -945,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -976,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1034,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1065,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1123,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1154,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1211,7 +1211,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1242,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, diff --git a/src/plugins/template/tests/functional/op_reference/convert_like.cpp b/src/plugins/template/tests/functional/op_reference/convert_like.cpp index b46fe98af030c2..4ddf3dda276b92 100644 --- a/src/plugins/template/tests/functional/op_reference/convert_like.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert_like.cpp @@ -6,6 +6,8 @@ #include +#include + #include "conversion.hpp" using namespace ov; @@ -101,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -132,7 
+134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -243,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -259,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -267,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -275,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -283,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -299,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -307,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -315,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -323,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -331,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -339,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -347,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -362,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -393,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -436,7 +438,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i8, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i16 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -450,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, 
ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -481,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -524,7 +526,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i16, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i32 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -538,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -569,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -612,7 +614,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i32, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -626,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -657,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -700,7 +702,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i64, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination u1 @@ -716,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -756,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -823,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -839,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -847,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -855,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -863,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -879,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( 
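// [Editor's note] The expected-byte updates in these conversion tests reflect a
// changed 4-bit packing convention: element 2*i now lands in the LOW nibble of
// byte i and element 2*i+1 in the HIGH nibble (previously the order was
// reversed). A minimal self-contained sketch of the new convention; the helper
// name is illustrative, not an OpenVINO API:
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> pack_4bit_low_nibble_first(const std::vector<uint8_t>& values) {
    std::vector<uint8_t> packed((values.size() + 1) / 2, 0);
    for (size_t i = 0; i < values.size(); ++i) {
        const uint8_t nibble = values[i] & 0x0F;  // i4 values are two's-complement nibbles
        packed[i / 2] |= (i % 2 == 0) ? nibble : static_cast<uint8_t>(nibble << 4);
    }
    return packed;
}

int main() {
    assert(pack_4bit_low_nibble_first({1, 2, 0, 3}) == (std::vector<uint8_t>{0x21, 0x30}));
    // i4 {-1, -2, 2, 3} -> nibbles 0xF, 0xE, 0x2, 0x3 -> bytes {0xEF, 0x32}
    assert(pack_4bit_low_nibble_first({0xF, 0xE, 0x2, 0x3}) == (std::vector<uint8_t>{0xEF, 0x32}));
    return 0;
}
// The same low-bit-first ordering explains the u1 cases (0xA0, i.e. bits
// 1,0,1,0, now converts to 4-bit bytes {0x01, 0x01}), and the f32 -> integer
// cases switch to fractional inputs (2.2, 3.8) so truncation toward zero is
// exercised as well.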
ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -887,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -895,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -903,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -911,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -919,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -927,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -943,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -974,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1017,7 +1019,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u8, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u16 @@ -1032,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1063,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1106,7 +1108,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u16, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u32 @@ -1121,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1152,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1195,7 +1197,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u32, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1209,7 +1211,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, 
ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1240,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1283,7 +1285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u64, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3})), ReferenceConversionLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp index 67a57298da3d95..0ec25023f3801c 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp @@ -132,7 +132,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 3); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-1_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-0_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -149,7 +149,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 1); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-1_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-0_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -162,7 +162,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { ASSERT_TRUE(this->serialize_op({convert_node, test_meta})); ASSERT_TRUE(ov::util::directory_exists(test_artifacts_dir)); auto serialized_model_path = ov::util::path_join({test_artifacts_dir, - "operation", "static", "Convert-1", "f16", "Convert-1_0.xml"}); + "operation", "static", "Convert-0", "f16", "Convert-0_0.xml"}); ASSERT_TRUE(ov::util::file_exists(serialized_model_path)); auto core = ov::Core(); auto serialized_model = core.read_model(serialized_model_path); @@ -171,7 +171,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { } TEST_F(OpCacheUnitTest, get_rel_serilization_dir) { - auto ref_path = ov::util::path_join({"operation", "static", "Convert-1", "f16"}); + auto ref_path = ov::util::path_join({"operation", "static", "Convert-0", "f16"}); auto original_path = this->get_rel_serilization_dir(convert_node); ASSERT_EQ(ref_path, original_path); } diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp new file mode 100644 index 00000000000000..d02a115acaeb18 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
"shared_test_classes/single_layer/nms_rotated.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(NmsRotatedLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp new file mode 100644 index 00000000000000..f253419ca924f4 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp" + +namespace ov { +namespace test { + +TEST_P(TransposeMatMulFusion, CompareWithRefs){ + run(); +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/CMakeLists.txt b/src/tests/functional/shared_test_classes/CMakeLists.txt index a4f46b241437b0..0aa8d0f33592c1 100644 --- a/src/tests/functional/shared_test_classes/CMakeLists.txt +++ b/src/tests/functional/shared_test_classes/CMakeLists.txt @@ -12,6 +12,8 @@ ov_add_target( INCLUDES PUBLIC "$" + PRIVATE + "${OpenVINO_SOURCE_DIR}/src/plugins/template/include" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES @@ -27,4 +29,4 @@ ov_build_target_faster(${TARGET_NAME} # install & export ov_developer_package_export_targets(TARGET ${TARGET_NAME} - INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index 5ca0b6531a39f3..c76cd8fbc1bc72 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -34,15 +34,14 @@ class SubgraphBaseTest : public ov::test::TestsCommon { protected: virtual void compare(const std::vector& expected, const std::vector& actual); - - virtual void configure_model(); virtual void compile_model(); - virtual void init_ref_function(std::shared_ptr& funcRef, - const std::vector& targetInputStaticShapes); - virtual void generate_inputs(const std::vector& targetInputStaticShapes); virtual void infer(); virtual void validate(); + virtual void configure_model();; + virtual void generate_inputs(const std::vector& targetInputStaticShapes); + void update_ref_model(); + void match_parameters(); void init_input_shapes(const std::vector& shapes); void TearDown() override { @@ -65,6 +64,10 @@ class SubgraphBaseTest : public ov::test::TestsCommon { ov::CompiledModel compiledModel; ov::InferRequest inferRequest; + // to provide correct inputs for reference function + std::map, std::shared_ptr> matched_parameters; + precisions_map convert_precisions; + constexpr static const double disable_threshold = std::numeric_limits::max(); double abs_threshold = disable_threshold, rel_threshold = disable_threshold; @@ -75,7 +78,6 @@ class SubgraphBaseTest : public ov::test::TestsCommon { virtual std::vector calculate_refs(); virtual std::vector get_plugin_outputs(); - virtual precisions_map get_ref_precisions_convert_map(); friend void core_configuration(SubgraphBaseTest* test); }; diff --git 
a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp new file mode 100644 index 00000000000000..3d36cf3a2e0439 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" + + +namespace LayerTestsDefinitions { + +using InputShapeParams = std::tuple; // Number of classes + +using InputPrecisions = + std::tuple; // iou_threshold, score_threshold, soft_nms_sigma precisions + +using NmsRotatedParams = std::tuple; // Device name + +class NmsRotatedLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + void GenerateInputs() override; + void Compare(const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) override; + +protected: + void SetUp() override; + InputShapeParams inShapeParams; +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp new file mode 100644 index 00000000000000..c94383725f47ce --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +class TransposeMatMulFusion : public testing::WithParamInterface, + public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; + void TearDown() override; +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 016dc26cccdfc5..aca76c4e9e65ab 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -18,6 +18,8 @@ #include "openvino/pass/serialize.hpp" #include "transformations/convert_precision.hpp" +#include "template/properties.hpp" + #include "common_test_utils/graph_comparator.hpp" #include "ov_models/utils/ov_helpers.hpp" @@ -72,18 +74,7 @@ void SubgraphBaseTest::run() { try { compile_model(); for (const auto& targetStaticShapeVec : targetStaticShapes) { - try { - if (!inputDynamicShapes.empty()) { - // resize ngraph function according new target shape - // Note: output shapes of some nodes depend on the input data - // so for some tests we need to override this function and replace parameter with constant node to get correct output shapes - init_ref_function(functionRefs, targetStaticShapeVec); - } - generate_inputs(targetStaticShapeVec); - } catch (const std::exception& ex) { - throw std::runtime_error("[IE TEST INFRA] Impossible to reshape ov::Model using the shape: " + - ov::test::utils::vec2str(targetStaticShapeVec) + " " + ex.what()); - } + 
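// [Editor's note] From this change on, reference outputs are produced by
// compiling functionRefs on the TEMPLATE plugin (with plugin-side
// transformations disabled) and running an ordinary inference, instead of
// calling the ngraph function interpreter; see calculate_refs() further below.
// A condensed, hedged sketch of that flow, assuming the Template plugin is
// already registered in the ov::Core (the helper name is illustrative):
#include <map>
#include <memory>
#include <vector>
#include "openvino/runtime/core.hpp"
#include "template/properties.hpp"

std::vector<ov::Tensor> calc_template_refs(const std::shared_ptr<ov::Model>& ref_model,
                                           const std::map<std::shared_ptr<ov::op::v0::Parameter>, ov::Tensor>& ref_inputs) {
    ov::Core core;
    // Disable graph transformations so the reference model is executed as-is
    auto compiled = core.compile_model(ref_model, "TEMPLATE",
                                       {{ov::template_plugin::disable_transformations(true)}});
    auto request = compiled.create_infer_request();
    for (const auto& param : ref_model->get_parameters())
        request.set_tensor(param->get_default_output(), ref_inputs.at(param));
    request.infer();
    std::vector<ov::Tensor> outputs;
    for (const auto& output : ref_model->outputs())
        outputs.push_back(request.get_tensor(output));
    return outputs;
}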
generate_inputs(targetStaticShapeVec); validate(); } status = ov::test::utils::PassRate::Statuses::PASSED; @@ -208,9 +199,6 @@ void SubgraphBaseTest::compile_model() { auto start_time = std::chrono::system_clock::now(); configure_model(); - if (functionRefs == nullptr) { - functionRefs = function->clone(); - } core_configuration(this); compiledModel = core->compile_model(function, targetDevice, configuration); if (is_report_stages) { @@ -220,10 +208,6 @@ void SubgraphBaseTest::compile_model() { } } -void SubgraphBaseTest::init_ref_function(std::shared_ptr &funcRef, const std::vector& targetInputStaticShapes) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); -} - void SubgraphBaseTest::generate_inputs(const std::vector& targetInputStaticShapes) { inputs.clear(); auto inputMap = utils::getInputMap(); @@ -255,44 +239,21 @@ void SubgraphBaseTest::infer() { inferRequest.infer(); } -precisions_map SubgraphBaseTest::get_ref_precisions_convert_map() { - //TODO: remove this conversions as soon as function interpreter fully support bf16 and f16 - precisions_map precisions = { - { ngraph::element::bf16, ngraph::element::f32 } - }; - - auto convert_added = false; - for (const auto ¶m : function->get_parameters()) { - for (size_t i = 0; i < param->get_output_size(); i++) { - for (const auto &node : param->get_output_target_inputs(i)) { - std::shared_ptr nodePtr = node.get_node()->shared_from_this(); - if (std::dynamic_pointer_cast(nodePtr)) { - convert_added = true; - break; - } - } - } - } - - if (!convert_added) { - precisions.insert({ ngraph::element::f16, ngraph::element::f32}); +void SubgraphBaseTest::update_ref_model() { + if (functionRefs == nullptr) { + functionRefs = function->clone(); } - - return precisions; -} - -std::vector SubgraphBaseTest::calculate_refs() { using InputsMap = std::map, ov::Tensor>; - auto functionToProcess = functionRefs->clone(); - precisions_map convert_precisions = get_ref_precisions_convert_map(); - pass::Manager manager; - manager.register_pass(convert_precisions, type_to_fuse_map{}, false, false); - manager.run_passes(functionToProcess); - functionToProcess->validate_nodes_and_infer_types(); + if (!convert_precisions.empty()) { + pass::Manager manager; + manager.register_pass(convert_precisions, type_to_fuse_map{}, false, false); + manager.run_passes(functionRefs); + functionRefs->validate_nodes_and_infer_types(); + } - ov::preprocess::PrePostProcessor p(functionToProcess); - const auto& inputNodes = functionToProcess->inputs(); + ov::preprocess::PrePostProcessor p(functionRefs); + const auto& inputNodes = functionRefs->inputs(); for (size_t i = 0; i < inputNodes.size(); ++i) { auto itr = std::find_if(inputs.begin(), inputs.end(), [&](const InputsMap::value_type& item) { @@ -310,18 +271,80 @@ std::vector SubgraphBaseTest::calculate_refs() { throw std::runtime_error(errMsg.str()); } } - - const auto& outputs = functionToProcess->outputs(); + const auto& outputs = functionRefs->outputs(); for (size_t i = 0; i < outputs.size(); ++i) { if (outType != ElementType::undefined && outType != outputs[i].get_element_type()) { p.output(i).tensor().set_element_type(outType); } } + functionRefs = p.build(); +} + +void SubgraphBaseTest::match_parameters() { + matched_parameters.clear(); + const auto& ref_params = functionRefs->get_parameters(); + const auto& params = function->get_parameters(); + size_t param_size = params.size(), ref_param_size = ref_params.size(); + if (params.size() < ref_params.size()) { + throw std::runtime_error("Incompatible 
parameters in original and reference model!"); + } + if (params.size() == ref_params.size()) { + for (size_t in_idx = 0; in_idx < params.size(); ++in_idx) { + matched_parameters.insert({ ref_params[in_idx], params[in_idx] }); + } + } else { + auto it = params.begin(); + auto it_ref = ref_params.begin(); + while (it_ref != ref_params.end() && it != params.end()) { + bool is_match_in = true; + if ((*it_ref)->get_output_partial_shape(0).is_static()) { + if (inputs.at(*it).get_shape() != (*it_ref)->get_output_shape(0)) { + is_match_in = false; + } + } else if ((*it)->get_output_partial_shape(0) != (*it_ref)->get_output_partial_shape(0)) { + is_match_in = false; + } + if ((*it)->get_output_element_type(0) != ((*it_ref)->get_output_element_type(0))) { + is_match_in = false; + } + if (is_match_in) { + matched_parameters.insert({ *it_ref, *it }); + ++it_ref; + } + ++it; + } + if (matched_parameters.size() != ref_params.size()) { + throw std::runtime_error("Incompatible parameters in original and reference model!"); + } + } +} + +std::vector SubgraphBaseTest::calculate_refs() { + if (is_report_stages) { + std::cout << "[ REFERENCE ] `SubgraphBaseTest::calculate_refs()` is started"<< std::endl; + } + auto start_time = std::chrono::system_clock::now(); - functionToProcess = p.build(); + update_ref_model(); + match_parameters(); - auto results = ngraph::helpers::interpretFunction(functionToProcess, inputs); - return results; + auto compiledModelRef = core->compile_model(functionRefs, ov::test::utils::DEVICE_TEMPLATE, {{ ov::template_plugin::disable_transformations(true) }}); + auto inferRequestRef = compiledModelRef.create_infer_request(); + for (const auto& param : functionRefs->get_parameters()) { + inferRequestRef.set_tensor(param->get_default_output(), inputs.at(matched_parameters[param])); + } + inferRequestRef.infer(); + + auto outputs = std::vector{}; + for (const auto& output : functionRefs->outputs()) { + outputs.push_back(inferRequestRef.get_tensor(output)); + } + if (is_report_stages) { + auto end_time = std::chrono::system_clock::now(); + std::chrono::duration duration = end_time - start_time; + std::cout << "[ REFERENCE ] `SubgraphBaseTest::calculate_refs()` is finished successfully. 
Duration is " << duration.count() << "s" << std::endl; + } + return outputs; } std::vector SubgraphBaseTest::get_plugin_outputs() { @@ -361,7 +384,7 @@ void SubgraphBaseTest::validate() { } ASSERT_EQ(actualOutputs.size(), expectedOutputs.size()) - << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size(); + << "TEMPLATE plugin has " << expectedOutputs.size() << " outputs, while " << targetDevice << " " << actualOutputs.size(); if (is_report_stages) { std::cout << "[ COMPARATION ] `ov_tensor_utils.hpp::compare()` is started"<< std::endl; } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp b/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp new file mode 100644 index 00000000000000..80e6cc98db203f --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/nms_rotated.hpp" +#include "openvino/op/nms_rotated.hpp" + +#include + +namespace LayerTestsDefinitions { + +using namespace InferenceEngine; +using namespace FuncTestUtils::PrecisionUtils; + +std::string NmsRotatedLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + InputShapeParams inShapeParams; + InputPrecisions inPrecisions; + int32_t maxOutBoxesPerClass; + float iouThr, scoreThr; + bool sortResDescend, clockwise; + ov::element::Type outType; + std::string targetDevice; + std::tie(inShapeParams, + inPrecisions, + maxOutBoxesPerClass, + iouThr, + scoreThr, + sortResDescend, + outType, + clockwise, + targetDevice) = obj.param; + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision inputPrec, maxBoxPrec, thrPrec; + std::tie(inputPrec, maxBoxPrec, thrPrec) = inPrecisions; + + std::ostringstream result; + result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_"; + result << "inputPrec=" << inputPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_"; + result << "maxOutBoxesPerClass=" << maxOutBoxesPerClass << "_"; + result << "iouThr=" << iouThr << "_scoreThr=" << scoreThr << "_"; + result << "sortResDescend=" << sortResDescend << "_outType=" << outType << "_"; + result << "clockwise=" << clockwise << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void NmsRotatedLayerTest::GenerateInputs() { + size_t it = 0; + for (const auto& input : cnnNetwork.getInputsInfo()) { + const auto& info = input.second; + Blob::Ptr blob; + + if (it == 1) { + blob = make_blob_with_precision(info->getTensorDesc()); + blob->allocate(); + if (info->getTensorDesc().getPrecision() == Precision::FP32) { + ov::test::utils::fill_data_random_float(blob, 1, 0, 1000); + } else { + ov::test::utils::fill_data_random_float(blob, 1, 0, 1000); + } + } else { + blob = GenerateInput(*info); + } + inputs.push_back(blob); + it++; + } +} + +void NmsRotatedLayerTest::Compare( + const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) { + size_t num_batches, num_boxes, num_classes; + std::tie(num_batches, num_boxes, num_classes) = inShapeParams; + + struct OutBox { + OutBox() = default; + + OutBox(int32_t batchId, int32_t classId, int32_t boxId, float score) { + this->batchId = batchId; + this->classId = classId; + this->boxId = boxId; + this->score = score; + } + + bool operator==(const OutBox& rhs) const { + 
return batchId == rhs.batchId && classId == rhs.classId && boxId == rhs.boxId; + } + + int32_t batchId; + int32_t classId; + int32_t boxId; + float score; + }; + + std::vector expected; + { + const auto selected_indices_size = expectedOutputs[0].second.size() / expectedOutputs[0].first.size(); + const auto selected_scores_size = expectedOutputs[1].second.size() / expectedOutputs[1].first.size(); + + ASSERT_EQ(selected_indices_size, selected_scores_size); + + const auto boxes_count = selected_indices_size / 3; + expected.resize(boxes_count); + + if (expectedOutputs[0].first.size() == 4) { + auto selected_indices_data = reinterpret_cast(expectedOutputs[0].second.data()); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + expected[i / 3].batchId = selected_indices_data[i + 0]; + expected[i / 3].classId = selected_indices_data[i + 1]; + expected[i / 3].boxId = selected_indices_data[i + 2]; + } + } else { + auto selected_indices_data = reinterpret_cast(expectedOutputs[0].second.data()); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + expected[i / 3].batchId = static_cast(selected_indices_data[i + 0]); + expected[i / 3].classId = static_cast(selected_indices_data[i + 1]); + expected[i / 3].boxId = static_cast(selected_indices_data[i + 2]); + } + } + + if (expectedOutputs[1].first.size() == 4) { + auto selected_scores_data = reinterpret_cast(expectedOutputs[1].second.data()); + for (size_t i = 0; i < selected_scores_size; i += 3) { + expected[i / 3].score = selected_scores_data[i + 2]; + } + } else { + auto selected_scores_data = reinterpret_cast(expectedOutputs[1].second.data()); + for (size_t i = 0; i < selected_scores_size; i += 3) { + expected[i / 3].score = static_cast(selected_scores_data[i + 2]); + } + } + } + + std::vector actual; + { + const auto selected_indices_size = actualOutputs[0]->byteSize() / sizeof(float); + const auto selected_indices_memory = as(actualOutputs[0]); + IE_ASSERT(selected_indices_memory); + const auto selected_indices_lockedMemory = selected_indices_memory->rmap(); + const auto selected_indices_data = selected_indices_lockedMemory.as(); + + const auto selected_scores_memory = as(actualOutputs[1]); + IE_ASSERT(selected_scores_memory); + const auto selected_scores_lockedMemory = selected_scores_memory->rmap(); + const auto selected_scores_data = selected_scores_lockedMemory.as(); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + const int32_t batchId = selected_indices_data[i + 0]; + const int32_t classId = selected_indices_data[i + 1]; + const int32_t boxId = selected_indices_data[i + 2]; + const float score = selected_scores_data[i + 2]; + if (batchId == -1 || classId == -1 || boxId == -1) + break; + + actual.emplace_back(batchId, classId, boxId, score); + } + } + + ASSERT_EQ(expected.size(), actual.size()); + for (size_t i = 0; i < expected.size(); ++i) { + ASSERT_EQ(expected[i], actual[i]) << ", i=" << i; + ASSERT_NEAR(expected[i].score, actual[i].score, abs_threshold) << ", i=" << i; + } +} + +void NmsRotatedLayerTest::SetUp() { + InputPrecisions inPrecisions; + size_t maxOutBoxesPerClass; + float iouThr, scoreThr; + bool sortResDescend, clockwise; + ov::element::Type outType; + std::tie(inShapeParams, + inPrecisions, + maxOutBoxesPerClass, + iouThr, + scoreThr, + sortResDescend, + outType, + clockwise, + targetDevice) = this->GetParam(); + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision inputPrec, maxBoxPrec, thrPrec; + std::tie(inputPrec, maxBoxPrec, 
thrPrec) = inPrecisions; + + if (inputPrec == Precision::FP16) { + abs_threshold = 0.1; + } else { + abs_threshold = std::numeric_limits::epsilon(); + } + + ov::ParameterVector params; + + const std::vector boxesShape{numBatches, numBoxes, 5}, scoresShape{numBatches, numClasses, numBoxes}; + const auto ngPrc = convertIE2nGraphPrc(inputPrec); + + const auto boxesNode = std::make_shared(ngPrc, ov::Shape(boxesShape)); + params.push_back(boxesNode); + const auto scoresNode = std::make_shared(ngPrc, ov::Shape(scoresShape)); + params.push_back(scoresNode); + + const auto maxOutputBoxesPerClassNode = std::make_shared(ov::element::Type_t::u32, + ov::Shape{}, + std::vector{maxOutBoxesPerClass}); + const auto iouThresholdNode = std::make_shared(ov::element::Type_t::f32, + ov::Shape{}, + std::vector{iouThr}); + const auto scoreTresholdNode = std::make_shared(ov::element::Type_t::f32, + ov::Shape{}, + std::vector{scoreThr}); + + const auto nmsNode = std::make_shared(params[0], + params[1], + maxOutputBoxesPerClassNode, + iouThresholdNode, + scoreTresholdNode, + sortResDescend, + outType, + clockwise); + + function = std::make_shared(nmsNode, params, "NMS"); +} +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp b/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..dc95fe704400f9 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp" + +namespace ov { +namespace test { + +std::string TransposeMatMulFusion::getTestCaseName(const testing::TestParamInfo &obj) { + return "device=" + std::string(obj.param); +} + +void TransposeMatMulFusion::SetUp() { + targetDevice = GetParam(); + + ov::PartialShape shape1{1, 3, 128, 64}; + ov::PartialShape shape2{1, 3, 64, 128}; + + InputShape input_shape1 = {shape1, {Shape{1, 3, 128, 64}}}; + InputShape input_shape2 = {shape2, {Shape{1, 3, 64, 128}}}; + init_input_shapes({input_shape1, input_shape2}); + + const auto param1 = std::make_shared(ov::element::f32, shape1); + const auto param2 = std::make_shared(ov::element::f32, shape2); + const auto order = ov::op::v0::Constant::create(ov::element::i32, Shape{4}, {0, 1, 3, 2}); + const auto transpose1 = std::make_shared(param1, order); + const auto transpose2 = std::make_shared(param2, order); + const auto matmul = std::make_shared(transpose1, transpose2, false, false); + const auto constant = op::v0::Constant::create(element::f32, Shape{1}, {9}); + const auto mul = std::make_shared(matmul, constant); + function = std::make_shared(mul, ov::ParameterVector{param1, param2}); +} + +void TransposeMatMulFusion::TearDown() { + const auto model = compiledModel.get_runtime_model(); + + int num_ops = 0; + for (const auto& node : model->get_ordered_ops()) { + const auto& rt_info = node->get_rt_info(); + const auto layer_type = rt_info.find("layerType")->second.as(); + if (layer_type != "Reorder" && layer_type != "Const") + num_ops++; + EXPECT_NE(layer_type, "Transpose"); + EXPECT_NE(layer_type, "Permute"); + } + ASSERT_EQ(num_ops, 5); // two Inputs, one Eltwise, one MatMul and one Output +} + +} // namespace test +} // namespace ov diff --git a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp 
b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp index 5d0f3cd4ac7d0b..862ff798efcf30 100644 --- a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp +++ b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp @@ -163,6 +163,7 @@ std::vector convertOutputPrecision(const std::vector std::ostream& operator<<(std::ostream& os, MemoryTransformation type); +// todo: remove the following function from the source code after cleaning up VPU repo void resize_function(std::shared_ptr function, const std::vector& targetInputStaticShapes); using ov::test::utils::operator<<; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp index cb90c0699a126d..22373d55292d2a 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp @@ -14,6 +14,7 @@ #include "openvino/op/loop.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/op/util/sub_graph_base.hpp" +#include "openvino/runtime/aligned_buffer.hpp" class FunctionsComparator { public: @@ -945,9 +946,7 @@ class ReadAndCompareAttributes : public ov::AttributeVisitor { template void verify(const std::string& name, const AttrValue& attr_value); - OPENVINO_SUPPRESS_DEPRECATED_START - void verify_mem_buf(const std::string& name, const std::shared_ptr& buffer); - OPENVINO_SUPPRESS_DEPRECATED_END + void verify_mem_buf(const std::string& name, const std::shared_ptr& buffer); using ModelAccessor = ov::ValueAccessor>; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp index 70778266d8f1b2..c8026f4ef2d7e0 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp @@ -23,6 +23,8 @@ const char API_REPORT_FILENAME[] = "report_api"; const char REPORT_EXTENSION[] = ".xml"; const char LST_EXTENSION[] = ".lst"; +const char TEMPLATE_LIB[] = "openvino_template_plugin"; + const char DEVICE_SUFFIX_SEPARATOR = '.'; const unsigned int maxFileNameLength = 140; diff --git a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp index e3c5a8b2ec1a55..f6cc70ed10a625 100644 --- a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp +++ b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp @@ -895,7 +895,6 @@ void check_rt_info(const std::shared_ptr& f) { namespace attributes { namespace detail { -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { insert(name, inputs->get()); @@ -904,7 +903,7 @@ void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccess } else if (ov::is_type>(&adapter)) { // drop comparison, no more info than port indexes which will be check in // subgraph::compare_io - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { const auto beg = static_cast(a->get()->get_ptr()); const auto end = beg + a->get()->size(); insert(name, storage::MemoryChunk{storage::MemoryChunk::Data(beg, end)}); @@ -923,7 +922,6 
@@ void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccess adapter.get_type_info().name + "']"; } } -OPENVINO_SUPPRESS_DEPRECATED_END template void ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& attr_value) { if (should_return()) { @@ -942,9 +940,8 @@ void ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& } } -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, - const std::shared_ptr& buffer) { + const std::shared_ptr& buffer) { if (should_return()) { return; } @@ -961,7 +958,6 @@ void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, return; } } -OPENVINO_SUPPRESS_DEPRECATED_END void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAccessor& adapter) { if (should_return()) { @@ -980,7 +976,6 @@ void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAcc } } -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { verify(name, inputs->get()); @@ -989,7 +984,7 @@ void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueA } else if (ov::is_type>(&adapter)) { // drop comparison, no more info than port indexes which will be check in // subgraph::compare_io - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { verify_mem_buf(name, a->get()); } else if (auto attrs = ov::as_type>(&adapter)) { verify(name, attrs->get()); @@ -1005,7 +1000,6 @@ void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueA adapter.get_type_info().name + "']"; } } -OPENVINO_SUPPRESS_DEPRECATED_END } // namespace detail @@ -1042,7 +1036,6 @@ AccuracyCheckResult accuracy_check(const std::shared_ptr& ref_functio auto ref_outputs = ngraph::helpers::interpretFunction(ref_function, ref_input_data); auto outputs = ngraph::helpers::interpretFunction(cur_function, cur_input_data); - IE_ASSERT(ref_outputs.size() == outputs.size()); for (int i = 0; i < ref_outputs.size(); i++) { diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp index df4377d5cf9ad4..ef76694caf9691 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp @@ -4,45 +4,95 @@ #pragma once +#include "openvino/op/ops.hpp" #include "openvino/openvino.hpp" namespace ov { namespace test { namespace functional { +// {{ type_info, real_version }} +const std::map not_aligned_op_version = { + // opset 1 + {ov::op::v0::Abs::get_type_info_static(), 0}, + {ov::op::v0::Acos::get_type_info_static(), 0}, + {ov::op::v0::Asin::get_type_info_static(), 0}, + {ov::op::v0::Atan::get_type_info_static(), 0}, + {ov::op::v0::BatchNormInference::get_type_info_static(), 0}, + {ov::op::v0::CTCGreedyDecoder::get_type_info_static(), 0}, + {ov::op::v0::Ceiling::get_type_info_static(), 0}, + {ov::op::v0::Clamp::get_type_info_static(), 0}, + {ov::op::v0::Concat::get_type_info_static(), 0}, + {ov::op::v0::Constant::get_type_info_static(), 0}, + {ov::op::v0::Convert::get_type_info_static(), 0}, + {ov::op::v0::Cos::get_type_info_static(), 0}, + {ov::op::v0::Cosh::get_type_info_static(), 0}, + 
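// [Editor's note] Each entry in this map pins the version suffix that reports
// should use for ops whose C++ type version differs from the opset that first
// shipped them. For example, Abs is part of opset1 but its type is
// ov::op::v0::Abs, so get_node_version() reports "Abs-0" rather than "Abs-1".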
{ov::op::v0::DepthToSpace::get_type_info_static(), 0}, + {ov::op::v0::DetectionOutput::get_type_info_static(), 0}, + {ov::op::v0::Elu::get_type_info_static(), 0}, + {ov::op::v0::Erf::get_type_info_static(), 0}, + {ov::op::v0::Exp::get_type_info_static(), 0}, + {ov::op::v0::FakeQuantize::get_type_info_static(), 0}, + {ov::op::v0::Floor::get_type_info_static(), 0}, + {ov::op::v0::GRN::get_type_info_static(), 0}, + {ov::op::v0::HardSigmoid::get_type_info_static(), 0}, + {ov::op::v0::Interpolate::get_type_info_static(), 0}, + {ov::op::v0::Log::get_type_info_static(), 0}, + {ov::op::v0::LRN::get_type_info_static(), 0}, + {ov::op::v0::LSTMCell::get_type_info_static(), 0}, + {ov::op::v0::LSTMSequence::get_type_info_static(), 0}, + {ov::op::v0::MatMul::get_type_info_static(), 0}, + {ov::op::v0::Negative::get_type_info_static(), 0}, + {ov::op::v0::NormalizeL2::get_type_info_static(), 0}, + {ov::op::v0::PRelu::get_type_info_static(), 0}, + {ov::op::v0::PSROIPooling::get_type_info_static(), 0}, + {ov::op::v0::Parameter::get_type_info_static(), 0}, + {ov::op::v0::PriorBox::get_type_info_static(), 0}, + {ov::op::v0::PriorBoxClustered::get_type_info_static(), 0}, + {ov::op::v0::Proposal::get_type_info_static(), 0}, + {ov::op::v0::Range::get_type_info_static(), 0}, + {ov::op::v0::Relu::get_type_info_static(), 0}, + {ov::op::v0::RegionYolo::get_type_info_static(), 0}, + {ov::op::v0::Result::get_type_info_static(), 0}, + {ov::op::v0::ReverseSequence::get_type_info_static(), 0}, + {ov::op::v0::RNNCell::get_type_info_static(), 0}, + {ov::op::v0::Selu::get_type_info_static(), 0}, + {ov::op::v0::ShapeOf::get_type_info_static(), 0}, + {ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, + {ov::op::v0::Sign::get_type_info_static(), 0}, + {ov::op::v0::Sigmoid::get_type_info_static(), 0}, + {ov::op::v0::Sin::get_type_info_static(), 0}, + {ov::op::v0::Sinh::get_type_info_static(), 0}, + {ov::op::v0::Sqrt::get_type_info_static(), 0}, + {ov::op::v0::SpaceToDepth::get_type_info_static(), 0}, + {ov::op::v0::SquaredDifference::get_type_info_static(), 0}, + {ov::op::v0::Squeeze::get_type_info_static(), 0}, + {ov::op::v0::Tan::get_type_info_static(), 0}, + {ov::op::v0::Tanh::get_type_info_static(), 0}, + {ov::op::v0::TensorIterator::get_type_info_static(), 0}, + {ov::op::v0::Tile::get_type_info_static(), 0}, + {ov::op::v0::Unsqueeze::get_type_info_static(), 0}, + {ov::op::v0::Xor::get_type_info_static(), 0}, + // opset 2 + {ov::op::v0::MVN::get_type_info_static(), 0}, + {ov::op::v0::ReorgYolo::get_type_info_static(), 0}, + {ov::op::v0::ROIPooling::get_type_info_static(), 0}, + {ov::op::v0::Gelu::get_type_info_static(), 0}, + {ov::op::v1::BatchToSpace::get_type_info_static(), 1}, + {ov::op::v1::SpaceToBatch::get_type_info_static(), 1}, + // opset 3 + {ov::op::v0::RNNCell::get_type_info_static(), 0}, + {ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, + // opset 4 + {ov::op::v3::Acosh::get_type_info_static(), 3}, + {ov::op::v3::Asinh::get_type_info_static(), 3}, + {ov::op::v3::Atanh::get_type_info_static(), 3}, +}; + // todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix = ""); +std::string get_node_version(const ov::NodeTypeInfo& node_type_info); } // namespace functional } // namespace test } // namespace ov - -// todo: remove these structure after remove old subgraphs dumper -namespace LayerTestsUtils { - -struct ModelInfo { - size_t unique_op_cnt; - // model_path, op_cnt - std::map model_paths; - - ModelInfo(size_t _op_cnt = 0, const std::map& 
_model_paths = {{}}); -}; - -struct PortInfo { - double min; - double max; - bool convert_to_const; - - PortInfo(double min, double max, bool convert_to_const); - PortInfo(); -}; - -struct OPInfo { - std::map found_in_models; - std::map ports_info; - - OPInfo(const std::string& source_model, const std::string& model_path, size_t total_op_cnt = 0); - - OPInfo() = default; -}; -} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp index cc97fb45cd6949..44cf995f7184c8 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp @@ -32,7 +32,7 @@ class OpSummary : public virtual Summary { std::map opsStats = {}; unsigned short int downgrade_coefficient; - std::string getOpVersion(const std::string& version); + std::string get_opset_number(const std::string& opset_full_name); protected: OpSummary(unsigned short int downgrade_coefficient = 1); diff --git a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp index 118368f7a1180f..483b9b98c38db2 100644 --- a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp +++ b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp @@ -55,21 +55,20 @@ std::shared_ptr PluginCache::core(const std::string& deviceToCheck) { } assert(0 != ov_core.use_count()); - // register template plugin if it is needed - try { - std::string pluginName = "openvino_template_plugin"; - pluginName += OV_BUILD_POSTFIX; + // Register Template plugin as a reference provider + const auto devices = ov_core->get_available_devices(); + if (std::find(devices.begin(), devices.end(), std::string(ov::test::utils::DEVICE_TEMPLATE)) == devices.end()) { ov_core->register_plugin( - ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), pluginName), - "TEMPLATE"); - } catch (...) 
{ + ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), + std::string(ov::test::utils::TEMPLATE_LIB) + OV_BUILD_POSTFIX), + ov::test::utils::DEVICE_TEMPLATE); } if (!deviceToCheck.empty()) { auto properties = ov_core->get_property(deviceToCheck, ov::supported_properties); if (std::find(properties.begin(), properties.end(), ov::available_devices) != properties.end()) { - auto availableDevices = ov_core->get_property(deviceToCheck, ov::available_devices); + const auto availableDevices = ov_core->get_property(deviceToCheck, ov::available_devices); if (availableDevices.empty()) { std::cerr << "No available devices for " << deviceToCheck << std::endl; diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp index 0082cba61a343f..358be2d424b177 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp @@ -8,45 +8,30 @@ namespace ov { namespace test { namespace functional { -// todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix) { - std::string op_name = node->get_type_info().name; - std::string opset_version = node->get_type_info().get_version(); - std::string opset_name = "opset"; - auto pos = opset_version.find(opset_name); - if (pos != std::string::npos) { - op_name += "-" + opset_version.substr(pos + opset_name.size()); - } + const auto& node_type_info = node->get_type_info(); + auto op_name = get_node_version(node_type_info); if (!postfix.empty()) { op_name += "_" + postfix; } return op_name; } +std::string get_node_version(const ov::NodeTypeInfo& node_type_info) { + std::string op_name = node_type_info.name + std::string("-"); + std::string opset_version = node_type_info.get_version(); + if (not_aligned_op_version.count(node_type_info)) { + op_name += std::to_string(not_aligned_op_version.at(node_type_info)); + } else { + std::string opset_name = "opset"; + auto pos = opset_version.find(opset_name); + if (pos != std::string::npos) { + op_name += opset_version.substr(pos + opset_name.size()); + } + } + return op_name; +} + } // namespace functional } // namespace test } // namespace ov - -namespace LayerTestsUtils { - -ModelInfo::ModelInfo(size_t _op_cnt, const std::map& _model_paths) - : unique_op_cnt(_op_cnt), - model_paths(_model_paths) {} - -PortInfo::PortInfo(double min, double max, bool convert_to_const) - : min(min), - max(max), - convert_to_const(convert_to_const) {} - -PortInfo::PortInfo() { - min = std::numeric_limits::min(); - max = std::numeric_limits::max(); - convert_to_const = false; -} - -OPInfo::OPInfo(const std::string& source_model, const std::string& model_path, size_t total_op_cnt) { - found_in_models = {{source_model, ModelInfo(1, {{model_path, total_op_cnt}})}}; - ports_info = {}; -} - -} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp index b895d14ab26ca6..fbaaf4e16629c3 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp @@ -8,6 +8,7 @@ #include #include "common_test_utils/file_utils.hpp" +#include "functional_test_utils/summary/op_info.hpp" using namespace ov::test::utils; @@ -109,13 +110,13 @@ void OpSummary::updateOPsImplStatus(const ov::NodeTypeInfo& op, 
const bool implS } } -std::string OpSummary::getOpVersion(const std::string& version) { +std::string OpSummary::get_opset_number(const std::string& opset_full_name) { std::string opset_name = "opset"; - auto pos = version.find(opset_name); + auto pos = opset_full_name.find(opset_name); if (pos == std::string::npos) { return "undefined"; } else { - return version.substr(pos + opset_name.size()); + return opset_full_name.substr(pos + opset_name.size()); } } @@ -259,7 +260,7 @@ void OpSummary::saveReport() { const auto& type_info_set = opset.get_type_info_set(); for (const auto& type_info : type_info_set) { auto it = opsInfo.find(type_info); - std::string op_version = getOpVersion(opset_version); + std::string op_version = get_opset_number(opset_version); if (it == opsInfo.end()) { opsInfo.insert({type_info, op_version}); } else { @@ -304,7 +305,7 @@ void OpSummary::saveReport() { pugi::xml_node opsNode = root.append_child("ops_list"); for (const auto& op : opsInfo) { - std::string name = std::string(op.first.name) + "-" + getOpVersion(op.first.version_id); + std::string name = functional::get_node_version(op.first); opsNode.append_child(name.c_str()).append_attribute("opsets").set_value(op.second.c_str()); } @@ -315,7 +316,7 @@ void OpSummary::saveReport() { it.second.rel_passed /= downgrade_coefficient; it.second.rel_all /= downgrade_coefficient; - std::string name = std::string(it.first.name) + "-" + getOpVersion(it.first.version_id); + std::string name = functional::get_node_version(it.first); opList.insert(name); pugi::xml_node entry = currentDeviceNode.append_child(name.c_str()); entry.append_attribute("implemented").set_value(it.second.isImplemented); diff --git a/tests/layer_tests/common/utils/tf_utils.py b/tests/layer_tests/common/utils/tf_utils.py index fb02c3f0a1b298..913048acf2e762 100644 --- a/tests/layer_tests/common/utils/tf_utils.py +++ b/tests/layer_tests/common/utils/tf_utils.py @@ -98,7 +98,7 @@ def summarize_graph(model_path, output_nodes_for_freeze=None, reshape_net=None): variables = list() outputs = list() graph = load_graph(model_path, output_nodes_for_freeze) - unlikely_output_types = ['Const', 'Assign', 'NoOp', 'Placeholder', 'Assert', 'switch_t', 'switch_f'] + unlikely_output_types = ['Const', 'Assign', 'NoOp', 'Placeholder', 'Assert', 'switch_t', 'switch_f', 'TensorArrayCloseV3'] control_dependents_map = collect_control_dependencies(graph) for node in graph.as_graph_def().node: if node.op == 'Placeholder': diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py index 6eab63bf682bd0..60bb92b85b5920 100644 --- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py +++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import unittest +import platform from typing import Tuple import numpy as np @@ -283,8 +284,8 @@ def scripted_fn(x: torch.Tensor, y: torch.Tensor): return torch.sigmoid(torch.relu(x * y)) inp_shape = PartialShape([Dimension(1, -1), Dimension(-1, 5), 10]) - ref_model = make_ref_pt_model_two_inputs(inp_shape, dtype=Type.dynamic) - return scripted_fn, ref_model, {'input': [(inp_shape), (inp_shape)]} + ref_model = make_ref_pt_model_two_inputs(inp_shape) + return scripted_fn, ref_model, {'input': [(inp_shape, Type.f32), (inp_shape, Type.f32)]} def create_pytorch_nn_module_layout_list(tmp_dir): @@ -471,9 +472,9 @@ def create_pytorch_nn_module_scale_list_compression_enabled(tmp_dir): def 
create_pytorch_nn_module_shapes_list_static(tmp_dir): pt_model = make_pt_model_two_inputs() - ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20], dtype=Type.dynamic) + ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20]) - return pt_model, ref_model, {'input': [[1, 3, 20, 20], [1, 3, 20, 20]]} + return pt_model, ref_model, {'input': [([1, 3, 20, 20], Type.f32), ([1, 3, 20, 20], Type.f32)]} def create_pytorch_nn_module_shapes_list_static_via_input(tmp_dir): @@ -489,17 +490,16 @@ def create_pytorch_nn_module_shapes_list_dynamic(tmp_dir): [-1, 3, 20, Dimension(-1, 20)]] param1 = ov.opset8.parameter(PartialShape( - inp_shapes[0]), name="x", dtype=Type.dynamic) + inp_shapes[0]), name="x", dtype=Type.f32) param2 = ov.opset8.parameter(PartialShape( - inp_shapes[1]), name="y", dtype=Type.dynamic) - cl = ov.opset8.convert_like(param2, param1) - mul = ov.opset8.multiply(param1, cl) + inp_shapes[1]), name="y", dtype=Type.f32) + mul = ov.opset8.multiply(param1, param2) relu = ov.opset8.relu(mul) sigm = ov.opset8.sigmoid(relu) parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input': inp_shapes} + return pt_model, ref_model, {'input': [(inp_shapes[0], Type.f32), (inp_shapes[1], Type.f32)]} def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): @@ -522,8 +522,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): def create_pytorch_nn_module_shapes_list_dynamic_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -536,8 +536,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_single_input_via_input(tmp_dir) def create_pytorch_nn_module_shapes_list_static_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[1, 3, 20, 20]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[1, 3, 20, 20], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -1236,6 +1236,8 @@ class TestPrecisionSensitive(): @pytest.mark.parametrize("create_model", test_data) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122714') def test_precision_sensitive(self, create_model, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): import numpy.testing as npt from pathlib import Path diff --git a/tests/layer_tests/onnx_tests/test_reduce_lp.py b/tests/layer_tests/onnx_tests/test_reduce_lp.py index e64929a680c20d..73cd86a2bbbc6f 100644 --- a/tests/layer_tests/onnx_tests/test_reduce_lp.py +++ b/tests/layer_tests/onnx_tests/test_reduce_lp.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest from common.layer_test_class import check_ir_version @@ -232,6 +234,8 @@ def create_reduce_lp_const(self, shape, axes, keep_dims, reduce_p, ir_version): @pytest.mark.parametrize("keep_dims", [True, False]) @pytest.mark.parametrize("reduce_p", [1, 2]) @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + 
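The conditional xfail being added here (and repeatedly below) always takes the same shape; a reusable marker would be one way to avoid the duplication. A sketch, not part of the change:

import platform
import pytest

# One definition instead of many inline copies; the ticket number varies per test.
darwin_arm64_xfail = pytest.mark.xfail(
    condition=platform.system() == 'Darwin' and platform.machine() == 'arm64',
    reason='Ticket - 122846')

@darwin_arm64_xfail
def test_placeholder():  # illustrative test, not from the suite
    pass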
reason='Ticket - 122846') def test_reduce_lp_precommit(self, params, keep_dims, reduce_p, ie_device, precision, ir_version, temp_dir, use_old_api): self._test(*self.create_reduce_lp(**params, keep_dims=keep_dims, reduce_p=reduce_p, diff --git a/tests/layer_tests/onnx_tests/test_roi_align.py b/tests/layer_tests/onnx_tests/test_roi_align.py index 13663808a3acd3..a29ddc4c1d1213 100644 --- a/tests/layer_tests/onnx_tests/test_roi_align.py +++ b/tests/layer_tests/onnx_tests/test_roi_align.py @@ -136,6 +136,8 @@ def create_net(self, input_shape, rois_shape, indices_shape, output_shape, @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Windows', reason="Ticket - 122731") + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122846') def test_roi_alignv10(self, params, ie_device, precision, ir_version, temp_dir, use_old_api): # TODO: ticket for investigating GPU failures: CVS-86300 if ie_device != "GPU": diff --git a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py index 5ae2dcac31c83e..90db75d01c0e78 100644 --- a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py +++ b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py @@ -281,8 +281,8 @@ def scripted_fn(x: torch.Tensor, y: torch.Tensor): return torch.sigmoid(torch.relu(x * y)) inp_shape = PartialShape([Dimension(1, -1), Dimension(-1, 5), 10]) - ref_model = make_ref_pt_model_two_inputs(inp_shape, dtype=Type.dynamic) - return scripted_fn, ref_model, {'input': [(inp_shape), (inp_shape)]} + ref_model = make_ref_pt_model_two_inputs(inp_shape) + return scripted_fn, ref_model, {'input': [(inp_shape, Type.f32), (inp_shape, Type.f32)]} def create_pytorch_nn_module_layout_list(tmp_dir): @@ -469,9 +469,9 @@ def create_pytorch_nn_module_scale_list_compression_enabled(tmp_dir): def create_pytorch_nn_module_shapes_list_static(tmp_dir): pt_model = make_pt_model_two_inputs() - ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20], dtype=Type.dynamic) + ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20]) - return pt_model, ref_model, {'input': [[1, 3, 20, 20], [1, 3, 20, 20]]} + return pt_model, ref_model, {'input': [([1, 3, 20, 20], Type.f32), ([1, 3, 20, 20], Type.f32)]} def create_pytorch_nn_module_shapes_list_static_via_input(tmp_dir): @@ -487,17 +487,16 @@ def create_pytorch_nn_module_shapes_list_dynamic(tmp_dir): [-1, 3, 20, Dimension(-1, 20)]] param1 = ov.opset8.parameter(PartialShape( - inp_shapes[0]), name="x", dtype=Type.dynamic) + inp_shapes[0]), name="x", dtype=Type.f32) param2 = ov.opset8.parameter(PartialShape( - inp_shapes[1]), name="y", dtype=Type.dynamic) - cl = ov.opset8.convert_like(param2, param1) - mul = ov.opset8.multiply(param1, cl) + inp_shapes[1]), name="y", dtype=Type.f32) + mul = ov.opset8.multiply(param1, param2) relu = ov.opset8.relu(mul) sigm = ov.opset8.sigmoid(relu) parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input': inp_shapes} + return pt_model, ref_model, {'input': [(inp_shapes[0], Type.f32), (inp_shapes[1], Type.f32)]} def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): @@ -520,8 +519,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): def create_pytorch_nn_module_shapes_list_dynamic_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)]] - ref_model = 
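Since both parameters are now declared f32, the reference graphs in these cases no longer need the convert_like bridge between the inputs. Standalone, the simplified reference looks roughly like this (shapes illustrative; assumes the ov/opset8 import style these tests already use):

import openvino.runtime as ov
from openvino.runtime import Model, PartialShape, Type

param1 = ov.opset8.parameter(PartialShape([-1, 3, 20, 20]), name="x", dtype=Type.f32)
param2 = ov.opset8.parameter(PartialShape([-1, 3, 20, 20]), name="y", dtype=Type.f32)
# With matching element types, multiply takes both parameters directly.
sigm = ov.opset8.sigmoid(ov.opset8.relu(ov.opset8.multiply(param1, param2)))
ref_model = Model([sigm], [param1, param2], "test")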
make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -534,8 +533,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_single_input_via_input(tmp_dir) def create_pytorch_nn_module_shapes_list_static_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[1, 3, 20, 20]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[1, 3, 20, 20], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} diff --git a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py index 6adbf0d033e98f..7e1758bd84d4fe 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py @@ -641,7 +641,7 @@ def f(x, y): @pytest.mark.precommit def test_pytorch_decoder_can_convert_scripted_function(): - from openvino.tools.mo import convert_model + from openvino import convert_model, Type scripted = torch.jit.script(f) - model = convert_model(scripted) + model = convert_model(scripted, input=[Type.f32, Type.f32]) assert model is not None diff --git a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py index 6b4d748fc45632..f76c7b1fa97ba8 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py @@ -22,6 +22,9 @@ def forward(self, x): class aten_multi_input_output(torch.nn.Module): def forward(self, x, y, z): + x = x.to(torch.float32) + y = y.to(torch.float32) + z = z.to(torch.float32) return torch.nn.functional.relu(x), x * y, z / x diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 00cdc427056518..f8b726c4c5f66d 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -77,10 +77,13 @@ def use_torch_compile_backend(): if use_torch_compile_backend(): self.torch_compile_backend_test(model, torch_inputs, custom_eps) else: + trace_model = kwargs.get('trace_model', False) + freeze_model = kwargs.get('freeze_model', True) with torch.no_grad(): - trace_model = kwargs.get('trace_model', False) - freeze_model = kwargs.get('freeze_model', True) - smodel, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) + if kwargs.get('use_convert_model', False): + smodel, converted_model = self.convert_via_mo(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) + else: + smodel, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) if kind is not None and not isinstance(kind, (tuple, list)): kind = [kind] @@ -162,12 +165,13 @@ def _prepare_input(self): raise RuntimeError("Please provide inputs generation function") def convert_via_mo(self, model, example_input, trace_model, dynamic_shapes, ov_inputs, freeze_model): - from openvino.tools.ovc import convert_model - kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]} + from openvino import convert_model, PartialShape if trace_model: 
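In isolation, the branching this helper now performs: traced modules keep the example_input kwarg, while scripted modules get an explicit dtype plus a fully dynamic shape per input (the model and inputs below are illustrative):

import torch
from openvino import convert_model, PartialShape

model = torch.nn.ReLU()
example_input = [torch.zeros(2, 3)]
trace_model = False  # exercise the scripted branch
if trace_model:
    source = torch.jit.trace(model, example_input[0])
    kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]}
else:
    source = torch.jit.script(model)
    # dtype comes from the example tensor; every dimension is left dynamic
    kwargs = {"input": [(i.dtype, PartialShape([-1] * len(i.shape))) for i in example_input]}
ov_model = convert_model(source, **kwargs)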
decoder = TorchScriptPythonDecoder(model, example_input=example_input, skip_freeze=not freeze_model) + kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]} else: decoder = TorchScriptPythonDecoder(model, skip_freeze=not freeze_model) + kwargs = {"input": [(i.dtype, PartialShape([-1] * len(i.shape))) for i in example_input]} smodel = decoder.pt_module print(smodel.inlined_graph) if not dynamic_shapes: diff --git a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py index c01e58c2107eec..09be641a0fb96e 100644 --- a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py +++ b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -49,6 +51,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool3d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) @@ -94,6 +98,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool2d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) @@ -139,6 +145,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool1d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_add.py b/tests/layer_tests/pytorch_tests/test_add.py index 7cf243b0577bcb..d89a05420eb7e1 100644 --- a/tests/layer_tests/pytorch_tests/test_add.py +++ b/tests/layer_tests/pytorch_tests/test_add.py @@ -43,7 +43,7 @@ def forward2(self, lhs, rhs): @pytest.mark.parametrize("op_type", ["add", "add_"]) def test_add(self, ie_device, precision, ir_version, alpha, input_rhs, op_type): self.input_rhs = input_rhs - self._test(*self.create_model(alpha, op_type), ie_device, precision, ir_version) + self._test(*self.create_model(alpha, op_type), ie_device, precision, ir_version, use_convert_model=True) class TestAddTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_all.py b/tests/layer_tests/pytorch_tests/test_all.py index b5255f197cfef0..ca9b734c1ad1dd 100644 --- a/tests/layer_tests/pytorch_tests/test_all.py +++ b/tests/layer_tests/pytorch_tests/test_all.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ 
-68,6 +70,8 @@ def test_all_noparams(self, input_tensor, ie_device, precision, ir_version): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_all(self, input_tensor, keepdim, ie_device, precision, ir_version): self.input_tensor = input_tensor for dim in range(len(input_tensor.shape)): diff --git a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py index 05abf128da400d..80ed6fcb872b5f 100644 --- a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py +++ b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -71,6 +73,8 @@ def forward(self, x): @pytest.mark.parametrize("dtype", ["float32", "int32", "int64"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_argmin_argmax(self, axes, keep_dims, op_type, dtype, ie_device, precision, ir_version): self._test(*self.create_model(op_type, axes, keep_dims), ie_device, precision, ir_version, trace_model=True, diff --git a/tests/layer_tests/pytorch_tests/test_as_strided.py b/tests/layer_tests/pytorch_tests/test_as_strided.py new file mode 100644 index 00000000000000..9bfaa66d3a7f6b --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_as_strided.py @@ -0,0 +1,125 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestAsStrided(PytorchLayerTest): + def _prepare_input(self): + return (np.random.randn(8, 8).astype(np.float32),) + + def create_model(self, size, stride, offset): + class aten_as_strided(torch.nn.Module): + def __init__(self, size, stride, offset): + super().__init__() + self.size = size + self.stride = stride + self.offset = offset + + def forward(self, x): + return torch.as_strided(x, self.size, self.stride, self.offset) + + ref_net = None + + return aten_as_strided(size, stride, offset), ref_net, "aten::as_strided" + + @pytest.mark.parametrize( + "size,stride", + [ + ([1], [1]), + ([2, 2], [1, 1]), + ([5, 4, 3], [1, 3, 7]), + ([5, 5, 5], [5, 0, 5]), + ([1, 2, 3, 4], [4, 3, 2, 1]), + ], + ) + @pytest.mark.parametrize("offset", [None, 1, 3, 7]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_as_strided(self, size, stride, offset, ie_device, precision, ir_version): + self._test(*self.create_model(size, stride, offset), ie_device, precision, ir_version, trace_model=True) + + +class TestAsStridedListConstruct(PytorchLayerTest): + def _prepare_input(self, size_shape_tensor=[1], stride_shape_tensor=[1]): + return ( + np.random.randn(8, 8).astype(np.float32), + np.ones(size_shape_tensor), + np.ones(stride_shape_tensor), + ) + + def create_model(self, size, stride, offset, mode): + class aten_as_strided(torch.nn.Module): + def __init__(self, size, stride, offset, mode): + super().__init__() + self.size = size + self.stride = stride + self.size_shape_tensor = torch.empty(size) + self.stride_shape_tensor = torch.empty(stride) + self.offset = offset + modes = { + "no_const": self.forward_no_const, + "stride_const": self.forward_stride_const, + "size_const": self.forward_size_const, + } + 
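For reference, the aten::as_strided semantics these new tests exercise: a view that re-reads the flat storage with the given size, stride, and offset (values below are illustrative):

import torch

x = torch.arange(16.0).reshape(4, 4)
view = torch.as_strided(x, size=[2, 2], stride=[4, 1], storage_offset=1)
# Reads storage offsets 1, 2, 5, 6: tensor([[1., 2.], [5., 6.]])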
self.forward = modes.get(mode) + + def forward_no_const(self, x, size_shape_tensor, stride_shape_tensor): + sz1, sz2, sz3 = size_shape_tensor.shape + st1, st2, st3 = stride_shape_tensor.shape + return torch.as_strided(x, [sz1, sz2, sz3], [st1, st2, st3], self.offset) + + def forward_stride_const(self, x, size_shape_tensor, stride_shape_tensor): + sz1, sz2, sz3 = size_shape_tensor.shape + return torch.as_strided(x, [sz1, sz2, sz3], self.stride, self.offset) + + def forward_size_const(self, x, size_shape_tensor, stride_shape_tensor): + st1, st2, st3 = stride_shape_tensor.shape + return torch.as_strided(x, self.size, [st1, st2, st3], self.offset) + + ref_net = None + + return aten_as_strided(size, stride, offset, mode), ref_net, ["aten::as_strided", "prim::ListConstruct"] + + @pytest.mark.parametrize("size,stride", [([5, 4, 3], [1, 3, 7]), ([5, 5, 5], [5, 0, 5])]) + @pytest.mark.parametrize("offset", [None, 7]) + @pytest.mark.parametrize("mode", ["no_const", "stride_const", "size_const"]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_as_strided_list_construct(self, size, stride, offset, mode, ie_device, precision, ir_version): + inp_kwargs = {"size_shape_tensor": size, "stride_shape_tensor": stride} + self._test( + *self.create_model(size, stride, offset, mode), + ie_device, + precision, + ir_version, + kwargs_to_prepare_input=inp_kwargs, + trace_model=True + ) + + +class TestAsStridedLongformer(PytorchLayerTest): + def _prepare_input(self): + return (np.random.randn(1, 10, 20, 40).astype(np.float32).transpose([0, 2, 3, 1]),) + + def create_model(self): + class aten_as_strided_lf(torch.nn.Module): + def forward(self, x): + chunk_size = list(x.size()) + chunk_size[1] = chunk_size[1] * 2 - 1 + chunk_stride = list(x.stride()) + chunk_stride[1] = chunk_stride[1] // 2 + return x.as_strided(size=chunk_size, stride=chunk_stride) + + ref_net = None + + return aten_as_strided_lf(), ref_net, "aten::as_strided" + + @pytest.mark.nightly + @pytest.mark.precommit + def test_as_strided_lf(self, ie_device, precision, ir_version): + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, freeze_model=False) diff --git a/tests/layer_tests/pytorch_tests/test_comparision.py b/tests/layer_tests/pytorch_tests/test_comparision.py index 98134a274f7bdb..a114afb1f712c8 100644 --- a/tests/layer_tests/pytorch_tests/test_comparision.py +++ b/tests/layer_tests/pytorch_tests/test_comparision.py @@ -55,7 +55,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit def test_comp(self, op, ie_device, precision, ir_version): - self._test(*self.create_model(op), ie_device, precision, ir_version) + self._test(*self.create_model(op), ie_device, precision, ir_version, use_convert_model=True) class TestCompMixedTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_cumsum.py b/tests/layer_tests/pytorch_tests/test_cumsum.py index 926cfe9e95c30a..771eb02768bdf0 100644 --- a/tests/layer_tests/pytorch_tests/test_cumsum.py +++ b/tests/layer_tests/pytorch_tests/test_cumsum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -69,5 +71,7 @@ def forward_out_prim_dtype(self, x, y): @pytest.mark.parametrize("out,dtype_from_input", [(False, False), (True, False), (True, True)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + 
reason='Ticket - 122715') def test_cumsum(self, axis, dtype, out, dtype_from_input, ie_device, precision, ir_version): self._test(*self.create_model(axis, dtype, out, dtype_from_input), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out, "out_dtype": dtype}) diff --git a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py index 8fa207efe85b41..fa4293b275e6c7 100644 --- a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py +++ b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py @@ -10,15 +10,6 @@ from torchvision.ops import deform_conv2d -def xfail_106712(test_param): - return pytest.param( - test_param, - marks=pytest.mark.xfail( - reason="Depending on number of groups and number of output channels, deformable convolution may return incorrect reasults. Ticket 106712" - ), - ) - - params = [ { "weights_shape": [64, 64, 3, 3], @@ -62,15 +53,13 @@ def xfail_106712(test_param): "padding": (2, 2), "dilation": (1, 1), }, - xfail_106712( - { - "weights_shape": [64, 16, 3, 3], - "offset_shape": [1, 18, 64, 64], - "stride": (1, 1), - "padding": (1, 1), - "dilation": (1, 1), - } - ), + { + "weights_shape": [64, 16, 3, 3], + "offset_shape": [1, 18, 64, 64], + "stride": (1, 1), + "padding": (1, 1), + "dilation": (1, 1), + }, { "weights_shape": [60, 16, 3, 3], "offset_shape": [1, 18, 64, 64], @@ -92,15 +81,13 @@ def xfail_106712(test_param): "padding": (1, 1), "dilation": (1, 1), }, - xfail_106712( - { - "weights_shape": [64, 32, 3, 3], - "offset_shape": [1, 36, 68, 68], - "stride": (1, 1), - "padding": (3, 3), - "dilation": (1, 1), - } - ), + { + "weights_shape": [64, 32, 3, 3], + "offset_shape": [1, 36, 68, 68], + "stride": (1, 1), + "padding": (3, 3), + "dilation": (1, 1), + }, { "weights_shape": [62, 32, 3, 3], "offset_shape": [1, 36, 68, 68], diff --git a/tests/layer_tests/pytorch_tests/test_device.py b/tests/layer_tests/pytorch_tests/test_device.py index a36bd731a6dae7..2efdf5c85c50fa 100644 --- a/tests/layer_tests/pytorch_tests/test_device.py +++ b/tests/layer_tests/pytorch_tests/test_device.py @@ -56,7 +56,8 @@ def test_device(self, device_string, ie_device, precision, ir_version): ie_device, precision, ir_version, - trace_model=False + trace_model=False, + use_convert_model=True, ) @pytest.mark.parametrize("device_string", ["cpu", "cuda"]) @@ -68,5 +69,6 @@ def test_device_type(self, device_string, ie_device, precision, ir_version): ie_device, precision, ir_version, - trace_model=False + trace_model=False, + use_convert_model=True, ) diff --git a/tests/layer_tests/pytorch_tests/test_dict.py b/tests/layer_tests/pytorch_tests/test_dict.py index a3e3c29b6baf1f..6e4db9dea825bd 100644 --- a/tests/layer_tests/pytorch_tests/test_dict.py +++ b/tests/layer_tests/pytorch_tests/test_dict.py @@ -23,4 +23,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_dict(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_distance.py b/tests/layer_tests/pytorch_tests/test_distance.py index 1c76a7243b47e3..fb9fade8206996 100644 --- a/tests/layer_tests/pytorch_tests/test_distance.py +++ b/tests/layer_tests/pytorch_tests/test_distance.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from 
pytorch_layer_test_class import PytorchLayerTest @@ -29,8 +31,10 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.parametrize("p", [2., 4., 6., 8.,]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_cdist(self, p, ie_device, precision, ir_version): - self._test(*self.create_model(p), ie_device, precision, ir_version) + self._test(*self.create_model(p), ie_device, precision, ir_version, use_convert_model=True) class TestPairwiseDistance(PytorchLayerTest): @@ -61,5 +65,7 @@ def forward(self, x, y): @pytest.mark.parametrize("p", [2., 4., 6., 8.,]) @pytest.mark.parametrize("eps", [1e-06, 0.00001, 1e-07]) @pytest.mark.parametrize("keepdim", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_cdist(self, p, eps, keepdim, ie_device, precision, ir_version): - self._test(*self.create_model(p, eps, keepdim), ie_device, precision, ir_version) \ No newline at end of file + self._test(*self.create_model(p, eps, keepdim), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_div.py b/tests/layer_tests/pytorch_tests/test_div.py index d6e696b62882d5..564cb2915c8686 100644 --- a/tests/layer_tests/pytorch_tests/test_div.py +++ b/tests/layer_tests/pytorch_tests/test_div.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -47,7 +49,7 @@ def test_div_pt_spec(self, input_array, other_array, rounding_mode, ie_device, p self.other_array = other_array self.other_type = np.float32 self._test(*self.create_model(rounding_mode), - ie_device, precision, ir_version) + ie_device, precision, ir_version, use_convert_model=True) class TestDivTypes(PytorchLayerTest): @@ -116,6 +118,8 @@ def forward3(self, lhs, rhs): ])) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_div_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape, rounding_mode): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_embedding_bag.py b/tests/layer_tests/pytorch_tests/test_embedding_bag.py index 2595b2269316fd..d0c6d0c532856f 100644 --- a/tests/layer_tests/pytorch_tests/test_embedding_bag.py +++ b/tests/layer_tests/pytorch_tests/test_embedding_bag.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -42,6 +44,8 @@ def forward_offsets_per_sample_weights(self, indicies, weight, offsets, per_samp @pytest.mark.precommit @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_embedding_bag(self, ie_device, precision, ir_version, indicies_dtype, per_sample_weights): self._test(*self.create_model(per_sample_weights), ie_device, precision, ir_version, kwargs_to_prepare_input={"indicies_dtype": indicies_dtype, "per_sample_weights": per_sample_weights}, @@ -85,6 +89,8 @@ def forward_per_sample_weights(self, indicies, weight, 
per_sample_wights): @pytest.mark.parametrize("indicies_size", [[1, 1], [2, 5], [3, 10], [4, 7]]) @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_embedding_bag(self, ie_device, precision, ir_version, indicies_dtype, indicies_size, per_sample_weights): self._test(*self.create_model(per_sample_weights), ie_device, precision, ir_version, kwargs_to_prepare_input={"indicies_size": indicies_size, "indicies_dtype": indicies_dtype, "per_sample_weights": per_sample_weights}, diff --git a/tests/layer_tests/pytorch_tests/test_empty.py b/tests/layer_tests/pytorch_tests/test_empty.py index 7b5a4b92563dc5..c504e262038c87 100644 --- a/tests/layer_tests/pytorch_tests/test_empty.py +++ b/tests/layer_tests/pytorch_tests/test_empty.py @@ -134,7 +134,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_empty(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -142,4 +142,4 @@ def test_new_empty(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_empty_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_eq.py b/tests/layer_tests/pytorch_tests/test_eq.py index 91c30df79fabb8..6fa5f4ecfba5eb 100644 --- a/tests/layer_tests/pytorch_tests/test_eq.py +++ b/tests/layer_tests/pytorch_tests/test_eq.py @@ -45,4 +45,4 @@ def test_eq_pt_spec(self, input_array, other_array, types, ie_device, precision, self.input_type = types[0] self.other_array = other_array self.other_type = types[1] - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_fake_quantize.py b/tests/layer_tests/pytorch_tests/test_fake_quantize.py index 6bb1d6601cb43b..3146ac87b90087 100644 --- a/tests/layer_tests/pytorch_tests/test_fake_quantize.py +++ b/tests/layer_tests/pytorch_tests/test_fake_quantize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -46,6 +48,8 @@ def forward(self, x): (1.0, 0, 0, 127), ], ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fake_quantize_per_tensor_affine( self, ie_device, precision, ir_version, scale, zero_point, quant_min, quant_max ): @@ -96,6 +100,8 @@ def forward(self, x): (torch.tensor([-0.005, -0.7, 0.1]), torch.tensor([1, 0, 1], dtype=torch.int32), 0, 0, 255), ], ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', 
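The eager-mode ops behind these newly xfail'ed cases, runnable standalone (scale/zero-point values are illustrative, in the spirit of the parametrization above):

import torch

x = torch.randn(2, 3)
per_tensor = torch.fake_quantize_per_tensor_affine(
    x, scale=1.0, zero_point=0, quant_min=0, quant_max=127)
per_channel = torch.fake_quantize_per_channel_affine(
    x, scale=torch.tensor([0.005, 0.7]),
    zero_point=torch.tensor([0, 1], dtype=torch.int32),
    axis=0, quant_min=0, quant_max=255)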
+ reason='Ticket - 122715') def test_fake_quantize_per_channel_affine( self, ie_device, precision, ir_version, scale, zero_point, axis, quant_min, quant_max ): diff --git a/tests/layer_tests/pytorch_tests/test_floor_divide.py b/tests/layer_tests/pytorch_tests/test_floor_divide.py index cd427acb3dba56..0cdc46333b2651 100644 --- a/tests/layer_tests/pytorch_tests/test_floor_divide.py +++ b/tests/layer_tests/pytorch_tests/test_floor_divide.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest @@ -52,10 +54,12 @@ def forward(self, input_tensor, other_tensor): ])) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_floor_divide(self, input_tensor, other_tensor, ie_device, precision, ir_version): self.input_tensor = input_tensor self.other_tensor = other_tensor - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) @pytest.mark.parametrize('input_tensor', ([ np.random.randint(low=0, high=10, size=5).astype(np.float32), diff --git a/tests/layer_tests/pytorch_tests/test_fp16.py b/tests/layer_tests/pytorch_tests/test_fp16.py index b754306727935c..5952f40f167c54 100644 --- a/tests/layer_tests/pytorch_tests/test_fp16.py +++ b/tests/layer_tests/pytorch_tests/test_fp16.py @@ -29,7 +29,7 @@ def forward(self, x): @pytest.mark.parametrize("to_trace", [True, False]) def test_bf16(self, ie_device, precision, ir_version, to_trace): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=to_trace, freeze_model=False) + ir_version, trace_model=to_trace, freeze_model=False, use_convert_model=True) class TestFP16(PytorchLayerTest): @@ -53,4 +53,4 @@ def forward(self, x): @pytest.mark.parametrize("to_trace", [True, False]) def test_fp16(self, ie_device, precision, ir_version, to_trace): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=to_trace, freeze_model=False) + ir_version, trace_model=to_trace, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index c564b1bb3731b9..cf3794be11e891 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -1,5 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +import platform + import numpy as np import pytest @@ -144,6 +147,8 @@ def forward(self, input_t: torch.Tensor, x:float): @pytest.mark.parametrize("mode", ["", "inplace", "out"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fill(self, shape, value, input_dtype, value_dtype, mode, ie_device, precision, ir_version): self._test(*self.create_model(mode), ie_device, precision, ir_version, kwargs_to_prepare_input={ @@ -183,6 +188,8 @@ def forward(self, x:torch.Tensor, y:float): @pytest.mark.parametrize("wrap", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fill_diagonal(self, shape, value, input_dtype, value_dtype, wrap, ie_device, precision, 
ir_version): self._test(*self.create_model(shape, wrap), ie_device, precision, ir_version, kwargs_to_prepare_input={ @@ -340,7 +347,7 @@ def forward(self, input_tensor: torch.Tensor, x: float): @pytest.mark.precommit def test_new_full(self, shape, value, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}) + kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value,input_dtype", [(0, np.uint8), (1, np.int32), (-1, np.float32), (0.5, np.float64)]) @@ -348,7 +355,7 @@ def test_new_full(self, shape, value, input_dtype, ie_device, precision, ir_vers @pytest.mark.nightly def test_new_full_with_dtype(self, value, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}) + kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}, use_convert_model=True) class TestZerosAndOnes(PytorchLayerTest): @@ -555,7 +562,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_zeros(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -563,7 +570,7 @@ def test_new_zeros(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_zeros_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) class TestNewOnes(PytorchLayerTest): @@ -614,7 +621,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -622,4 +629,4 @@ def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_ones_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_getitem.py b/tests/layer_tests/pytorch_tests/test_getitem.py index b827f626914125..62d7e12ada6e61 100644 --- 
a/tests/layer_tests/pytorch_tests/test_getitem.py +++ b/tests/layer_tests/pytorch_tests/test_getitem.py @@ -102,4 +102,4 @@ def _prepare_input(self): @pytest.mark.parametrize("idx", [-4, -3, -2, -1, 0, 1, 2, 3]) def test_add_cat(self, ie_device, precision, ir_version, idx): self._test(aten_add_getitem(idx), None, ["aten::__getitem__", "aten::add", "prim::ListConstruct"], - ie_device, precision, ir_version, freeze_model=False) + ie_device, precision, ir_version, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_grid_sampler.py b/tests/layer_tests/pytorch_tests/test_grid_sampler.py index b142544c3b6e62..7b55862e2f0c2d 100644 --- a/tests/layer_tests/pytorch_tests/test_grid_sampler.py +++ b/tests/layer_tests/pytorch_tests/test_grid_sampler.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -35,6 +37,8 @@ def forward(self, input, grid): @pytest.mark.parametrize("align_corners", [True, False, None]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_grid_sampler(self, h_in, w_in, h_out, w_out, mode, padding_mode, align_corners, ie_device, precision, ir_version): self._test(*self.create_model(mode, padding_mode, align_corners), ie_device, precision, ir_version, kwargs_to_prepare_input={ "h_in": h_in, "w_in": w_in, "h_out": h_out, "w_out": w_out diff --git a/tests/layer_tests/pytorch_tests/test_if.py b/tests/layer_tests/pytorch_tests/test_if.py index b4fd9470cc8be3..fa4b1e9967dd02 100644 --- a/tests/layer_tests/pytorch_tests/test_if.py +++ b/tests/layer_tests/pytorch_tests/test_if.py @@ -39,4 +39,4 @@ def forward(self, x, y): @pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 114818") def test_if(self, y, ie_device, precision, ir_version): self.y = y - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_index.py b/tests/layer_tests/pytorch_tests/test_index.py index 6f7cea86990d2c..4b4e53690f927e 100644 --- a/tests/layer_tests/pytorch_tests/test_index.py +++ b/tests/layer_tests/pytorch_tests/test_index.py @@ -150,4 +150,4 @@ def forward(self, x): [2, 2, 3, 4])) def test_index_mask(self, input_shape, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, kwargs_to_prepare_input={ - "input_shape": input_shape}, trace_model=True) + "input_shape": input_shape}, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_index_put_.py b/tests/layer_tests/pytorch_tests/test_index_put_.py index 55cbe39bd92d58..6f94a0912d4df2 100644 --- a/tests/layer_tests/pytorch_tests/test_index_put_.py +++ b/tests/layer_tests/pytorch_tests/test_index_put_.py @@ -162,7 +162,7 @@ def test_nonzero_index_put_(self, ie_device, precision, ir_version, input_data, self.values = input_data["values"] self.indices_0 = indices[0] self.indices_1 = indices[1] - self._test(*self.create_model(accumulate), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(accumulate), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) class TestMask_IndexPut(PytorchLayerTest): def _prepare_input(self): @@ -181,4 +181,4 @@ def 
forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit def test_nonzero_index_put_(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_instance_norm.py b/tests/layer_tests/pytorch_tests/test_instance_norm.py index 2fe3f5e13e066a..3ec2dd0144573d 100644 --- a/tests/layer_tests/pytorch_tests/test_instance_norm.py +++ b/tests/layer_tests/pytorch_tests/test_instance_norm.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -60,6 +62,8 @@ def forward(self, x): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_group_norm(self, params, ie_device, precision, ir_version, kwargs_to_prepare_input): self._test(*self.create_model(**params), ie_device, precision, ir_version, kwargs_to_prepare_input=kwargs_to_prepare_input, diff --git a/tests/layer_tests/pytorch_tests/test_len.py b/tests/layer_tests/pytorch_tests/test_len.py index d6d3a7dc211564..7aa5f020b9c7fc 100644 --- a/tests/layer_tests/pytorch_tests/test_len.py +++ b/tests/layer_tests/pytorch_tests/test_len.py @@ -48,7 +48,7 @@ def test_len(self, ie_device, precision, ir_version, input_tensor): def test_len_int_list(self, ie_device, precision, ir_version, input_tensor): self.input_tensor = input_tensor self._test(*self.create_model_int_list(), - ie_device, precision, ir_version) + ie_device, precision, ir_version, use_convert_model=True) class TestLenEmpty(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_linspace.py b/tests/layer_tests/pytorch_tests/test_linspace.py index aa6f70d3d71c89..4cf623e55fafad 100644 --- a/tests/layer_tests/pytorch_tests/test_linspace.py +++ b/tests/layer_tests/pytorch_tests/test_linspace.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -63,6 +65,8 @@ def forward(self, start, end, steps, d): @pytest.mark.parametrize( "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linspace_with_prim_dtype(self, dtype, end, start, steps, ie_device, precision, ir_version): self._test( *self.create_model(dtype, ref_dtype=True), @@ -79,6 +83,8 @@ def test_linspace_with_prim_dtype(self, dtype, end, start, steps, ie_device, pre "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] ) @pytest.mark.parametrize("use_out", [False, True]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linspace_with_out(self, dtype, use_out, end, start, steps, ie_device, precision, ir_version): self._test( *self.create_model(dtype=dtype, use_out=use_out), diff --git a/tests/layer_tests/pytorch_tests/test_listunpack.py b/tests/layer_tests/pytorch_tests/test_listunpack.py index b001bccd2a97d1..39d72bfe54c6e9 100644 --- a/tests/layer_tests/pytorch_tests/test_listunpack.py +++ 
b/tests/layer_tests/pytorch_tests/test_listunpack.py @@ -123,6 +123,7 @@ def test_listconstruct_getitem_listunpack( ie_device, precision, ir_version, + use_convert_model=True, ) class TestMeshgridListUnpack(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_mul.py b/tests/layer_tests/pytorch_tests/test_mul.py index f22fd7b03bc0cb..c7aea00ee8ad89 100644 --- a/tests/layer_tests/pytorch_tests/test_mul.py +++ b/tests/layer_tests/pytorch_tests/test_mul.py @@ -37,7 +37,7 @@ def test_mul_pt_spec(self, input_array, other_array, ie_device, precision, ir_ve self.input_type = np.float32 self.other_array = other_array self.other_type = np.float32 - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestMulTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py index 41e737dba6221d..26b7cdbd14812b 100644 --- a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py +++ b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -74,6 +76,8 @@ def _prepare_input(self): ["need_weights", "average_attn_weights"], [[False, False], [True, False], [True, True]] ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_native_multi_head_attention(self, ie_device, precision, ir_version, mask, need_weights, average_attn_weights): self._test(aten_native_multi_head_attention(mask, need_weights, average_attn_weights), None, "aten::_native_multi_head_attention", ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_nms.py b/tests/layer_tests/pytorch_tests/test_nms.py index ae09726a23b8f9..b703e98ccaffe9 100644 --- a/tests/layer_tests/pytorch_tests/test_nms.py +++ b/tests/layer_tests/pytorch_tests/test_nms.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest import numpy as np @@ -35,6 +37,8 @@ def forward(self, boxes, scores): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_nms(self, ie_device, precision, ir_version, boxes_num): self.boxes_num = boxes_num self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_norm.py b/tests/layer_tests/pytorch_tests/test_norm.py index aef0a074059950..9422c170401702 100644 --- a/tests/layer_tests/pytorch_tests/test_norm.py +++ b/tests/layer_tests/pytorch_tests/test_norm.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -245,6 +247,8 @@ def forward_out(self, x, y): @pytest.mark.parametrize("dtype", ["float32", "float64", None]) @pytest.mark.parametrize("out", [True, False]) @pytest.mark.parametrize("prim_dtype", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linalg_matrix_norm(self, p, dim, keepdim, dtype, out, 
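Standalone, the op under test in test_linalg_matrix_norm (the ord/dim/keepdim values here are illustrative):

import torch

x = torch.randn(3, 4, 5)
fro = torch.linalg.matrix_norm(x, ord='fro', dim=(-2, -1), keepdim=False)
f64 = torch.linalg.matrix_norm(x, ord=1, dim=(-2, -1), dtype=torch.float64)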
prim_dtype, ie_device, precision, ir_version): self._test(*self.create_model(p, dim, keepdim, dtype, out, prim_dtype), ie_device, precision, ir_version, diff --git a/tests/layer_tests/pytorch_tests/test_or.py b/tests/layer_tests/pytorch_tests/test_or.py index c6592a11af083f..bde1e61ecce74d 100644 --- a/tests/layer_tests/pytorch_tests/test_or.py +++ b/tests/layer_tests/pytorch_tests/test_or.py @@ -25,4 +25,5 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_or(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, dynamic_shapes=False, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, + dynamic_shapes=False, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_pooling.py b/tests/layer_tests/pytorch_tests/test_pooling.py index 3f4c94db6d45d0..f54902282ece1b 100644 --- a/tests/layer_tests/pytorch_tests/test_pooling.py +++ b/tests/layer_tests/pytorch_tests/test_pooling.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -133,6 +135,8 @@ def forward(self, x): @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool1d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, trace_model=True, @@ -151,6 +155,8 @@ def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool2d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, trace_model=True, dynamic_shapes=False) @@ -160,6 +166,8 @@ def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool3d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, trace_model=True, @@ -170,6 +178,8 @@ def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): self._test(*self.create_model("max_pool1d", **params, ceil_mode=ceil_mode, dilation=dilation), ie_device, 
precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, dynamic_shapes=False) @@ -179,6 +189,8 @@ def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_ @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool2d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): to_trace = False if params["stride"] == []: @@ -191,6 +203,8 @@ def test_max_pool2d(self, params, ceil_mode, dilation, ie_device, precision, ir @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool3d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): self._test(*self.create_model("max_pool3d", **params, ceil_mode=ceil_mode, dilation=dilation), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, dynamic_shapes=False) @@ -200,6 +214,8 @@ def test_max_pool3d(self, params, ceil_mode, dilation, ie_device, precision, ir_ @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") @@ -211,6 +227,8 @@ def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precis @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") @@ -225,6 +243,8 @@ def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool3d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") diff --git a/tests/layer_tests/pytorch_tests/test_pow.py b/tests/layer_tests/pytorch_tests/test_pow.py index 9cf6468404e5d3..92e65898e353eb 100644 --- a/tests/layer_tests/pytorch_tests/test_pow.py +++ b/tests/layer_tests/pytorch_tests/test_pow.py @@ -41,7 +41,7 @@ def forward(self, input_data, exponent): @pytest.mark.precommit def test_pow(self, ie_device, precision, ir_version, test_input): self.test_input = test_input - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestPowMixedTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_quantize.py b/tests/layer_tests/pytorch_tests/test_quantize.py index f1a7522159090e..600821fa16204c 100644 --- a/tests/layer_tests/pytorch_tests/test_quantize.py +++ b/tests/layer_tests/pytorch_tests/test_quantize.py @@ -1,6 +1,8 @@ # Copyright (C) 
2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -49,6 +51,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantize_per_tensor_dequantize(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(aten_quantize_per_tensor_aten_dequantize(scale, zero_point, dtype), None, ["aten::quantize_per_tensor", "aten::dequantize"], @@ -88,6 +92,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantize_per_channel_dequantize(self, scale, zero_point, dtype, axis, ie_device, precision, ir_version): np.random.shuffle(scale), np.random.shuffle(zero_point) if dtype == torch.quint8: zero_point = abs(zero_point) diff --git a/tests/layer_tests/pytorch_tests/test_quantized_add.py b/tests/layer_tests/pytorch_tests/test_quantized_add.py index 960d3b4cca7aef..59a992fc088d5a 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_add.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_add.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_add(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_add(scale, zero_point, dtype), None, ["quantized::add"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py b/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py index 4a0dd797e3525c..6cb64dfab053d6 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_add_relu(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_add_relu(scale, zero_point, dtype), None, ["quantized::add_relu"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_cat.py b/tests/layer_tests/pytorch_tests/test_quantized_cat.py index db6e5278bb5c50..ce0bc880e78f66 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_cat.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_cat.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -73,6 +75,8 @@ def _prepare_input(self): @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 
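The eager-mode pair these tests convert through aten::quantize_per_tensor / aten::dequantize, standalone (values illustrative; quint8 requires a non-negative zero point, which is why the tests above take abs(zero_point)):

import torch

x = torch.randn(2, 3)
q = torch.quantize_per_tensor(x, scale=0.1, zero_point=2, dtype=torch.quint8)
y = torch.dequantize(q)  # back to float32, within one quantization step of x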
'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_quantized_cat(scale, zero_point, dtype), @@ -91,6 +95,8 @@ def test_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_ @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_append_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_append_quantized_cat(scale, zero_point, dtype), @@ -130,6 +136,8 @@ def test_loop_append_quantized_cat(self, scale, zero_point, dtype, ie_device, pr @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_add_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_add_quantized_cat(scale, zero_point, dtype), diff --git a/tests/layer_tests/pytorch_tests/test_quantized_convnd.py b/tests/layer_tests/pytorch_tests/test_quantized_convnd.py index cf3ec0142cf46b..bc4ac9e1788b34 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_convnd.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_convnd.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import numpy as np import torch @@ -78,6 +80,8 @@ def forward(self, x): @pytest.mark.parametrize("zero_point", [0, 1]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_conv2d(self, params, bias, relu, scale, zero_point, ie_device, precision, ir_version): self._test( *self.create_model(**params, bias=bias, relu=relu, diff --git a/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py b/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py index a0b40783c4e98d..4508bbcb266ab6 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -36,6 +38,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_hardswish(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_hardswish(scale, zero_point, dtype), None, ["quantized::hardswish"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_linear.py b/tests/layer_tests/pytorch_tests/test_quantized_linear.py index 1ded932f234055..bd89ea48303f25 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_linear.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_linear.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import torch import numpy as np @@ -73,6 +75,8 @@ def forward(self, inp): @pytest.mark.parametrize("trace", 
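The Darwin/arm64 xfail marker added throughout these quantized-op tests is byte-for-byte identical. A minimal sketch (not part of this diff) of how the repeated condition could be collected once per module; the helper name is hypothetical:

import platform

import pytest

# Hypothetical shared marker; condition and ticket number taken from the hunks above.
darwin_arm64_xfail = pytest.mark.xfail(
    condition=platform.system() == 'Darwin' and platform.machine() == 'arm64',
    reason='Ticket - 122715')


@darwin_arm64_xfail
def test_example():
    # Runs normally on other platforms; reported as xfail/xpass on macOS arm64.
    assert True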
[True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_linear(self, params, scale, zero_point, trace, ie_device, precision, ir_version): input_shape = params.get("input_shape") weight_shape = params.get("weight_shape") @@ -84,6 +88,8 @@ def test_quantized_linear(self, params, scale, zero_point, trace, ie_device, pre @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_hardtanh_linear(self, trace, inplace, ie_device, precision, ir_version): self._test(*self.create_hardtanh_model([10, 9], True, 1, 0.3, inplace), ie_device, precision, ir_version, kwargs_to_prepare_input={"input_shape": [2, 3, 9]}, trace_model=trace, freeze_model=False, quantized_ops=True, quant_size=0.3) diff --git a/tests/layer_tests/pytorch_tests/test_quantized_mul.py b/tests/layer_tests/pytorch_tests/test_quantized_mul.py index cc877daa919b5b..d170d70308b6a5 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_mul.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_mul.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_mul(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_mul(scale, zero_point, dtype), None, ["quantized::mul"], diff --git a/tests/layer_tests/pytorch_tests/test_remainder.py b/tests/layer_tests/pytorch_tests/test_remainder.py index 4a499e85a37870..05ad2d3b1387fe 100644 --- a/tests/layer_tests/pytorch_tests/test_remainder.py +++ b/tests/layer_tests/pytorch_tests/test_remainder.py @@ -32,7 +32,7 @@ def forward(self, lhs, rhs): @pytest.mark.precommit def test_remainder(self, ie_device, precision, ir_version, input_rhs): self.input_rhs = input_rhs - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestRemainderTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_repeat.py b/tests/layer_tests/pytorch_tests/test_repeat.py index 45263366c76c54..884a51e2a24f6f 100644 --- a/tests/layer_tests/pytorch_tests/test_repeat.py +++ b/tests/layer_tests/pytorch_tests/test_repeat.py @@ -77,4 +77,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_repeat_t5(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_rsub.py b/tests/layer_tests/pytorch_tests/test_rsub.py index 64c4b9619d7b73..9c144ad4da247b 100644 --- a/tests/layer_tests/pytorch_tests/test_rsub.py +++ b/tests/layer_tests/pytorch_tests/test_rsub.py @@ -40,9 +40,9 @@ def forward(self, x, y:int, alpha: float): @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, 
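Many of the PyTorch layer tests above now pass use_convert_model=True. The flag's name suggests the harness additionally exercises the public openvino.convert_model API rather than only the legacy conversion path; standalone, that conversion looks roughly like the sketch below (assuming openvino and torch are installed, and using a trivial module for illustration):

import torch

import openvino as ov

model = torch.nn.ReLU()
# Trace and convert directly from a torch.nn.Module; example_input drives tracing.
ov_model = ov.convert_model(model, example_input=torch.randn(1, 3))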
input_data): + def test_rsub_f(self, ie_device, precision, ir_version, input_data): self.input_data = input_data - self._test(*self.create_model(second_type="float"), ie_device, precision, ir_version) + self._test(*self.create_model(second_type="float"), ie_device, precision, ir_version, use_convert_model=True) @pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 4).astype(np.float32), np.array(5).astype(int), @@ -50,9 +50,9 @@ def test_rsub(self, ie_device, precision, ir_version, input_data): @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, input_data): + def test_rsub_i(self, ie_device, precision, ir_version, input_data): self.input_data = input_data - self._test(*self.create_model(second_type="int"), ie_device, precision, ir_version) + self._test(*self.create_model(second_type="int"), ie_device, precision, ir_version, use_convert_model=True) class TestRsubTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py index 22ed325471823b..69c600a0b7562d 100644 --- a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py +++ b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py @@ -36,6 +36,7 @@ def forward(self, query, key, value): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize(['mask', "is_causal"], [(False, False), (False, True), (True, True), (True, False)]) def test_scaled_dot_product_atten(self, ie_device, precision, ir_version, mask, is_causal): self._test(*self.create_model(mask, is_causal),ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_strided_const.py b/tests/layer_tests/pytorch_tests/test_strided_const.py index 438edbc88e24fe..ab33e92f88b4b8 100644 --- a/tests/layer_tests/pytorch_tests/test_strided_const.py +++ b/tests/layer_tests/pytorch_tests/test_strided_const.py @@ -31,4 +31,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_strides(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_sub.py b/tests/layer_tests/pytorch_tests/test_sub.py index aa97b0f23653fb..381d1672454cbe 100644 --- a/tests/layer_tests/pytorch_tests/test_sub.py +++ b/tests/layer_tests/pytorch_tests/test_sub.py @@ -50,7 +50,7 @@ def _forward_inplace(self, x, y, alpha: float): @pytest.mark.precommit def test_sub(self, ie_device, precision, ir_version, input_data, inplace): self.input_data = input_data - self._test(*self.create_model(inplace), ie_device, precision, ir_version) + self._test(*self.create_model(inplace), ie_device, precision, ir_version, use_convert_model=True) class TestSubTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_transpose.py b/tests/layer_tests/pytorch_tests/test_transpose.py index b3378761da8c74..5dec55ec59c597 100644 --- a/tests/layer_tests/pytorch_tests/test_transpose.py +++ b/tests/layer_tests/pytorch_tests/test_transpose.py @@ -91,4 +91,5 @@ def test_t_small(self, num_dims, input_dtype, mode, ie_device, precision, ir_ver precision, ir_version, kwargs_to_prepare_input={"num_dims": num_dims, "input_dtype": input_dtype}, + use_convert_model=True, ) diff --git a/tests/layer_tests/pytorch_tests/test_tuple_construct.py 
b/tests/layer_tests/pytorch_tests/test_tuple_construct.py index 1582df48c4b370..45413a940f132b 100644 --- a/tests/layer_tests/pytorch_tests/test_tuple_construct.py +++ b/tests/layer_tests/pytorch_tests/test_tuple_construct.py @@ -60,7 +60,7 @@ def forward(self, x): @pytest.mark.parametrize("case", ["single", "multiple", "none", "list", "tensor_tail", "list_and_tuple"]) @pytest.mark.nightly def test_tuple_construct(self, case, ie_device, precision, ir_version): - self._test(*self.create_model(case), ie_device, precision, ir_version) + self._test(*self.create_model(case), ie_device, precision, ir_version, use_convert_model=True) class TestTupleConstructTupleUnpack(PytorchLayerTest): @@ -86,7 +86,7 @@ def prepare_input(self, x): @pytest.mark.nightly def test_tuple_construct_unpack(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, - precision, ir_version, freeze_model=False) + precision, ir_version, freeze_model=False, use_convert_model=True) class TestTupleUnpackParameterSingle(PytorchLayerTest): @@ -208,7 +208,7 @@ def some_func(self, x: Tuple[torch.Tensor, torch.Tensor]): @pytest.mark.nightly def test(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=False, freeze_model=False) + ir_version, trace_model=False, freeze_model=False, use_convert_model=True) class TestTcOutsideTuInsideIfBody(PytorchLayerTest): @@ -236,4 +236,4 @@ def some_func(self, x: Tuple[torch.Tensor, torch.Tensor]): @pytest.mark.nightly def test(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=False, freeze_model=False) + ir_version, trace_model=False, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_var_mean.py b/tests/layer_tests/pytorch_tests/test_var_mean.py index 6ce85988e9edfb..bd8a5a10617eb4 100644 --- a/tests/layer_tests/pytorch_tests/test_var_mean.py +++ b/tests/layer_tests/pytorch_tests/test_var_mean.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -52,6 +54,8 @@ def forward(self, x): @pytest.mark.precommit @pytest.mark.parametrize("unbiased", [True, False]) @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, op_type=op_type), ie_device, precision, ir_version) @@ -61,5 +65,7 @@ def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): @pytest.mark.parametrize("dim", [None, 0, 1, 2, 3, -1, -2, (0, 1), (-1, -2), (0, 1, -1), (0, 1, 2, 3)]) @pytest.mark.parametrize("keepdim", [True, False]) @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_var(self, unbiased, dim, keepdim, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, dim, keepdim, two_args_case=False, op_type=op_type), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py index 
bae3f51ce97ff0..6f3eb1b70ed2f2 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -29,5 +31,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_broadcast_to(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py index b534878970ac59..1ae3464c207b34 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -30,5 +32,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_rfft2d(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py index a5ce2d314aee0b..c7339efaf7f55e 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -40,5 +42,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_segment_sum(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py index 88944c50a38091..896e1789111eaa 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -38,6 +40,8 @@ def create_adjust_contrast_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_adjust_contrast_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_adjust_contrast_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py index 62689f5609cc12..7e3964e68c9c35 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest @@ -126,6 +128,8 @@ def 
create_add_placeholder_const_net(self, x_shape, y_shape, ir_version, op_type 'Xdivy']) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_binary_op(self, params, ie_device, precision, ir_version, temp_dir, op_type, use_new_frontend, use_old_api): if precision == "FP16": diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py index d981b2997542b5..8ab60f9ac65beb 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -40,6 +42,8 @@ def create_bucketize_net(self, input_shape, input_type, boundaries_size): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_bucketize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_bucketize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py b/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py index 92ef18ff5aba98..30cefc07c942d2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -53,6 +55,8 @@ def create_crop_and_resize_net(self, image_shape, num_boxes, crop_size_value, me @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_crop_and_resize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_crop_and_resize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py index 58db73ece154e1..5a6f3883185f23 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -41,6 +43,8 @@ def create_div_no_nan_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_div_no_nan_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_div_no_nan_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py index 43d8da8e38019d..191b46e035a376 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py +++ 
b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_fake_quant_with_min_max_vars_net(self, inputs_shape, min_value, max_v ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_fake_quant_with_min_max_vars_basic(self, params, fake_quant_op, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_If.py b/tests/layer_tests/tensorflow_tests/test_tf_If.py index 0e4e7a6fb249e5..20085e6ac86672 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_If.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_If.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -67,6 +69,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -137,6 +141,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -215,6 +221,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -305,6 +313,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py b/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py index 1504ae706a9b19..ea672ac144d987 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest @@ -31,6 +33,8 @@ def create_leaky_relu_net(self, x_shape, alpha_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_leaky_relu_basic(self, params, ie_device, 
precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_leaky_relu_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py b/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py index c696eaaa0355e5..216fe7b7816de4 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest @@ -28,6 +30,8 @@ def create_lin_space_net(self, num_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_lin_space_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_lin_space_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py index bef52905aa3159..063e310dd8174a 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -39,6 +41,8 @@ def create_log_softmax_net(self, logits_shape): @pytest.mark.precommit @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_log_softmax_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_log_softmax_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py index 4d1fed5747ba11..f08995f3c09d11 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -59,6 +61,8 @@ def create_max_pool_with_argmax_net(self, input_shape, ksize, strides, input_typ ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_max_pool_with_argmax_basic(self, params, input_type, padding, targmax, include_batch_in_index, with_second_output, ie_device, precision, ir_version, temp_dir, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py index 51a1b322af6541..5de76778d1d837 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -30,6 +32,8 @@ def create_normalize_l2_net(shape, axes): @pytest.mark.precommit @pytest.mark.precommit_tf_fe @pytest.mark.nightly + 
@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_normalize_l2_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_normalize_l2_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py index eb3ac133b3687d..7c523740d79f96 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.layer_test_class import check_ir_version from common.tf_layer_test_class import CommonTFLayerTest @@ -145,6 +147,8 @@ def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me @pytest.mark.parametrize("params", test_data_4D) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_pooling_net(**params, ir_version=ir_version, @@ -227,6 +231,8 @@ def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_n @pytest.mark.parametrize("params", test_data_5D) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_pool_5D(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py b/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py index 0006afd9ab9eca..1f5f778db3ac2f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.layer_test_class import check_ir_version @@ -88,6 +90,8 @@ def create_tf_random_uniform_net(self, global_seed, op_seed, x_shape, min_val, m @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_tf_fe + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_random_uniform_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Resize.py b/tests/layer_tests/tensorflow_tests/test_tf_Resize.py index 184a8115772128..c62492c7a76196 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Resize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Resize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -60,6 +62,8 @@ def create_resize_net(self, images_shape, images_type, size_value, align_corners @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_resize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, 
use_old_api): self._test(*self.create_resize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py b/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py index dac986b96c281e..26ddcfdd53bcc2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -69,6 +71,8 @@ def create_tf_scatternd_placeholder_const_net(self, x_shape, indices, updates, i @pytest.mark.parametrize("params", test_data) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_tf_scatter_nd(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_tf_scatternd_placeholder_const_net(**params, ir_version=ir_version, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py b/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py index 5d74c361f51c20..f0f99d4b9cf95f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -41,6 +43,8 @@ def create_segment_sum_net(self, data_shape, segment_ids_shape, data_type, segme @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_segment_sum_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if not use_new_frontend: diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py b/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py index fc9391feaae3e8..574fe3d32949f7 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -34,6 +36,8 @@ def create_softmax_net(self, input_shape): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_softmax_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_softmax_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py index b0f24322b01041..03e83dc39e9c8d 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -33,6 +35,8 @@ def create_space_to_batch_net(self, in_shape, pads_value, block_shape_value): @pytest.mark.parametrize("params", test_data_basic) 
@pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_space_to_batch_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_space_to_batch_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py b/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py new file mode 100644 index 00000000000000..098f099f74d24d --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py @@ -0,0 +1,200 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +def create_tensor_array(data_shape, data_type): + size = data_shape[0] + data = tf.compat.v1.placeholder(data_type, data_shape, 'data') + indices = tf.compat.v1.placeholder(tf.int32, [size], 'indices') + size_const = tf.constant(size, dtype=tf.int32, shape=[]) + handle, flow = tf.raw_ops.TensorArrayV3(size=size_const, dtype=tf.as_dtype(data_type)) + flow = tf.raw_ops.TensorArrayScatterV3(handle=handle, indices=indices, value=data, flow_in=flow) + return handle, flow + + +class TestTensorArraySizeV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_size_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = create_tensor_array(data_shape, data_type) + tf.raw_ops.TensorArraySizeV3(handle=handle, flow_in=flow) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[5], data_type=np.float32), + dict(data_shape=[10, 20, 30], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_size_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_size_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayReadV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['index_to_read'] = rng.integers(0, data_shape[0], []).astype(np.int32) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_read_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = 
create_tensor_array(data_shape, data_type) + index_to_read = tf.compat.v1.placeholder(tf.int32, [], 'index_to_read') + tf.raw_ops.TensorArrayReadV3(handle=handle, index=index_to_read, flow_in=flow, + dtype=tf.dtypes.as_dtype(data_type)) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[6], data_type=np.float32), + dict(data_shape=[8, 5, 6, 10], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_read_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_read_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayWriteGatherV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + assert 'value_to_write' in inputs_info + data_shape = inputs_info['data'] + value_shape = inputs_info['value_to_write'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['value_to_write'] = rng.integers(-10, 10, value_shape).astype(self.data_type) + indices_data = rng.permutation(self.size).astype(np.int32) + inputs_data['indices'] = np.delete(indices_data, np.where(indices_data == self.index_to_write)) + return inputs_data + + def create_tensor_array_write_v3(self, size, data_shape, data_type, index_to_write, indices_to_gather): + self.data_type = data_type + self.size = size + self.index_to_write = index_to_write + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + value_to_write = tf.compat.v1.placeholder(data_type, data_shape[1:], 'value_to_write') + index_to_write_const = tf.constant(index_to_write, dtype=tf.int32, shape=[]) + indices_to_gather_const = tf.constant(indices_to_gather, dtype=tf.int32, shape=[len(indices_to_gather)]) + data = tf.compat.v1.placeholder(data_type, data_shape, 'data') + indices = tf.compat.v1.placeholder(tf.int32, [size - 1], 'indices') + size_const = tf.constant(size, dtype=tf.int32, shape=[]) + handle, flow = tf.raw_ops.TensorArrayV3(size=size_const, dtype=tf.as_dtype(data_type)) + flow = tf.raw_ops.TensorArrayScatterV3(handle=handle, indices=indices, value=data, flow_in=flow) + flow = tf.raw_ops.TensorArrayWriteV3(handle=handle, index=index_to_write_const, + value=value_to_write, flow_in=flow) + tf.raw_ops.TensorArrayGatherV3(handle=handle, indices=indices_to_gather_const, flow_in=flow, + dtype=tf.dtypes.as_dtype(data_type)) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(size=7, data_shape=[6], data_type=np.float32, index_to_write=3, indices_to_gather=[0, 3, 1]), + dict(size=10, data_shape=[9, 2, 4], data_type=np.int32, index_to_write=2, indices_to_gather=[2, 1, 4, 3]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_write_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_write_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + 
use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayConcatV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_concat_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = create_tensor_array(data_shape, data_type) + tensor_array_concat_v3 = tf.raw_ops.TensorArrayConcatV3(handle=handle, flow_in=flow, + dtype=tf.as_dtype(data_type)) + tf.identity(tensor_array_concat_v3[0], name='values') + tf.identity(tensor_array_concat_v3[1], name='length') + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[5, 3, 11, 2], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_concat_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_concat_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py b/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py new file mode 100644 index 00000000000000..74da79c36d52a1 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py @@ -0,0 +1,43 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestToBool(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + x_shape = inputs_info['x'] + inputs_data = {} + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(np.float32) + + return inputs_data + + def create_tobool_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + tf.raw_ops.ToBool(input=x) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[2, 3, 4], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_to_bool_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tobool_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py b/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py index ece6f08471a643..73efaf490b23dd 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py +++ 
b/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -46,6 +48,8 @@ def create_topk_v2_net(self, input_shape, input_type, k, sorted, is_first_output @pytest.mark.parametrize("params", test_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_topk_v2_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_topk_v2_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py new file mode 100644 index 00000000000000..18440dbcd7f44a --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py @@ -0,0 +1,53 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import platform + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestTruncateDiv(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + assert 'y' in inputs_info + x_shape = inputs_info['x'] + y_shape = inputs_info['y'] + inputs_data = {} + # generate x and y to ensure truncation + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(self.input_type) + inputs_data['y'] = np.random.randint(1, 10, y_shape).astype(self.input_type) + return inputs_data + + def create_truncate_div_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.TruncateDiv(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[8, 5], input_type=np.float32), + dict(input_shape=[5, 3], input_type=np.int32), + dict(input_shape=[6, 4], input_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') + def test_truncate_div_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_truncate_div_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TruncateMod.py b/tests/layer_tests/tensorflow_tests/test_tf_TruncateMod.py new file mode 100644 index 00000000000000..48b738095c8bb0 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TruncateMod.py @@ -0,0 +1,49 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestTruncateMod(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + assert 'y' in inputs_info + x_shape = inputs_info['x'] + y_shape = 
inputs_info['y'] + inputs_data = {} + # generate x and y to ensure truncation + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(self.input_type) + inputs_data['y'] = np.random.randint(1, 10, y_shape).astype(self.input_type) + return inputs_data + + def create_truncate_mod_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.TruncateMod(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[8, 5], input_type=np.float32), + dict(input_shape=[5, 3], input_type=np.int32), + dict(input_shape=[6, 4], input_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_truncate_mod_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_truncate_mod_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py b/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py index 09afd6f26330ca..f7dcf2eeb324f2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -55,6 +57,8 @@ def create_unsorted_segment_sum_net(self, data_shape, segment_ids_shape, num_seg ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_unsorted_segment_sum_basic(self, params, data_type, segment_ids_type, num_segments_type, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py index 7c80fbdad88b09..4da47e7b5356c4 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_xlog1py_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_xlog1py_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_xlog1py_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py index 6ecddeb439aed3..911c3b0eea2154 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel 
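The "ensure truncation" comment in the TruncateDiv/TruncateMod input generators refers to rounding toward zero, which differs from Python's and numpy's floor semantics whenever the operands have opposite signs. A worked check of the identity x == y * truncdiv(x, y) + truncmod(x, y):

import numpy as np

x, y = np.int32(-7), np.int32(2)
tdiv = np.trunc(x / y).astype(np.int32)  # -3, whereas floor division x // y gives -4
tmod = x - y * tdiv                      # -1, whereas Python's x % y gives 1
assert x == y * tdiv + tmod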
Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_xlogy_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_xlogy_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_xlogy_net(**params), diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models index 0618d98a4d9f31..31a24b681eb4c5 100644 --- a/tests/model_hub_tests/torch_tests/hf_transformers_models +++ b/tests/model_hub_tests/torch_tests/hf_transformers_models @@ -10,7 +10,6 @@ albert-base-v2,albert AlekseyKorshuk/test_reward_model,reward_model,skip,Load problem alibaba-damo/mgp-str-base,mgp-str,xfail,Compile error: unsupported Einsum allenai/hvila-block-layoutlm-finetuned-docbank,hierarchical_model,skip,Load problem -allenai/longformer-base-4096,longformer,xfail,Unsupported op aten::as_strided ameya772/sentence-t5-base-atis-fine-tuned,T5,skip,Load problem andreasmadsen/efficient_mlm_m0.40,roberta-prelayernorm anton-l/emformer-base-librispeech,emformer,skip,Load problem @@ -68,7 +67,7 @@ facebook/detr-resnet-50,detr facebook/dinov2-base,dinov2,skip,Load problem facebook/dpr-question_encoder-single-nq-base,dpr facebook/encodec_24khz,encodec,xfail,Unsupported op aten::lstm -facebook/esm2_t6_8M_UR50D,esm,xfail,Tracing error: The values for attribute 'shape' do not match +facebook/esm2_t6_8M_UR50D,esm facebook/flava-full,flava,xfail,Tracing problem facebook/flava-image-codebook,flava_image_codebook,skip,Load problem facebook/m2m100_418M,m2m_100 @@ -123,10 +122,10 @@ hf-internal-testing/tiny-random-Data2VecAudioModel,data2vec-audio,skip,Load prob hf-internal-testing/tiny-random-Data2VecTextModel,data2vec-text hf-internal-testing/tiny-random-Data2VecVisionModel,data2vec-vision hf-internal-testing/tiny-random-DeiTModel,deit -hf-internal-testing/tiny-random-DonutSwinModel,donut-swin,xfail,Unsupported op aten::adaptive_avg_pool1d +hf-internal-testing/tiny-random-DonutSwinModel,donut-swin hf-internal-testing/tiny-random-EfficientFormerForImageClassification,efficientformer hf-internal-testing/tiny-random-flaubert,flaubert -hf-internal-testing/tiny-random-FocalNetModel,focalnet,xfail,Unsupported op aten::adaptive_avg_pool1d +hf-internal-testing/tiny-random-FocalNetModel,focalnet hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,gpt_bigcode,xfail,Conversion is failed for: aten::mul hf-internal-testing/tiny-random-GPTJModel,gptj hf-internal-testing/tiny-random-groupvit,groupvit @@ -155,7 +154,7 @@ hf-internal-testing/tiny-random-Speech2TextModel,speech_to_text,skip,Load proble hf-internal-testing/tiny-random-speech-encoder-decoder,speech-encoder-decoder,skip,Load problem hf-internal-testing/tiny-random-SplinterModel,splinter hf-internal-testing/tiny-random-SqueezeBertModel,squeezebert -hf-internal-testing/tiny-random-SwinModel,swin,xfail,Unsupported op aten::adaptive_avg_pool1d +hf-internal-testing/tiny-random-SwinModel,swin hf-internal-testing/tiny-random-unispeech,unispeech,skip,Load problem hf-internal-testing/tiny-random-UniSpeechSatModel,unispeech-sat,skip,Load problem hf-internal-testing/tiny-random-vision_perceiver_conv,perceiver @@ -247,7 +246,7 @@ microsoft/markuplm-base,markuplm 
microsoft/resnet-50,resnet microsoft/speecht5_hifigan,hifigan,skip,Load problem microsoft/speecht5_tts,speecht5,skip,Load problem -microsoft/swinv2-tiny-patch4-window8-256,swinv2,xfail,Unsupported op aten::adaptive_avg_pool1d +microsoft/swinv2-tiny-patch4-window8-256,swinv2 microsoft/table-transformer-detection,table-transformer microsoft/wavlm-large,wavlm,skip,Load problem microsoft/xclip-base-patch32,xclip,skip,Load problem @@ -301,7 +300,6 @@ pie/example-re-textclf-tacred,TransformerTextClassificationModel,skip,Load probl pleisto/yuren-baichuan-7b,multimodal_llama,skip,Load problem predictia/europe_reanalysis_downscaler_convbaseline,convbilinear,skip,Load problem predictia/europe_reanalysis_downscaler_convswin2sr,conv_swin2sr,skip,Load problem -pszemraj/led-large-book-summary,led,xfail,Unsupported op aten::as_strided qmeeus/whisper-small-ner-combined,whisper_for_slu,skip,Load problem raman-ai/pcqv2-tokengt-lap16,tokengt,skip,Load problem range3/pegasus-gpt2-medium,pegasusgpt2,skip,Load problem @@ -330,8 +328,8 @@ sheonhan/ict-imagenet-256,ict,skip,Load problem shibing624/text2vec-base-chinese-paraphrase,ernie shikhartuli/flexibert-mini,flexibert,skip,Load problem shikras/shikra-7b-delta-v1-0708,shikra,skip,Load problem -shi-labs/dinat-mini-in1k-224,dinat,xfail,Unsupported op aten::adaptive_avg_pool1d -shi-labs/nat-mini-in1k-224,nat,xfail,Unsupported op aten::adaptive_avg_pool1d +shi-labs/dinat-mini-in1k-224,dinat,xfail,Accuracy validation failed +shi-labs/nat-mini-in1k-224,nat,xfail,Accuracy validation failed shi-labs/oneformer_ade20k_swin_large,oneformer,skip,Load problem shuqi/seed-encoder,seed_encoder,skip,Load problem sijunhe/nezha-cn-base,nezha diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index 184e725a04f9b9..caeb2e0ff2a01d 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py @@ -292,7 +292,8 @@ def teardown_method(self): cleanup_dir(hf_hub_cache_dir) super().teardown_method() - @pytest.mark.parametrize("name,type", [("bert-base-uncased", "bert"), + @pytest.mark.parametrize("name,type", [("allenai/led-base-16384", "led"), + ("bert-base-uncased", "bert"), ("facebook/bart-large-mnli", "bart"), ("google/flan-t5-base", "t5"), ("google/tapas-large-finetuned-wtq", "tapas"), diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index fac4752c318250..4eed13c9a79af6 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -414,14 +414,14 @@ if(ENABLE_OV_PADDLE_FRONTEND OR ENABLE_OV_ONNX_FRONTEND OR ENABLE_OV_TF_FRONTEND if(CMAKE_VERBOSE_MAKEFILE) set(Protobuf_DEBUG ON) endif() - if(OV_VCPKG_BUILD) - set(protobuf_config CONFIG) - endif() # try to find newer version first (major is changed) # see https://protobuf.dev/support/version-support/ and # https://github.com/protocolbuffers/protobuf/commit/d61f75ff6db36b4f9c0765f131f8edc2f86310fa - find_package(Protobuf 4.22.0 QUIET ${protobuf_config}) + find_package(Protobuf 4.22.0 QUIET CONFIG) if(NOT Protobuf_FOUND) + if(OV_VCPKG_BUILD) + set(protobuf_config CONFIG) + endif() # otherwise, fallback to existing default find_package(Protobuf 3.20.3 REQUIRED ${protobuf_config}) endif() diff --git a/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp b/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp index 5c4c6f7031e6d8..5a6f237b27cda1 100644 --- a/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp +++ 
@@ -1214,7 +1214,7 @@ or column if there are N channels, or have N columns if there is a single channe
 @param src Input set of 2D points stored in one of possible containers: Mat,
 std::vector<cv::Point2i>, std::vector<cv::Point2f>, std::vector<cv::Point2d>.
 @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER
-and @ref DIST_C are not suppored.
+and @ref DIST_C are not supported.
 @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
 is chosen.
 @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the
@@ -1286,7 +1286,7 @@ or column if there are N channels, or have N columns if there is a single channe
 @param src Input set of 3D points stored in one of possible containers: Mat,
 std::vector<cv::Point3i>, std::vector<cv::Point3f>, std::vector<cv::Point3d>.
 @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER
-and @ref DIST_C are not suppored.
+and @ref DIST_C are not supported.
 @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
 is chosen.
 @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the
diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo
index e0e434f64a4da0..bb98fe444c84d6 160000
--- a/thirdparty/open_model_zoo
+++ b/thirdparty/open_model_zoo
@@ -1 +1 @@
-Subproject commit e0e434f64a4da07274c31c1aae48fbdcfa087fb0
+Subproject commit bb98fe444c84d67fd67ee7ec15a340722c652053
diff --git a/tools/constraints.txt b/tools/constraints.txt
index 18a3080d3a1e78..2e1588a005e03f 100644
--- a/tools/constraints.txt
+++ b/tools/constraints.txt
@@ -18,4 +18,4 @@ pyenchant>=3.0.0
 test-generator==0.1.1
 py>=1.9.0
 urllib3>=1.26.4
-openvino-telemetry>=2023.1.0
+openvino-telemetry>=2023.2.1
diff --git a/tools/mo/openvino/tools/mo/convert_impl.py b/tools/mo/openvino/tools/mo/convert_impl.py
index ae6c39a144b0a3..9d683f4b6ac977 100644
--- a/tools/mo/openvino/tools/mo/convert_impl.py
+++ b/tools/mo/openvino/tools/mo/convert_impl.py
@@ -312,8 +312,8 @@ def update_fallback_with_conversion_error(use_new_frontend: bool, is_tf: bool, e
     conversion_error_re = r"^(\[TensorFlow\ Frontend\]\ Internal\ error\,\ no\ translator\ found\ for\ operation\(s\)\:\ )((\w+)(\,\ \w+)*)$"
     conversion_error_match = re.findall(conversion_error_re, ex_msg, re.MULTILINE)
     all_fallback_operations = [
-        # corresponds to TF1 TensorList operation
-        "TensorArrayScatterV3", "TensorArrayV3", "TensorArraySizeV3", "TensorArrayGatherV3",
+        # corresponds to TF1 While operation
+        "LoopCond", "Enter", "NextIteration", "Exit", "Switch", "Merge",
         # corresponds to operations with complex tensors
         "FFT", "FFT2D", "FFT3D", "IFFT", "IFFT2D", "IFFT3D",
         "RFFT", "RFFT2D", "RFFT3D", "IRFFT", "IRFFT2D", "IRFFT3D",
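For context, the parsing that this hunk feeds can be reproduced outside convert_impl.py. A minimal, self-contained sketch of how an error message of the expected shape is split into operation names and checked against the fallback set; the message text below is illustrative, not a captured log:

import re

# Same pattern as conversion_error_re above.
conversion_error_re = (r"^(\[TensorFlow\ Frontend\]\ Internal\ error\,\ no\ translator\ found\ "
                       r"for\ operation\(s\)\:\ )((\w+)(\,\ \w+)*)$")

# Hypothetical frontend error message matching the expected shape.
ex_msg = ("[TensorFlow Frontend] Internal error, no translator found for operation(s): "
          "LoopCond, Enter, NextIteration")

match = re.findall(conversion_error_re, ex_msg, re.MULTILINE)
if match:
    # Group 2 holds the comma-separated operation list.
    unsupported_ops = match[0][1].split(', ')
    all_fallback_operations = {"LoopCond", "Enter", "NextIteration", "Exit", "Switch", "Merge"}
    # Fallback to the legacy frontend only makes sense when every missing op is a known fallback op.
    print(all(op in all_fallback_operations for op in unsupported_ops))  # True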
diff --git a/tools/mo/openvino/tools/mo/ops/Cast.py b/tools/mo/openvino/tools/mo/ops/Cast.py
index 77beb07c74122e..24409912429f07 100644
--- a/tools/mo/openvino/tools/mo/ops/Cast.py
+++ b/tools/mo/openvino/tools/mo/ops/Cast.py
@@ -36,12 +36,14 @@ def backend_attrs(self):
 
     @staticmethod
     def type_infer(node: Node):
-        assert node.has_valid('dst_type'), 'Destination type of "Cast" operation should be extracted earlier'
+        assert node.has_valid(
+            'dst_type'), 'Destination type of "Cast" operation should be extracted earlier'
         node.out_port(0).set_data_type(node.dst_type)
 
     @staticmethod
     def helper_value_propagation(node_name, value, dst_type):
-        new_blob, finite_match_count, zero_match_count = convert_blob(value, dst_type)
+        new_blob, finite_match_count, zero_match_count = convert_blob(
+            value, dst_type)
         if finite_match_count:
             log.error("{} elements of {} were clipped to infinity while converting an input blob for node '{}' to {}."
@@ -63,6 +65,10 @@ def custom_type_casting_and_packing(node: Node, value, dst_type):
     we would pad them to 6 element with the last element as zero and we would pack them into 3 uint8 values
     """
     assert dst_type in [packed_U4, packed_I4]
+    # TODO: Remove this comment when it's clear that we can fix it easily
+    # raise Exception("Packing of u4/i4 data is no longer supported in mo because it is now incompatible with the new "
+    #                 "order of the halfs of a byte that was introduced in OpenVINO runtime recently. Use ovc "
+    #                 "command line tool or openvino.convert_model python function instead.")
     minimum_regular_dtype = np.uint8 if dst_type == packed_U4 else np.int8
     # initial casing from the source type to the numpy-friendly type which could absorb all the values of dst_type
@@ -83,10 +89,12 @@ def custom_type_casting_and_packing(node: Node, value, dst_type):
     padded = np.concatenate((flattened, np.zeros([pad], dtype=minimum_regular_dtype)))
     assert np.prod(padded.shape) % num_values_fitting_into_uint8 == 0
 
-    bit_order_little = (padded[:, None] & (1 << np.arange(num_bits)) > 0).astype(np.uint8)
-    bit_order_big = np.flip(bit_order_little, axis=1)
-    bit_order_big_flattened = bit_order_big.flatten()
-    packed = np.packbits(bit_order_big_flattened)
+    bit_order_little = (padded[:, None] & (
+        1 << np.arange(num_bits)) > 0).astype(np.uint8)
+    bit_order_big_flattened = bit_order_little.flatten()
+    # u1 still has reversed bit order:
+    packed = np.packbits(bit_order_big_flattened,
+                         bitorder='little' if num_bits > 1 else 'big')
 
     node.out_node(0)['force_shape'] = data_shape.copy()
     node.out_node(0)['force_type'] = np_data_type_to_precision(dst_type)
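The nibble-order change above is easiest to see on concrete values. A standalone numpy sketch (input values chosen for illustration) contrasting the removed big-endian packing with the new little-endian one:

import numpy as np

values = np.array([7, 8], dtype=np.uint8)  # two u4 values packed into one byte
num_bits = 4

# Per-value little-endian bit planes, as in bit_order_little above.
bits = (values[:, None] & (1 << np.arange(num_bits)) > 0).astype(np.uint8)

# Old order: flip each value to big-endian bits, pack MSB-first -> 7 lands in the high nibble.
old = np.packbits(np.flip(bits, axis=1).flatten())
# New order: keep little-endian bits, pack LSB-first -> 7 lands in the low nibble.
new = np.packbits(bits.flatten(), bitorder='little')

print(old, new)  # [120] [135], i.e. 0b01111000 vs 0b10000111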
diff --git a/tools/mo/openvino/tools/mo/ops/multinomial.py b/tools/mo/openvino/tools/mo/ops/multinomial.py
index 42f4b0d3eedbb9..233a9d4565f4c1 100644
--- a/tools/mo/openvino/tools/mo/ops/multinomial.py
+++ b/tools/mo/openvino/tools/mo/ops/multinomial.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from openvino.tools.mo.front.common.partial_infer.utils import dynamic_dimension
+from openvino.tools.mo.front.common.partial_infer.utils import dynamic_dimension_value, shape_array
 from openvino.tools.mo.front.extractor import bool_to_str
 from openvino.tools.mo.graph.graph import Graph, Node
 
@@ -63,7 +63,7 @@ def infer(node: Node):
 
         num_samples = node.in_port(1).data.get_value()
         if num_samples is not None:
-            output_shape.append(num_samples)
+            output_shape.append(np.array(num_samples).item())
         else:
-            output_shape.append(dynamic_dimension)
-        node.out_port(0).data.set_shape(output_shape)
+            output_shape.append(dynamic_dimension_value)
+        node.out_port(0).data.set_shape(shape_array(output_shape))
diff --git a/tools/mo/openvino/tools/mo/utils/telemetry_utils.py b/tools/mo/openvino/tools/mo/utils/telemetry_utils.py
index 802986edf4c4c0..e2cdd0b53f61d0 100644
--- a/tools/mo/openvino/tools/mo/utils/telemetry_utils.py
+++ b/tools/mo/openvino/tools/mo/utils/telemetry_utils.py
@@ -22,8 +22,13 @@
 
 
 def init_mo_telemetry(app_name='Model Optimizer'):
-    return tm.Telemetry(tid=get_tid(), app_name=app_name, app_version=get_rt_version(), backend='ga4')
-
+    return tm.Telemetry(tid=get_tid(),
+                        app_name=app_name,
+                        app_version=get_rt_version(),
+                        backend='ga4',
+                        enable_opt_in_dialog=False,
+                        disable_in_ci=True
+                        )
 
 def send_framework_info(framework: str):
     """
diff --git a/tools/mo/unit_tests/mo/ops/cast_test.py b/tools/mo/unit_tests/mo/ops/cast_test.py
index 985a7276514235..73a468e9fb80fa 100644
--- a/tools/mo/unit_tests/mo/ops/cast_test.py
+++ b/tools/mo/unit_tests/mo/ops/cast_test.py
@@ -21,81 +21,20 @@ class TestCastTest():
     """
     Example of checking:
-        7 == 0111,           padded to 0111 0000, results in 112
-        7 == 0111, 8 == 1000 packed to 0111 1000, results in 120
+        7 == 0111,           padded to 00000111, results in 7
+        7 == 0111, 8 == 1000 packed to 10000111, results in 7+16*8
 
-        -8 == 1000,          padded to 1000 0000, results in 128
+        -8 == 1000,          padded to 00001000, results in 8
     """
 
-    @pytest.mark.parametrize("value, expected, custom_dtype",[
-        ([0], [0], packed_U4),
-        ([1], [16], packed_U4),
-        ([2], [32], packed_U4),
-        ([3], [48], packed_U4),
-        ([4], [64], packed_U4),
-        ([5], [80], packed_U4),
-        ([6], [96], packed_U4),
-        ([7], [112], packed_U4),
-        ([8], [128], packed_U4),
-        ([9], [144], packed_U4),
-        ([10], [160], packed_U4),
-        ([11], [176], packed_U4),
-        ([12], [192], packed_U4),
-        ([13], [208], packed_U4),
-        ([14], [224], packed_U4),
-        ([15], [240], packed_U4),
-
-        ([0, 15], [15], packed_U4),
-        ([1, 14], [30], packed_U4),
-        ([2, 13], [45], packed_U4),
-        ([3, 12], [60], packed_U4),
-        ([4, 11], [75], packed_U4),
-        ([5, 10], [90], packed_U4),
-        ([6, 9], [105], packed_U4),
-        ([7, 8], [120], packed_U4),
-        ([8, 7], [135], packed_U4),
-        ([9, 6], [150], packed_U4),
-        ([10, 5], [165], packed_U4),
-        ([11, 4], [180], packed_U4),
-        ([12, 3], [195], packed_U4),
-        ([13, 2], [210], packed_U4),
-        ([14, 1], [225], packed_U4),
-        ([15, 0], [240], packed_U4),
-
-        ([-8], [128], packed_I4),
-        ([-7], [144], packed_I4),
-        ([-6], [160], packed_I4),
-        ([-5], [176], packed_I4),
-        ([-4], [192], packed_I4),
-        ([-3], [208], packed_I4),
-        ([-2], [224], packed_I4),
-        ([-1], [240], packed_I4),
-        ([0], [0], packed_I4),
-        ([1], [16], packed_I4),
-        ([2], [32], packed_I4),
-        ([3], [48], packed_I4),
-        ([4], [64], packed_I4),
-        ([5], [80], packed_I4),
-        ([6], [96], packed_I4),
-        ([7], [112], packed_I4),
-
-        ([-8, 7], [135], packed_I4),
-        ([-7, 6], [150], packed_I4),
-        ([-6, 5], [165], packed_I4),
-        ([-5, 4], [180], packed_I4),
-        ([-4, 3], [195], packed_I4),
-        ([-3, 2], [210], packed_I4),
-        ([-2, 1], [225], packed_I4),
-        ([-1, 0], [240], packed_I4),
-        ([0, -1], [15], packed_I4),
-        ([1, -2], [30], packed_I4),
-        ([2, -3], [45], packed_I4),
-        ([3, -4], [60], packed_I4),
-        ([4, -5], [75], packed_I4),
-        ([5, -6], [90], packed_I4),
-        ([6, -7], [105], packed_I4),
-        ([7, -8], [120], packed_I4),
-        ])
+    @pytest.mark.parametrize("value, expected, custom_dtype",
+                             [([i], [i], packed_U4) for i in range(16)] +
+                             [([i, 15-i], [i + (15-i)*16], packed_U4) for i in range(16)] +
+                             [([-i], [16-i], packed_I4) for i in range(1, 8+1)] +
+                             [([i], [i], packed_I4) for i in range(8)] +
+                             [([-i-1, i], [16-i-1 + 16*i], packed_I4) for i in range(8)] +
+                             [([i, -i-1], [i + 16*(16-i-1)], packed_I4) for i in range(8)]
+                             )
     def test_custom_value_propagation(self, value, expected, custom_dtype):
         graph = build_graph(nodes(value, custom_dtype), [
             *connect('value', 'convert'), *connect('convert', 'output'),
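As a sanity check, the comprehension-based cases can be evaluated on their own and tied back to the updated docstring; packed_U4 below is a stand-in placeholder, since only the expected-value arithmetic is exercised:

packed_U4 = 'packed_U4'  # placeholder for the mo dtype constant

cases = [([i, 15-i], [i + (15-i)*16], packed_U4) for i in range(16)]

# i == 7 reproduces the docstring example: [7, 8] packs to 0b10000111 == 135 == 7 + 16*8.
value, expected, dtype = cases[7]
assert value == [7, 8] and expected == [135]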
diff --git a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py
index 8d905d8f13129d..26ea01b77d6722 100644
--- a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py
+++ b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py
@@ -235,17 +235,13 @@ def test_freeze_placeholder_with_unknown_rank(self, inputs, inputs_data, expecte
                                               freeze_placeholder_with_value,
                                               input_shape, only_conversion, True)
 
-    def test_conversion_failure_fallback_default(self):
+    def test_conversion_tf1_while_default(self):
         self.basic("ctc_model_based.pbtxt", None, None, None, None,
                    None, None, True, True, False, False)
 
-    @unittest.skipIf(platform == 'darwin', reason="Ticket - 122182")
-    def test_conversion_failure_fallback_use_new_frontend(self):
-        with self.assertRaisesRegex(Exception,
-                                    "\[TensorFlow Frontend\] Internal error, no translator found for operation\(s\)\: "
-                                    "TensorArrayGatherV3\, TensorArrayReadV3\, TensorArraySizeV3\, TensorArrayV3\, TensorArrayWriteV3"):
-            self.basic("ctc_model_based.pbtxt", None, None, None, None,
-                       None, None, True, True, True, False)
+    def test_conversion_tf1_while_use_new_frontend(self):
+        self.basic("ctc_model_based.pbtxt", None, None, None, None,
+                   None, None, True, True, True, False)
 
     @unittest.skip("88349: Fix auto-pruning in legacy FE")
     def test_conversion_model_oneshot_iterator_use_legacy_frontend(self):
diff --git a/tools/ovc/openvino/tools/ovc/telemetry_utils.py b/tools/ovc/openvino/tools/ovc/telemetry_utils.py
index 87e0132ccd17a6..42232b0839a6be 100644
--- a/tools/ovc/openvino/tools/ovc/telemetry_utils.py
+++ b/tools/ovc/openvino/tools/ovc/telemetry_utils.py
@@ -17,7 +17,13 @@
 
 
 def init_mo_telemetry(app_name='Model Conversion API'):
-    return tm.Telemetry(tid=get_tid(), app_name=app_name, app_version=get_rt_version(), backend='ga4')
+    return tm.Telemetry(tid=get_tid(),
+                        app_name=app_name,
+                        app_version=get_rt_version(),
+                        backend='ga4',
+                        enable_opt_in_dialog=False,
+                        disable_in_ci=True
+                        )
 
 def send_framework_info(framework: str):
     """