diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json
index b9cac8f17adaa6..d8fe6ac2df03d2 100644
--- a/.github/scripts/workflow_rerun/errors_to_look_for.json
+++ b/.github/scripts/workflow_rerun/errors_to_look_for.json
@@ -86,5 +86,25 @@
{
"error_text": "because the GET request got Content-Type",
"ticket": 158400
+ },
+ {
+ "error_text": "Unable to make request:",
+ "ticket": 158401
+ },
+ {
+ "error_text": "Failed to make request",
+ "ticket": 158401
+ },
+ {
+ "error_text": "Failure when receiving data from the peer",
+ "ticket": 159323
+ },
+ {
+ "error_text": "HTTP response code said error",
+ "ticket": 159398
+ },
+ {
+ "error_text": "download failed after attempts",
+ "ticket": 159547
}
]
\ No newline at end of file
diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml
index d6633fd9dab3ee..c3aac30ccd4379 100644
--- a/.github/workflows/cleanup_caches.yml
+++ b/.github/workflows/cleanup_caches.yml
@@ -4,7 +4,7 @@ on:
schedule:
# at 00:00 on the 1st day of every month
- cron: '0 0 1 * *'
-
+
permissions: read-all
jobs:
@@ -61,8 +61,8 @@ jobs:
cache-path: ${{ env.CCACHE_PATH }}
recursive: true
key: '.'
-
-
+
+
Cleanup_ccache_win:
name: Cleanup Windows ccache
runs-on: 'aks-win-4-cores-8gb'
diff --git a/.github/workflows/export_workflow_metrics.yml b/.github/workflows/export_workflow_metrics.yml
index 39bb699b8caa91..aef00244f8175b 100644
--- a/.github/workflows/export_workflow_metrics.yml
+++ b/.github/workflows/export_workflow_metrics.yml
@@ -34,7 +34,7 @@ permissions: read-all
jobs:
export-workflow-metrics:
name: Export finished workflow metrics
- runs-on: aks-linux-2-cores-8gb
+ runs-on: aks-linux-2-cores-8gb-stats
if: ${{ github.repository_owner == 'openvinotoolkit' }}
steps:
diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml
index 0d8d6610bea588..535101ec943264 100644
--- a/.github/workflows/workflow_rerunner.yml
+++ b/.github/workflows/workflow_rerunner.yml
@@ -29,7 +29,7 @@ jobs:
name: Rerun Workflow
# Run only for the failed workflows in openvinotoolkit org
if: ${{ github.event.workflow_run.conclusion == 'failure' && github.repository_owner == 'openvinotoolkit' }}
- runs-on: aks-linux-2-cores-8gb
+ runs-on: aks-linux-2-cores-8gb-stats
permissions:
actions: write
contents: read
@@ -70,7 +70,7 @@ jobs:
rerunner_tests:
name: Rerunner Tests
if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }}
- runs-on: aks-linux-2-cores-8gb
+ runs-on: aks-linux-2-cores-8gb-stats
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -96,9 +96,9 @@ jobs:
working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun
run: |
export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH
-
+
# Need to get a run id with successful status for log analyzing
# cannot lock a run id as logs get deleted after some time
run_id=$(python3 -c "from github import Github, Auth; import os; github=Github(auth=Auth.Token(token=os.environ.get('GITHUB_TOKEN'))); repo = github.get_repo('${GITHUB_REPOSITORY}'); run_id = repo.get_workflow_runs(status='success')[0].id; print(run_id)")
-
+
python3 rerunner.py --repository-name ${GITHUB_REPOSITORY} --run-id $run_id --dry-run
diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst
index d877cb1768d44d..f4ec275491fa32 100644
--- a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst
+++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst
@@ -6,16 +6,14 @@ models from OpenVINO-supported frameworks may also work properly but have not be
**AI Models that run on Intel® Core Ultra™ Processors with OpenVINO™ toolkit:**
-.. raw:: html
-
-
-
-
-.. csv-table::
+.. data-table::
:class: modeldata stripe
:name: supportedModelsTable
:header-rows: 1
:file: ../../_static/download/supported_models.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
| Marked cells indicate models that passed inference with no errors. Empty cells indicate
diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst
index d27f7626391f46..1bd8f5dae7c634 100644
--- a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst
+++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst
@@ -41,27 +41,36 @@ Data as of OpenVINO 2024.4, 18 Oct. 2024.
.. tab-item:: PyTorch
- .. csv-table::
+ .. data-table::
:class: modeldata stripe
- :name: TensorFlow ops
+ :name: PyTorch_ops
:header-rows: 1
:file: ../../_static/conformance_files/pytorch_ops.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: TensorFlow
- .. csv-table::
+ .. data-table::
:class: modeldata stripe
- :name: TensorFlow ops
+ :name: TensorFlow_ops_v2
:header-rows: 1
:file: ../../_static/conformance_files/tensorflow_ops.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: PaddlePaddle
- .. csv-table::
+ .. data-table::
:class: modeldata stripe
- :name: Paddle ops
+ :name: Paddle_ops
:header-rows: 1
:file: ../../_static/conformance_files/paddlepaddle_ops.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: ONNX
diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst
index 085a1ff8449151..83581d465df92e 100644
--- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst
+++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst
@@ -8,10 +8,6 @@ The current data is as of OpenVINO 2024.4, 20 Nov. 2024.
The tables below list the key performance indicators for inference on built-in GPUs.
-.. raw:: html
-
-
-
.. tab-set::
@@ -22,7 +18,9 @@ The tables below list the key performance indicators for inference on built-in G
:name: supportedModelsTable_V1
:header-rows: 1
:file: ../../_static/benchmarks_files/llm_models_9-288V.csv
- :hidden: [3,4,6]
+ :data-column-hidden: [3,4,6]
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: 7-268V
@@ -31,7 +29,8 @@ The tables below list the key performance indicators for inference on built-in G
:name: supportedModelsTable_V2
:header-rows: 1
:file: ../../_static/benchmarks_files/llm_models_7-258V.csv
- :hidden: [3,4,6]
+ :data-column-hidden: [3,4,6]
+ :data-order: [[ 0, "asc" ]]
.. tab-item:: 7-155H
@@ -40,7 +39,8 @@ The tables below list the key performance indicators for inference on built-in G
:name: supportedModelsTable_V3
:header-rows: 1
:file: ../../_static/benchmarks_files/llm_models_7-155H.csv
- :hidden: [3,4,6]
+ :data-column-hidden: [3,4,6]
+ :data-order: [[ 0, "asc" ]]
.. grid:: 1 1 2 2
diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py b/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py
index c3e0e81eec3b3a..814517289ce114 100644
--- a/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py
+++ b/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py
@@ -11,7 +11,7 @@
import requests
import re
import json
-
+import html
import csv
logger = logging.getLogger(__name__)
@@ -147,7 +147,9 @@ class DataTable(Directive):
'file': directives.path,
'class': directives.unchanged,
'name': directives.unchanged,
- 'hidden': directives.unchanged
+ 'data-column-hidden': directives.unchanged,
+ 'data-page-length': directives.unchanged,
+ 'data-order': directives.unchanged
}
def run(self) -> List[Node]:
@@ -159,10 +161,12 @@ def run(self) -> List[Node]:
csv_node = []
with open(csv_file, 'r') as j:
csv_data = list(csv.reader(j))
- class_table_tag = ' class="' + "".join(c for c in str(self.options['class']) + '"') if 'class' in self.options is not None else ""
- id_table_tag = ' id="' + "".join(c for c in str(self.options['name']) + '"') if 'name' in self.options is not None else ""
- hidden_table_tag = ' data-columns-hidden="' + "".join(c for c in str(self.options['hidden']) + '"') if 'hidden' in self.options is not None else ""
- csv_table_html = '
'
+ class_table_tag = f' class="{html.escape(self.options["class"])}"' if "class" in self.options else ""
+ id_table_tag = f' id="{html.escape(self.options["name"])}"' if "name" in self.options else ""
+ data_column_hidden_tag = f' data-column-hidden="{html.escape(self.options["data-column-hidden"])}"' if "data-column-hidden" in self.options else ""
+ data_order_tag = f' data-order="{html.escape(self.options["data-order"])}"' if "data-order" in self.options else ""
+ data_page_length_tag = f' data-page-length="{html.escape(self.options["data-page-length"])}"' if "data-page-length" in self.options else ""
+ csv_table_html = f'
'
head_rows = 0
head_rows += self.options.get('header-rows', 0)
row_count = 0
diff --git a/docs/sphinx_setup/_static/css/custom.css b/docs/sphinx_setup/_static/css/custom.css
index de8a05732a4d06..1679f7309da044 100644
--- a/docs/sphinx_setup/_static/css/custom.css
+++ b/docs/sphinx_setup/_static/css/custom.css
@@ -69,7 +69,7 @@ a#wap_dns {
/* Sphinx-design tabs override */
.sd-tab-set>input:checked+label {
color: var(--sd-color-black) !important;
- background-color: #f8f8f8 !important;
+ background-color: white !important;
border: solid 1px #bdbdbd;
border-bottom: solid 0px;
margin-bottom: -1px;
@@ -96,7 +96,7 @@ a#wap_dns {
cursor: pointer;
font-size: var(--sd-fontsize-tabs-label);
font-weight: 400 !important;
- padding: 5px 16px 2px !important;
+ padding: 5px 16px 0px !important;
transition: color 250ms;
width: auto;
z-index: 1;
@@ -110,7 +110,6 @@ a#wap_dns {
box-shadow: 0 0 0 0;
border: solid 1px var(--sd-color-tabs-overline);
border-color: #bdbdbd;
- background-color: #f8f8f8;
padding-right: 4px;
padding-left: 4px;
padding-bottom: 6px;
diff --git a/docs/sphinx_setup/_static/css/openVinoDataTables.css b/docs/sphinx_setup/_static/css/openVinoDataTables.css
index 526aabb6abe15d..bedc0f5206e260 100644
--- a/docs/sphinx_setup/_static/css/openVinoDataTables.css
+++ b/docs/sphinx_setup/_static/css/openVinoDataTables.css
@@ -6,8 +6,7 @@ div.dt-buttons>.dt-button, div.dt-buttons>div.dt-button-split .dt-button {
}
div.dt-container .dt-paging .dt-paging-button:hover {
- color: white !important;
- border: 1px solid #aaa;
+ border: 1px solid #aaa !important;
background:none !important;
background-color: var(--bttn-act-bg-hover) !important
}
@@ -190,10 +189,9 @@ div.dt-container .dt-paging .dt-paging-button {
div.dt-container .dt-paging .dt-paging-button.current, div.dt-container .dt-paging .dt-paging-button.current:hover {
background: none !important;
- background-color: var(--bttn-act-bg-active) !important;
+ background-color: var(--bttn-sec-border-color) !important;
border-color: var(--bttn-act-bg-active) !important;
border-radius: 0px !important;
- color: white !important;
border: 1px !important
}
table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order:before, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order:after {
diff --git a/docs/sphinx_setup/_static/js/openVinoDataTables.js b/docs/sphinx_setup/_static/js/openVinoDataTables.js
index bd56a71533786c..fb3a57d959020c 100644
--- a/docs/sphinx_setup/_static/js/openVinoDataTables.js
+++ b/docs/sphinx_setup/_static/js/openVinoDataTables.js
@@ -1,16 +1,15 @@
$(document).ready(function () {
var columnDefs = [];
-
var tables = $('table.modeldata');
for (let table of tables) {
- var hidden = table.getAttribute('data-columns-hidden');
+ var hidden = table.getAttribute('data-column-hidden');
columnDefs = [{ "visible": false, "targets": JSON.parse(hidden) }]
$(table).DataTable({
responsive: true,
"autoWidth": false,
language: {
buttons: {
- colvisRestore: "Restore default"
+ colvisRestore: "Restore default selection"
}
},
lengthMenu: [
diff --git a/docs/sphinx_setup/_templates/layout.html b/docs/sphinx_setup/_templates/layout.html
index 0d2331b2c83fe3..a791091e1f13a4 100644
--- a/docs/sphinx_setup/_templates/layout.html
+++ b/docs/sphinx_setup/_templates/layout.html
@@ -9,6 +9,7 @@
+
diff --git a/src/bindings/python/src/openvino/__init__.py b/src/bindings/python/src/openvino/__init__.py
index 7643f742e0067d..69c678909b1c9e 100644
--- a/src/bindings/python/src/openvino/__init__.py
+++ b/src/bindings/python/src/openvino/__init__.py
@@ -7,7 +7,7 @@
# Required for Windows OS platforms
# Note: always top-level
try:
- from openvino.package_utils import _add_openvino_libs_to_search_path
+ from openvino.utils import _add_openvino_libs_to_search_path
_add_openvino_libs_to_search_path()
except ImportError:
pass
@@ -17,47 +17,6 @@
# # This __init__.py forces checking of runtime modules to propagate errors.
# # It is not compared with init files from openvino-dev package.
# #
-
-# Openvino pybind bindings
-from openvino._pyopenvino import AxisSet
-from openvino._pyopenvino import AxisVector
-from openvino._pyopenvino import ConstOutput
-from openvino._pyopenvino import Coordinate
-from openvino._pyopenvino import CoordinateDiff
-from openvino._pyopenvino import DiscreteTypeInfo
-from openvino._pyopenvino import Extension
-from openvino._pyopenvino import ProfilingInfo
-from openvino._pyopenvino import RTMap
-from openvino._pyopenvino import Version
-from openvino._pyopenvino import Symbol
-from openvino._pyopenvino import Dimension
-from openvino._pyopenvino import Input
-from openvino._pyopenvino import Output
-from openvino._pyopenvino import Node
-from openvino._pyopenvino import Strides
-from openvino._pyopenvino import PartialShape
-from openvino._pyopenvino import Shape
-from openvino._pyopenvino import Layout
-from openvino._pyopenvino import Type
-from openvino._pyopenvino import Tensor
-from openvino._pyopenvino import OVAny
-from openvino._pyopenvino import get_batch
-from openvino._pyopenvino import set_batch
-from openvino._pyopenvino import serialize
-from openvino._pyopenvino import shutdown
-from openvino._pyopenvino import save_model
-from openvino._pyopenvino import layout_helpers
-from openvino._pyopenvino import RemoteContext
-from openvino._pyopenvino import RemoteTensor
-from openvino._pyopenvino import Op
-
-# Import public classes from _ov_api
-from openvino._ov_api import Model
-from openvino._ov_api import Core
-from openvino._ov_api import CompiledModel
-from openvino._ov_api import InferRequest
-from openvino._ov_api import AsyncInferQueue
-
# Import all public modules
from openvino import runtime as runtime
from openvino import frontend as frontend
@@ -67,10 +26,36 @@
from openvino import utils as utils
from openvino import properties as properties
+# Import the most important classes and functions from openvino._ov_api and openvino.runtime
+from openvino._ov_api import Model
+from openvino._ov_api import Core
+from openvino._ov_api import CompiledModel
+from openvino._ov_api import InferRequest
+from openvino._ov_api import AsyncInferQueue
+
+from openvino.runtime import Symbol
+from openvino.runtime import Dimension
+from openvino.runtime import Strides
+from openvino.runtime import PartialShape
+from openvino.runtime import Shape
+from openvino.runtime import Layout
+from openvino.runtime import Type
+from openvino.runtime import Tensor
+from openvino.runtime import OVAny
+
# Helper functions for openvino module
-from openvino.utils.data_helpers import tensor_from_file
+from openvino.runtime.utils.data_helpers import tensor_from_file
from openvino._ov_api import compile_model
+from openvino.runtime import get_batch
+from openvino.runtime import set_batch
+from openvino.runtime import serialize
+from openvino.runtime import shutdown
+from openvino.runtime import save_model
+from openvino.runtime import layout_helpers
+from openvino._pyopenvino import RemoteContext
+from openvino._pyopenvino import RemoteTensor
+from openvino._pyopenvino import Op
# Import opsets
from openvino import opset1
@@ -95,7 +80,7 @@
from openvino._pyopenvino import VASurfaceTensor
# Set version for openvino package
-from openvino._pyopenvino import get_version
+from openvino.runtime import get_version
__version__ = get_version()
# Tools
diff --git a/src/bindings/python/src/openvino/_ov_api.py b/src/bindings/python/src/openvino/_ov_api.py
index da31fab4c95d8e..53d0fa5316498b 100644
--- a/src/bindings/python/src/openvino/_ov_api.py
+++ b/src/bindings/python/src/openvino/_ov_api.py
@@ -5,7 +5,9 @@
from types import TracebackType
from typing import Any, Iterable, Union, Optional, Dict, Type
from pathlib import Path
+import warnings
+import numpy as np
from openvino._pyopenvino import Model as ModelBase
from openvino._pyopenvino import Core as CoreBase
@@ -14,7 +16,7 @@
from openvino._pyopenvino import Tensor
from openvino._pyopenvino import Node
-from openvino.utils.data_helpers import (
+from openvino.runtime.utils.data_helpers import (
OVDict,
_InferRequestWrapper,
_data_dispatch,
diff --git a/src/bindings/python/src/openvino/frontend/frontend.py b/src/bindings/python/src/openvino/frontend/frontend.py
index 6a16d5a573b7d7..4d549d24b4ef7c 100644
--- a/src/bindings/python/src/openvino/frontend/frontend.py
+++ b/src/bindings/python/src/openvino/frontend/frontend.py
@@ -7,7 +7,7 @@
from openvino._pyopenvino import FrontEnd as FrontEndBase
from openvino._pyopenvino import FrontEndManager as FrontEndManagerBase
from openvino._pyopenvino import InputModel
-from openvino import Model
+from openvino.runtime import Model
class FrontEnd(FrontEndBase):
diff --git a/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py b/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py
index 9072598f824939..914f6b2e2ee548 100644
--- a/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py
@@ -6,7 +6,7 @@
import jax.core
from openvino.frontend.jax.py_jax_frontend import _FrontEndJaxDecoder as Decoder
-from openvino import PartialShape, Type as OVType, OVAny
+from openvino.runtime import PartialShape, Type as OVType, OVAny
from openvino.frontend.jax.utils import jax_array_to_ov_const, get_ov_type_for_value, \
ivalue_to_constant, param_to_constants
diff --git a/src/bindings/python/src/openvino/frontend/jax/utils.py b/src/bindings/python/src/openvino/frontend/jax/utils.py
index 659677b11d5af8..4535265d6de082 100644
--- a/src/bindings/python/src/openvino/frontend/jax/utils.py
+++ b/src/bindings/python/src/openvino/frontend/jax/utils.py
@@ -8,7 +8,7 @@
import jax.numpy as jnp
import numpy as np
from openvino.frontend.jax.passes import filter_element, filter_ivalue, filter_param
-from openvino import op, Type as OVType, Shape, OVAny
+from openvino.runtime import op, Type as OVType, Shape, OVAny
numpy_to_ov_type_map = {
np.float32: OVType.f32,
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py
index 81a2764ee1188d..c448571f1ac17a 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py
@@ -10,7 +10,7 @@
from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder
from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
-from openvino import PartialShape, Type as OVType, OVAny, Shape
+from openvino.runtime import PartialShape, Type as OVType, OVAny, Shape
from openvino.frontend.pytorch.utils import make_constant, fetch_attr, pt_to_ov_type_map, torch_tensor_to_ov_const
logger = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py
index a9a65781dcb254..9f2ef019769875 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py
@@ -18,7 +18,7 @@
from torch._decomp import decomposition_table, get_decompositions
from openvino.frontend import FrontEndManager
-from openvino import Core, Type, PartialShape
+from openvino.runtime import Core, Type, PartialShape
from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder
from openvino.frontend.pytorch.torchdynamo import decompositions
from openvino.frontend.pytorch.torchdynamo.decompositions import get_aot_decomposition_list, get_inf_decomposition_list
@@ -27,7 +27,7 @@
from openvino.frontend.pytorch.torchdynamo.compile import cached_model_name, openvino_compile_cached_model
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_model_caching, _get_decompositions, _get_aot_autograd
-from openvino import Core, Type, PartialShape
+from openvino.runtime import Core, Type, PartialShape
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py
index c9a772b3feac42..47b3b82806b18b 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py
@@ -5,7 +5,7 @@
# mypy: ignore-errors
from typing import Optional, Any
-from openvino import Core
+from openvino.runtime import Core
def _get_device(options) -> Optional[Any]:
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py
index ca8d5478e76c15..fa446893a05d07 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py
@@ -14,7 +14,7 @@
from openvino.frontend import FrontEndManager
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
-from openvino import Core, Type, PartialShape, serialize
+from openvino.runtime import Core, Type, PartialShape, serialize
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_config, _is_cache_dir_in_config
from typing import Callable, Optional
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py
index 7527ad7acb37a4..4f41f7b5a6a9de 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py
@@ -20,7 +20,7 @@
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.frontend.pytorch.torchdynamo.compile import openvino_compile
-from openvino import Core, Type, PartialShape
+from openvino.runtime import Core, Type, PartialShape
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_aot_autograd
from typing import Callable, Optional, Any
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
index 7bb8073167a654..6d8fdb1658793e 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
@@ -6,7 +6,7 @@
from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder
from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
-from openvino import op, PartialShape, Type as OVType, OVAny
+from openvino.runtime import op, PartialShape, Type as OVType, OVAny
from openvino.frontend.pytorch.utils import (
ivalue_to_constant,
get_value_from_getattr,
@@ -15,7 +15,7 @@
convert_quantized_tensor,
graph_has_ops,
)
-from openvino import opset11 as ops
+from openvino.runtime import opset11 as ops
from openvino.frontend.pytorch import quantized, patch_model
from openvino.frontend.pytorch.module_extension import ModuleExtension
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py
index 9ba36707037c9e..826d766505fa79 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py
@@ -7,8 +7,8 @@
import torch
import numpy as np
-from openvino import op, Type as OVType, Shape, Tensor
-from openvino import opset11 as ops
+from openvino.runtime import op, Type as OVType, Shape, Tensor
+from openvino.runtime import opset11 as ops
def make_constant(*args, **kwargs):
diff --git a/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py b/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py
index d15262cbc30366..fcedd7a74c2b51 100644
--- a/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py
@@ -7,7 +7,7 @@
import numpy as np
import tensorflow as tf
from openvino.frontend.tensorflow.py_tensorflow_frontend import _FrontEndDecoderBase as DecoderBase
-from openvino import PartialShape, Type, OVAny, Tensor
+from openvino.runtime import PartialShape, Type, OVAny, Tensor
def tf_type_to_ov_type(tf_type_int):
diff --git a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py
index 7de5dc950be53e..74c0dfff92297e 100644
--- a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py
+++ b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py
@@ -8,7 +8,7 @@
import logging as log
import numpy as np
import sys
-from openvino import PartialShape, Dimension, Type
+from openvino.runtime import PartialShape, Dimension, Type
from packaging.version import parse, Version
from typing import List, Dict, Union
diff --git a/src/bindings/python/src/openvino/helpers/packing.py b/src/bindings/python/src/openvino/helpers/packing.py
index d0956e09fc6261..796af87402f3a6 100644
--- a/src/bindings/python/src/openvino/helpers/packing.py
+++ b/src/bindings/python/src/openvino/helpers/packing.py
@@ -5,7 +5,7 @@
import numpy as np
from typing import Union
-from openvino import Type, Shape
+from openvino.runtime import Type, Shape
def pack_data(array: np.ndarray, type: Type) -> np.ndarray:
diff --git a/src/bindings/python/src/openvino/opset1/ops.py b/src/bindings/python/src/openvino/opset1/ops.py
index e264aea304fb1f..edca6c62a0b246 100644
--- a/src/bindings/python/src/openvino/opset1/ops.py
+++ b/src/bindings/python/src/openvino/opset1/ops.py
@@ -8,17 +8,17 @@
import numpy as np
from functools import partial
-from openvino import Node, PartialShape, Type
+from openvino.runtime import Node, PartialShape, Type
from openvino.op import Constant, Parameter, tensor_iterator
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset10/ops.py b/src/bindings/python/src/openvino/opset10/ops.py
index d0bc3cbf1cba4a..c7b75777484a59 100644
--- a/src/bindings/python/src/openvino/opset10/ops.py
+++ b/src/bindings/python/src/openvino/opset10/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import List, Optional
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
as_node,
diff --git a/src/bindings/python/src/openvino/opset11/ops.py b/src/bindings/python/src/openvino/opset11/ops.py
index 95767b4800db1c..575c99501d2d6c 100644
--- a/src/bindings/python/src/openvino/opset11/ops.py
+++ b/src/bindings/python/src/openvino/opset11/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import List, Optional
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
)
diff --git a/src/bindings/python/src/openvino/opset12/ops.py b/src/bindings/python/src/openvino/opset12/ops.py
index 4b354b1fcff973..928bf4f71a9773 100644
--- a/src/bindings/python/src/openvino/opset12/ops.py
+++ b/src/bindings/python/src/openvino/opset12/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import Optional
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
as_node,
diff --git a/src/bindings/python/src/openvino/opset13/ops.py b/src/bindings/python/src/openvino/opset13/ops.py
index 5c6863740120f8..12f0d06b1a28e6 100644
--- a/src/bindings/python/src/openvino/opset13/ops.py
+++ b/src/bindings/python/src/openvino/opset13/ops.py
@@ -11,12 +11,12 @@
log = logging.getLogger(__name__)
-from openvino import Node, Shape, Type, Output, Tensor
+from openvino.runtime import Node, Shape, Type, Output, Tensor
from openvino.op import Constant, Result
from openvino.opset1 import convert_like
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op, unary_op, overloading
-from openvino.utils.types import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op, overloading
+from openvino.runtime.utils.types import (
NumericData,
NodeInput,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset14/ops.py b/src/bindings/python/src/openvino/opset14/ops.py
index 59e1bfd3e89c6f..fa872d24eb7f1a 100644
--- a/src/bindings/python/src/openvino/opset14/ops.py
+++ b/src/bindings/python/src/openvino/opset14/ops.py
@@ -7,11 +7,11 @@
from typing import Union, Optional, List
-from openvino import Node, Type
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.types import TensorShape
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import NodeInput, as_node, as_nodes
+from openvino.runtime import Node, Type
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.types import TensorShape
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import NodeInput, as_node, as_nodes
_get_node_factory_opset14 = partial(_get_node_factory, "opset14")
diff --git a/src/bindings/python/src/openvino/opset15/ops.py b/src/bindings/python/src/openvino/opset15/ops.py
index 97d4419fc4834b..8e6b8bd46d5f7c 100644
--- a/src/bindings/python/src/openvino/opset15/ops.py
+++ b/src/bindings/python/src/openvino/opset15/ops.py
@@ -7,12 +7,12 @@
from typing import List, Literal, Optional
import numpy as np
-from openvino import Node, Type
+from openvino.runtime import Node, Type
from openvino.opset1 import convert_like
from openvino.opset14 import constant
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op
-from openvino.utils.types import NodeInput, as_nodes
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op
+from openvino.runtime.utils.types import NodeInput, as_nodes
_get_node_factory_opset15 = partial(_get_node_factory, "opset15")
diff --git a/src/bindings/python/src/openvino/opset16/ops.py b/src/bindings/python/src/openvino/opset16/ops.py
index e5ebdc7a2a11d6..60656f6d993b6a 100644
--- a/src/bindings/python/src/openvino/opset16/ops.py
+++ b/src/bindings/python/src/openvino/opset16/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import Optional
-from openvino import Node
-from openvino.utils.decorators import nameable_op
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.types import NodeInput, as_nodes
+from openvino.runtime import Node
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.types import NodeInput, as_nodes
_get_node_factory_opset16 = partial(_get_node_factory, "opset16")
diff --git a/src/bindings/python/src/openvino/opset2/ops.py b/src/bindings/python/src/openvino/opset2/ops.py
index f76f608fe9a5c7..45b33f5bc0288b 100644
--- a/src/bindings/python/src/openvino/opset2/ops.py
+++ b/src/bindings/python/src/openvino/opset2/ops.py
@@ -9,17 +9,18 @@
from functools import partial
import warnings
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset3/ops.py b/src/bindings/python/src/openvino/opset3/ops.py
index 1c2c7e309fe919..989f5819acb685 100644
--- a/src/bindings/python/src/openvino/opset3/ops.py
+++ b/src/bindings/python/src/openvino/opset3/ops.py
@@ -8,17 +8,18 @@
import numpy as np
from functools import partial
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset4/ops.py b/src/bindings/python/src/openvino/opset4/ops.py
index e6f3a3a1550937..4f6ba016852b02 100644
--- a/src/bindings/python/src/openvino/opset4/ops.py
+++ b/src/bindings/python/src/openvino/opset4/ops.py
@@ -8,17 +8,18 @@
import numpy as np
from functools import partial
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset5/ops.py b/src/bindings/python/src/openvino/opset5/ops.py
index 9217830752b1d8..20057b78c7c31d 100644
--- a/src/bindings/python/src/openvino/opset5/ops.py
+++ b/src/bindings/python/src/openvino/opset5/ops.py
@@ -8,17 +8,18 @@
import numpy as np
from functools import partial
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter, loop
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset6/ops.py b/src/bindings/python/src/openvino/opset6/ops.py
index 340d0405b4ba23..8020715f20dea3 100644
--- a/src/bindings/python/src/openvino/opset6/ops.py
+++ b/src/bindings/python/src/openvino/opset6/ops.py
@@ -9,13 +9,13 @@
from functools import partial, singledispatch
-from openvino import Node, Type, PartialShape, Output, Shape
+from openvino.runtime import Node, Type, PartialShape, Output, Shape
from openvino.op import assign, Constant, Parameter
from openvino.op import read_value as _read_value
from openvino.op.util import VariableInfo, Variable
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op, overloading
-from openvino.utils.types import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op, overloading
+from openvino.runtime.utils.types import (
NodeInput,
NumericType,
TensorShape,
diff --git a/src/bindings/python/src/openvino/opset7/ops.py b/src/bindings/python/src/openvino/opset7/ops.py
index e33d266debedf1..59e09b64888eb1 100644
--- a/src/bindings/python/src/openvino/opset7/ops.py
+++ b/src/bindings/python/src/openvino/opset7/ops.py
@@ -7,17 +7,18 @@
from typing import Callable, Iterable, List, Optional, Set, Union
import numpy as np
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset8/ops.py b/src/bindings/python/src/openvino/opset8/ops.py
index a9a868e7b541d8..6995d55a28a776 100644
--- a/src/bindings/python/src/openvino/opset8/ops.py
+++ b/src/bindings/python/src/openvino/opset8/ops.py
@@ -9,15 +9,15 @@
import numpy as np
from openvino.exceptions import UserInputError
from openvino.op import Constant, Parameter, if_op
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.input_validation import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.input_validation import (
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.types import (
+from openvino.runtime.utils.types import (
NodeInput,
TensorShape,
as_node,
diff --git a/src/bindings/python/src/openvino/opset9/ops.py b/src/bindings/python/src/openvino/opset9/ops.py
index e2264845e058dc..a6d45cfd0be2cc 100644
--- a/src/bindings/python/src/openvino/opset9/ops.py
+++ b/src/bindings/python/src/openvino/opset9/ops.py
@@ -7,10 +7,10 @@
from typing import Optional
import numpy as np
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
as_node,
diff --git a/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py b/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
index 717e945217468c..c14635cc118208 100644
--- a/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
+++ b/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
@@ -5,7 +5,7 @@
from typing import Callable, Any, Union
import logging
-import openvino as ov
+import openvino.runtime as ov
class PreprocessConverter():
diff --git a/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py b/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
index 5dad42b47da44a..f8b51afd546f57 100644
--- a/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
+++ b/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
@@ -20,10 +20,10 @@
import torchvision.transforms as transforms
from torchvision.transforms import InterpolationMode
-import openvino as ov
-import openvino.opset11 as ops
-from openvino import Layout, Type
-from openvino.utils.decorators import custom_preprocess_function
+import openvino.runtime as ov
+import openvino.runtime.opset11 as ops
+from openvino.runtime import Layout, Type
+from openvino.runtime.utils.decorators import custom_preprocess_function
from openvino.preprocess import PrePostProcessor, ResizeAlgorithm, ColorFormat
diff --git a/src/bindings/python/src/openvino/runtime/opset_utils.py b/src/bindings/python/src/openvino/runtime/opset_utils.py
new file mode 100644
index 00000000000000..475750e71f87c5
--- /dev/null
+++ b/src/bindings/python/src/openvino/runtime/opset_utils.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional
+import numpy as np
+
+from openvino.runtime import Node
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
+ as_node,
+ NodeInput,
+)
+
+
+def _get_node_factory(opset_version: Optional[str] = None) -> NodeFactory:
+    """Build a NodeFactory for `opset_version`; with no version, NodeFactory() is constructed without arguments."""
+    # Any falsy value (None or an empty string) takes the no-argument path.
+    if not opset_version:
+        return NodeFactory()
+    return NodeFactory(opset_version)
diff --git a/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py b/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py
deleted file mode 100644
index 6fb3e5f6f0c950..00000000000000
--- a/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.node_factory import _get_node_factory
diff --git a/src/bindings/python/src/openvino/runtime/utils/__init__.py b/src/bindings/python/src/openvino/runtime/utils/__init__.py
index 8447e93a907277..73399ccbed2598 100644
--- a/src/bindings/python/src/openvino/runtime/utils/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/utils/__init__.py
@@ -4,4 +4,4 @@
"""Generic utilities. Factor related functions out to separate files."""
-from openvino.utils import numpy_to_c, replace_node, replace_output_update_name
+from openvino._pyopenvino.util import numpy_to_c, replace_node, replace_output_update_name
diff --git a/src/bindings/python/src/openvino/utils/broadcasting.py b/src/bindings/python/src/openvino/runtime/utils/broadcasting.py
similarity index 87%
rename from src/bindings/python/src/openvino/utils/broadcasting.py
rename to src/bindings/python/src/openvino/runtime/utils/broadcasting.py
index 01549625e2c628..9fd13da7728e29 100644
--- a/src/bindings/python/src/openvino/utils/broadcasting.py
+++ b/src/bindings/python/src/openvino/runtime/utils/broadcasting.py
@@ -3,11 +3,14 @@
# SPDX-License-Identifier: Apache-2.0
import logging
-from typing import Optional
+from typing import List, Optional
-from openvino import AxisSet
-from openvino.utils.types import (
+from openvino.runtime import AxisSet, Node
+from openvino.runtime.utils.types import (
+ NodeInput,
TensorShape,
+ get_dtype,
+ make_constant_node,
)
log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py b/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py
deleted file mode 100644
index 3219f239f0ab44..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.broadcasting import get_broadcast_axes
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
index 282547dd9df79a..a46105efaaeadb 100644
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
@@ -2,7 +2,7 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
+from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch
+from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file
+from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper
+from openvino.runtime.utils.data_helpers.wrappers import OVDict
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
similarity index 99%
rename from src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py
rename to src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
index d4db7cb07b629c..bce10c9c3774ef 100644
--- a/src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py
+++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
@@ -8,7 +8,7 @@
import numpy as np
from openvino._pyopenvino import ConstOutput, Tensor, Type, RemoteTensor
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict
+from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict
ContainerTypes = Union[dict, list, tuple, OVDict]
ScalarTypes = Union[np.number, int, float]
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py
deleted file mode 100644
index e0a2d022660dd3..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.data_helpers.data_dispatcher import ContainerTypes
-from openvino.utils.data_helpers.data_dispatcher import ScalarTypes
-from openvino.utils.data_helpers.data_dispatcher import ValidKeys
-
-from openvino.utils.data_helpers.data_dispatcher import is_list_simple_type
-from openvino.utils.data_helpers.data_dispatcher import get_request_tensor
-from openvino.utils.data_helpers.data_dispatcher import value_to_tensor
-from openvino.utils.data_helpers.data_dispatcher import to_c_style
-from openvino.utils.data_helpers.data_dispatcher import normalize_arrays
-from openvino.utils.data_helpers.data_dispatcher import create_shared
-from openvino.utils.data_helpers.data_dispatcher import set_request_tensor
-from openvino.utils.data_helpers.data_dispatcher import update_tensor
-from openvino.utils.data_helpers.data_dispatcher import update_inputs
-from openvino.utils.data_helpers.data_dispatcher import create_copied
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/wrappers.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py
similarity index 100%
rename from src/bindings/python/src/openvino/utils/data_helpers/wrappers.py
rename to src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py
deleted file mode 100644
index 22214fd24682da..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
diff --git a/src/bindings/python/src/openvino/utils/decorators.py b/src/bindings/python/src/openvino/runtime/utils/decorators.py
similarity index 98%
rename from src/bindings/python/src/openvino/utils/decorators.py
rename to src/bindings/python/src/openvino/runtime/utils/decorators.py
index 9418c359d129e8..98da1ba4389ef7 100644
--- a/src/bindings/python/src/openvino/utils/decorators.py
+++ b/src/bindings/python/src/openvino/runtime/utils/decorators.py
@@ -6,8 +6,8 @@
from inspect import signature
from typing import Any, Callable, Dict, Optional, Union, get_origin, get_args
-from openvino import Node, Output
-from openvino.utils.types import NodeInput, as_node, as_nodes
+from openvino.runtime import Node, Output
+from openvino.runtime.utils.types import NodeInput, as_node, as_nodes
def _get_name(**kwargs: Any) -> Node:
diff --git a/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py b/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py
deleted file mode 100644
index bb0bac112d2c5f..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.decorators import _get_name
-from openvino.utils.decorators import _set_node_friendly_name
-from openvino.utils.decorators import nameable_op
-from openvino.utils.decorators import unary_op
-from openvino.utils.decorators import binary_op
-from openvino.utils.decorators import custom_preprocess_function
-from openvino.utils.decorators import MultiMethod
-from openvino.utils.decorators import registry
-from openvino.utils.decorators import overloading
diff --git a/src/bindings/python/src/openvino/utils/input_validation.py b/src/bindings/python/src/openvino/runtime/utils/input_validation.py
similarity index 98%
rename from src/bindings/python/src/openvino/utils/input_validation.py
rename to src/bindings/python/src/openvino/runtime/utils/input_validation.py
index 1de08452e1da9f..e79a16c48581b1 100644
--- a/src/bindings/python/src/openvino/utils/input_validation.py
+++ b/src/bindings/python/src/openvino/runtime/utils/input_validation.py
@@ -9,7 +9,7 @@
import numpy as np
-from openvino.exceptions import UserInputError
+from openvino.runtime.exceptions import UserInputError
log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py b/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py
deleted file mode 100644
index 0b49e9ea33c40d..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.input_validation import assert_list_of_ints
-from openvino.utils.input_validation import _check_value
-from openvino.utils.input_validation import check_valid_attribute
-from openvino.utils.input_validation import check_valid_attributes
-from openvino.utils.input_validation import is_positive_value
-from openvino.utils.input_validation import is_non_negative_value
diff --git a/src/bindings/python/src/openvino/utils/node_factory.py b/src/bindings/python/src/openvino/runtime/utils/node_factory.py
similarity index 92%
rename from src/bindings/python/src/openvino/utils/node_factory.py
rename to src/bindings/python/src/openvino/runtime/utils/node_factory.py
index e999ae6988814a..25daf739223dba 100644
--- a/src/bindings/python/src/openvino/utils/node_factory.py
+++ b/src/bindings/python/src/openvino/runtime/utils/node_factory.py
@@ -2,16 +2,17 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
+import logging as log
-from functools import singledispatchmethod
+from functools import partial, singledispatchmethod
from typing import Any, Dict, List, Optional, Union
from pathlib import Path
from openvino._pyopenvino import NodeFactory as _NodeFactory
-from openvino import Node, Output, Extension
+from openvino.runtime import Node, Output, Extension
-from openvino.exceptions import UserInputError
+from openvino.runtime.exceptions import UserInputError
DEFAULT_OPSET = "opset13"
@@ -124,11 +125,3 @@ def _arguments_as_outputs(arguments: List[Union[Node, Output]]) -> List[Output]:
else:
outputs.extend(argument.outputs())
return outputs
-
-
-def _get_node_factory(opset_version: Optional[str] = None) -> NodeFactory:
- """Return NodeFactory configured to create operators from specified opset version."""
- if opset_version:
- return NodeFactory(opset_version)
- else:
- return NodeFactory()
diff --git a/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py b/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py
deleted file mode 100644
index 945ea8deb7863c..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.node_factory import NodeFactory
diff --git a/src/bindings/python/src/openvino/utils/reduction.py b/src/bindings/python/src/openvino/runtime/utils/reduction.py
similarity index 95%
rename from src/bindings/python/src/openvino/utils/reduction.py
rename to src/bindings/python/src/openvino/runtime/utils/reduction.py
index e6be6d0ac9a104..71d0af8de7376e 100644
--- a/src/bindings/python/src/openvino/utils/reduction.py
+++ b/src/bindings/python/src/openvino/runtime/utils/reduction.py
@@ -4,7 +4,7 @@
from typing import Iterable, Optional
-from openvino import Node
+from openvino.runtime import Node
def get_reduction_axes(node: Node, reduction_axes: Optional[Iterable[int]]) -> Iterable[int]:
diff --git a/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py b/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py
deleted file mode 100644
index a2fbff9e793dca..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.reduction import get_reduction_axes
diff --git a/src/bindings/python/src/openvino/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py
similarity index 97%
rename from src/bindings/python/src/openvino/utils/types.py
rename to src/bindings/python/src/openvino/runtime/utils/types.py
index b3543739741d94..52f1faf8e1e839 100644
--- a/src/bindings/python/src/openvino/utils/types.py
+++ b/src/bindings/python/src/openvino/runtime/utils/types.py
@@ -9,9 +9,9 @@
import numpy as np
-from openvino.exceptions import OVTypeError
-from openvino import Node, Shape, Output, Type
-from openvino.op import Constant
+from openvino.runtime.exceptions import OVTypeError
+from openvino.runtime import Node, Shape, Output, Type
+from openvino.runtime.op import Constant
log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/types/__init__.py b/src/bindings/python/src/openvino/runtime/utils/types/__init__.py
deleted file mode 100644
index 4f88d609988e8d..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/types/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.types import TensorShape
-from openvino.utils.types import NumericData
-from openvino.utils.types import NumericType
-from openvino.utils.types import ScalarData
-from openvino.utils.types import NodeInput
-
-from openvino.utils.types import openvino_to_numpy_types_map
-from openvino.utils.types import openvino_to_numpy_types_str_map
-from openvino.utils.types import get_element_type
-from openvino.utils.types import get_element_type_str
-from openvino.utils.types import get_dtype
-from openvino.utils.types import get_numpy_ctype
-from openvino.utils.types import get_ndarray
-from openvino.utils.types import get_shape
-from openvino.utils.types import make_constant_node
-from openvino.utils.types import as_node
-from openvino.utils.types import as_nodes
diff --git a/src/bindings/python/src/openvino/package_utils.py b/src/bindings/python/src/openvino/utils.py
similarity index 97%
rename from src/bindings/python/src/openvino/package_utils.py
rename to src/bindings/python/src/openvino/utils.py
index 6aa3f3ed39b556..9890ae9b3e6460 100644
--- a/src/bindings/python/src/openvino/package_utils.py
+++ b/src/bindings/python/src/openvino/utils.py
@@ -21,9 +21,9 @@ def _add_openvino_libs_to_search_path() -> None:
if os.path.isdir(os.path.join(os.path.dirname(__file__), "libs")):
# looking for the libs in the pip installation path.
openvino_libs.append(os.path.join(os.path.dirname(__file__), "libs"))
- elif os.path.isdir(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, "Library", "bin")):
+ elif os.path.isdir(os.path.join(os.path.dirname(__file__), "..", "..", "..", "Library", "bin")):
# looking for the libs in the conda installation path
- openvino_libs.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, "Library", "bin"))
+ openvino_libs.append(os.path.join(os.path.dirname(__file__), "..", "..", "..", "Library", "bin"))
else:
# setupvars.bat script set all libs paths to OPENVINO_LIB_PATHS environment variable.
openvino_libs_installer = os.getenv("OPENVINO_LIB_PATHS")
diff --git a/src/bindings/python/src/openvino/utils/__init__.py b/src/bindings/python/src/openvino/utils/__init__.py
deleted file mode 100644
index 2ccc79d20cce84..00000000000000
--- a/src/bindings/python/src/openvino/utils/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""Generic utilities. Factor related functions out to separate files."""
-
-from openvino._pyopenvino.util import numpy_to_c, replace_node, replace_output_update_name
-
-from openvino.package_utils import get_cmake_path
-from openvino.package_utils import deprecated
-from openvino.package_utils import classproperty
-from openvino.package_utils import deprecatedclassproperty
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/utils/data_helpers/__init__.py
deleted file mode 100644
index 282547dd9df79a..00000000000000
--- a/src/bindings/python/src/openvino/utils/data_helpers/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
new file mode 100644
index 00000000000000..84383b777604ea
--- /dev/null
+++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+/// This pass transforms the following sub-graph to a single Scaled Dot Product Attention operation.
+/// Before:
+///    ┌───────┐     ┌───────┐    ┌───────┐
+///    │   Q   │     │   K   │    │   V   │
+///    └───┬───┘     └───┬───┘    └───┬───┘
+///        │             │            │
+///        │             │            │
+///    ┌───┴───┐   ┌─────┴──────┐     │
+///    │ MatMul│<──│ Transpose  │     │
+///    └───┬───┘   │ (Optional) │     │
+///        │       └────────────┘     │
+///    ┌───┴───┐    ┌─────────────┐   │
+///    │  Add  │<───│AttentionMask│   │
+///    └───┬───┘    │ (Optional)  │   │
+///        │        └─────────────┘   │
+///    ┌───┴───┐                      │
+///    │Softmax│                      │
+///    └───┬───┘                      │
+///        │                          │
+///    ┌───┴───┐                      │
+///    │ MatMul│<─────────────────────┘
+///    └───┬───┘
+///    ┌───┴───┐
+///    │ Output│
+///    └───────┘
+///
+/// After:
+///    ┌───────┐    ┌───────┐    ┌───────┐    ┌─────────────┐
+///    │   Q   │    │   K   │    │   V   │    │AttentionMask│
+///    └───┬───┘    └───┬───┘    └───┬───┘    └──────┬──────┘
+///        │            │            │               │
+///        │            │            │               │
+///    ┌───┴────────────┴────────────┴───────────────┴─┐
+///    │           ScaledDotProductAttention           │
+///    └────────────────────┬──────────────────────────┘
+///                         │
+///                         │
+///                    ┌────┴────┐
+///                    │  Output │
+///                    └─────────┘
+class TRANSFORMATIONS_API SDPAFusion : public ov::pass::MatcherPass {
+public:
+ OPENVINO_MATCHER_PASS_RTTI("SDPAFusion", "0");
+ SDPAFusion();
+};
+
+} // namespace pass
+} // namespace ov
diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp
new file mode 100644
index 00000000000000..cae0363e785f4e
--- /dev/null
+++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp
@@ -0,0 +1,58 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+/// Merges explicit multiplication by scalar value for Q and K into scale attribute of SDPA op
+/// Before:
+///    ┌───────┐    ┌───────┐    ┌───────┐  ┌─────────────┐  ┌─────────────┐
+///    │   Q   │    │   K   │    │   V   │  │AttentionMask│  │    Scale    │
+///    └───┬───┘    └───┬───┘    └───┬───┘  │ (Optional)  │  │ (Optional)  │
+///        │            │            │      └──────┬──────┘  └───────┬─────┘
+///        │            │            │             │                 │
+///    ┌───┴───┐    ┌───┴───┐        │             │                 │
+///    │  Mul  │    │  Mul  │        │             │                 │
+///    └───┬───┘    └───┬───┘        │             │                 │
+///        │            │            │             │                 │
+///        │            │            │             │                 │
+///    ┌───┴────────────┴────────────┴─────────────┴─┐               │
+///    │          ScaledDotProductAttention          │───────────────┘
+///    └────────────────────┬────────────────────────┘
+///                         │
+///                         │
+///                    ┌────┴────┐
+///                    │  Output │
+///                    └─────────┘
+/// After:
+///    ┌───────┐    ┌───────┐    ┌───────┐  ┌─────────────┐  ┌───────┐
+///    │   Q   │    │   K   │    │   V   │  │AttentionMask│  │ Scale │
+///    └───┬───┘    └───┬───┘    └───┬───┘  └──────┬──────┘  └───┬───┘
+///        │            │            │             │             │
+///        │            │            │             │             │
+///        │            │            │             │             │
+///    ┌───┴────────────┴────────────┴─────────────┴─┐           │
+///    │          ScaledDotProductAttention          │───────────┘
+///    └────────────────────┬────────────────────────┘
+///                         │
+///                         │
+///                    ┌────┴────┐
+///                    │  Output │
+///                    └─────────┘
+/// Multiply ops for Q and K are eliminated in the following cases:
+/// 1. Q_scale and K_scale are constant
+/// 2. Q_scale * SDPA_Scale == 1 or K_scale * SDPA_Scale == 1
+class TRANSFORMATIONS_API SDPAScaleFusion : public ov::pass::MatcherPass {
+public:
+ OPENVINO_MATCHER_PASS_RTTI("SDPAScaleFusion", "0");
+ SDPAScaleFusion();
+};
+
+} // namespace pass
+} // namespace ov
diff --git a/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp b/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp
index 50c0ecd20e76af..825ce8acbd7998 100644
--- a/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp
+++ b/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp
@@ -15,6 +15,7 @@ namespace ov {
namespace pass {
class TRANSFORMATIONS_API PositionIDsReplacer;
+class TRANSFORMATIONS_API PositionIDsReplacerQwen;
} // namespace pass
} // namespace ov
@@ -24,3 +25,22 @@ class ov::pass::PositionIDsReplacer : public ov::pass::MatcherPass {
OPENVINO_MATCHER_PASS_RTTI("PositionIDsReplacer");
explicit PositionIDsReplacer(const Output& position_ids);
};
+
+/**
+ * @brief The Qwen model expects tokens to be processed in order: its "position ids" input is
+ * detached and is not explicitly used in the model. Instead, the model uses implicitly defined
+ * "position ids" based on the past KV cache size.
+ *
+ * To use this model in Continuous batching mode, we need to apply position_ids and
+ * use the corresponding rotary_emb_cos/rotary_emb_sin.
+ * For this, we replace
+ * rotary_emb_cos/rotary_emb_sin -> Slice -> Slice
+ * With
+ * rotary_emb_cos/rotary_emb_sin -> Gather(by position_ids)
+ * Which enables applying RoPE for each token independently of their order in the input tensor.
+ */
+class ov::pass::PositionIDsReplacerQwen : public ov::pass::MatcherPass {
+public:
+ OPENVINO_MATCHER_PASS_RTTI("PositionIDsReplacerQwen");
+ explicit PositionIDsReplacerQwen(const Output& position_ids);
+};
diff --git a/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp b/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp
index f5497207eb4e17..d1cc5d5126cd67 100644
--- a/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp
+++ b/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp
@@ -4,7 +4,6 @@
#pragma once
-#include "openvino/cc/pass/itt.hpp"
#include "openvino/op/shape_of.hpp"
#include "openvino/op/subtract.hpp"
#include "openvino/pass/matcher_pass.hpp"
@@ -22,6 +21,8 @@ class TRANSFORMATIONS_API PrevSequenceLengthPattern;
class ov::pass::PrevSequenceLengthPattern : public ov::pass::MatcherPass {
public:
- OPENVINO_MATCHER_PASS_RTTI("PrevSequenceLengthPattern");
- explicit PrevSequenceLengthPattern(std::shared_ptr prev_max_seq_len, std::shared_ptr batch_dim);
+ OPENVINO_MATCHER_PASS_RTTI("PrevSequenceLengthPattern", "0");
+ explicit PrevSequenceLengthPattern(const std::shared_ptr& unsqueezed_input_ids,
+ const std::shared_ptr& max_context_len,
+ const std::shared_ptr& position_ids);
};
diff --git a/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp b/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp
index b5ecb96fa95198..2456161ea80a78 100644
--- a/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp
+++ b/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp
@@ -15,6 +15,7 @@ namespace ov {
namespace pass {
class TRANSFORMATIONS_API TotalSequenceLengthPattern;
+class TRANSFORMATIONS_API TotalSequenceLengthPatternQwen;
} // namespace pass
} // namespace ov
@@ -24,3 +25,22 @@ class ov::pass::TotalSequenceLengthPattern : public ov::pass::MatcherPass {
OPENVINO_MATCHER_PASS_RTTI("TotalSequenceLengthPattern");
explicit TotalSequenceLengthPattern(const std::shared_ptr& max_context_len);
};
+
+/**
+ * @brief Qwen model has a specific pattern for TotalSequenceLen place detection.
+ *
+ * common pattern: Add (PrevSeqLen, CurrentSeqLen)
+ *
+ * The CurrentSeqLen is presented in this form:
+ * CurrentSeqLen: Parameter(name: input_ids) -> ShapeOf -> Gather
+ *
+ * Before applying this transformation, we already detected the PrevSeqLen place in the PrevSequenceLengthPattern
+ * and replaced it with the following subgraph:
+ * PrevSeqLen: Subtract (in: Parameter(name: max_context_len), in: CurrentSeqLen)
+ *
+ **/
+class ov::pass::TotalSequenceLengthPatternQwen : public ov::pass::MatcherPass {
+public:
+ OPENVINO_MATCHER_PASS_RTTI("TotalSequenceLengthPatternQwen", "0");  // must differ from TotalSequenceLengthPattern
+ explicit TotalSequenceLengthPatternQwen(const std::shared_ptr& max_context_len);
+};
diff --git a/src/common/transformations/include/transformations/utils/gen_pattern.hpp b/src/common/transformations/include/transformations/utils/gen_pattern.hpp
index 21309e339c959c..976561b4844a17 100644
--- a/src/common/transformations/include/transformations/utils/gen_pattern.hpp
+++ b/src/common/transformations/include/transformations/utils/gen_pattern.hpp
@@ -539,6 +539,11 @@ class AttrSetter : public ov::AttributeVisitor {
a->set(m_attr_map[name].as_vector());
} else if (auto a = ov::as_type>(&adapter)) {
a->set(m_attr_map[name].as_T_vector());
+ } else if (auto a = dynamic_cast>*>(&adapter)) {
+ ov::op::util::VariableInfo var_info;
+ var_info.variable_id = m_attr_map[name].as_string();
+ auto variable = std::make_shared(var_info);
+ a->set(variable);
} else {
OPENVINO_THROW("unsupported AttributeAdapter for attribute : ", name);
}
@@ -896,6 +901,7 @@ struct PatternNode {
// scalar constant (treated as wildcard for single-element-constant with any rank)
PatternNode(int v) : node(std::make_shared(element::from(), Shape({}), v)) {}
PatternNode(float v) : node(std::make_shared(element::from(), Shape({}), v)) {}
+ PatternNode(long long v) : node(std::make_shared(element::from(), Shape({}), v)) {}
PatternNode(std::initializer_list v, values_info vi = nullptr) {
node = ConstVector(std::vector(v), vi);
diff --git a/src/common/transformations/include/transformations/utils/print_model.hpp b/src/common/transformations/include/transformations/utils/print_model.hpp
index 0829cd7e320e88..53fa7de51c5eca 100644
--- a/src/common/transformations/include/transformations/utils/print_model.hpp
+++ b/src/common/transformations/include/transformations/utils/print_model.hpp
@@ -19,6 +19,7 @@
#include "openvino/core/model.hpp"
#include "openvino/core/node.hpp"
#include "openvino/op/constant.hpp"
+#include "openvino/op/util/multi_subgraph_base.hpp"
#include "openvino/pass/pass.hpp"
#include "transformations/utils/utils.hpp"
diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp
index 185ae84ec83642..23fbf882024bdc 100644
--- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp
@@ -65,6 +65,7 @@
#include "transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp"
#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
#include "transformations/common_optimizations/ric_fusion.hpp"
+#include "transformations/common_optimizations/sdpa_fusion.hpp"
#include "transformations/common_optimizations/select_with_one_value_condition.hpp"
#include "transformations/common_optimizations/sequence_fusion.hpp"
#include "transformations/common_optimizations/shared_ops_optimization.hpp"
@@ -229,6 +230,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr
ADD_MATCHER(common_fusions, ConvertTensorIteratorToSequence)
ADD_MATCHER(common_fusions, SplitConcatPairToInterpolateFusion, m_use_shapes)
ADD_MATCHER(common_fusions, ConvolutionToGroupConvolutionFusion)
+ ADD_MATCHER(common_fusions, SDPAFusion)
if (m_use_shapes) {
ADD_MATCHER(common_fusions, NearestNeighborUpsamplingFusion)
}
diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp
new file mode 100644
index 00000000000000..fc581580f70001
--- /dev/null
+++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp
@@ -0,0 +1,127 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/sdpa_fusion.hpp"
+
+#include "openvino/core/rt_info.hpp"
+#include "openvino/core/type.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/matmul.hpp"
+#include "openvino/op/scaled_dot_product_attention.hpp"
+#include "openvino/op/softmax.hpp"
+#include "openvino/op/transpose.hpp"
+#include "openvino/op/unsqueeze.hpp"
+#include "openvino/pass/pattern/op/optional.hpp"
+#include "openvino/pass/pattern/op/pattern.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "transformations/utils/gen_pattern.hpp"
+
+namespace ov {
+namespace pass {
+
+SDPAFusion::SDPAFusion() {
+ using namespace ov::pass::pattern;
+ using namespace ov::gen_pattern;
+ // Inputs of the attention sub-graph: Q, K and V are required to have rank 4; the mask pattern has no constraint.
+ auto q = makePattern(ov::Rank(4));
+ auto k = makePattern(ov::Rank(4));
+ auto v = makePattern(ov::Rank(4));
+ auto mask = makePattern();
+ // An explicit transpose of K is accepted only when its constant order is {0, 1, 3, 2}.
+ auto k_transpose_order = pattern::wrap_type([](const Output& node) {
+ auto axis_order =
+ std::dynamic_pointer_cast(node.get_node_shared_ptr())->cast_vector();
+ return axis_order == std::vector{0, 1, 3, 2};
+ });
+
+ auto k_t = pattern::wrap_type({k, k_transpose_order});
+ auto qk_nn = makePattern({q, k_t}, {{"transpose_a", false}, {"transpose_b", false}});  // K transposed by k_t
+ auto qk_nt = makePattern({q, k}, {{"transpose_a", false}, {"transpose_b", true}});  // K transposed by attribute
+ auto qk = qk_nt | qk_nn;
+ auto optional_add_mask = optional({qk, mask});
+ auto softmax = makePattern({optional_add_mask}, {{"axis", "-1"}});
+ auto qkv = makePattern({softmax, v}, {{"transpose_a", false}, {"transpose_b", false}});
+ // Both inputs of the Q*K product must be rank 4 with equal, static head-size dims (K index depends on transpose_b).
+ auto valid_qk_shapes = [](const std::shared_ptr& qk_matmul) {
+ auto q_pshape = qk_matmul->get_input_partial_shape(0);
+ auto k_pshape = qk_matmul->get_input_partial_shape(1);
+
+ const size_t q_head_size_idx = 3;
+ const size_t k_head_size_idx = qk_matmul->get_transpose_b() ? 3 : 2;
+
+ return q_pshape.size() == 4 && k_pshape.size() == 4 && q_pshape[q_head_size_idx].is_static() &&
+ k_pshape[k_head_size_idx].is_static() &&
+ q_pshape[q_head_size_idx].get_length() == k_pshape[k_head_size_idx].get_length();
+ };
+
+ ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+ const auto& pattern_map = m.get_pattern_value_map();
+ if (transformation_callback(m.get_match_root())) {
+ return false;
+ }
+
+ auto q_node = pattern_map.at(q);
+ auto k_node = pattern_map.at(k);
+ auto v_node = pattern_map.at(v);
+
+ if (!valid_qk_shapes(ov::as_type_ptr(pattern_map.at(qk).get_node_shared_ptr()))) {
+ return false;
+ }
+ // Reject the match when the Q*K product, the Softmax or the mask Add output has more than one consumer.
+ if (pattern_map.at(qk).get_target_inputs().size() > 1 ||
+ pattern_map.at(softmax).get_target_inputs().size() > 1) {
+ return false;
+ }
+ if (pattern_map.count(optional_add_mask) && (pattern_map.at(optional_add_mask).get_target_inputs().size() > 1 ||
+ pattern_map.at(mask).get_partial_shape().size() > 4)) {
+ return false;
+ }
+ // When no mask Add was matched, a scalar zero constant of Q's element type is used as the mask.
+ Output mask_value;
+ Output mask_input;
+ if (pattern_map.find(optional_add_mask) != pattern_map.end()) {
+ mask_value = pattern_map.at(mask);
+ } else {
+ mask_value = ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{0});
+ }
+
+ if (mask_value.get_partial_shape().size() > 4) {
+ return false;
+ }
+ // A mask of rank 0 or 4 is passed through; any other rank is expanded along axes [0, rank_diff) to Q's rank.
+ if (mask_value.get_partial_shape().rank() == 0 || mask_value.get_partial_shape().rank() == 4) {
+ mask_input = mask_value;
+ } else {
+ size_t rank_diff = q_node.get_partial_shape().size() - mask_value.get_partial_shape().size();
+ std::vector axes(rank_diff);
+ std::iota(axes.begin(), axes.end(), 0);
+ mask_input = std::make_shared(
+ mask_value,
+ ov::op::v0::Constant::create(ov::element::i64, ov::Shape{rank_diff}, axes));
+ }
+ // The scale input of the fused op is the scalar constant 1 in Q's element type.
+ std::shared_ptr scale_node =
+ ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{1.0f});
+
+ std::shared_ptr sdpa = std::make_shared(q_node,
+ k_node,
+ v_node,
+ mask_input,
+ scale_node,
+ false);
+
+ sdpa->set_friendly_name(m.get_match_root()->get_friendly_name());
+ ov::copy_runtime_info(m.get_matched_nodes(), sdpa);
+ ov::replace_node(m.get_match_root(), sdpa);
+
+ return true;
+ };
+
+ auto m = std::make_shared(qkv, "SDPAFusion");
+ this->register_matcher(m, callback);
+}
+
+} // namespace pass
+} // namespace ov
diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp
new file mode 100644
index 00000000000000..3d750fe38a868e
--- /dev/null
+++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp
@@ -0,0 +1,140 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/sdpa_scale_fusion.hpp"
+
+#include
+
+#include "openvino/core/node.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/core/type.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/scaled_dot_product_attention.hpp"
+#include "openvino/pass/pattern/op/optional.hpp"
+#include "openvino/pass/pattern/op/pattern.hpp"
+#include "transformations/utils/gen_pattern.hpp"
+
+namespace ov {
+namespace pass {
+
+SDPAScaleFusion::SDPAScaleFusion() {
+ using namespace ov::pass::pattern;
+ using namespace ov::gen_pattern;
+ // Q, K and V must have rank 4; the Q/K multipliers are matched with the shape strings "[]" or "[1]".
+ auto q = makePattern(ov::Rank(4));
+ auto k = makePattern(ov::Rank(4));
+ auto v = makePattern(ov::Rank(4));
+ auto mask = makePattern();
+ auto sdpa_scale = makeConst({});
+ auto scale_q = makePattern("[]") | makePattern("[1]");
+ auto scale_k = makePattern("[]") | makePattern("[1]");
+ // The multiplications on Q and K are optional; SDPA is matched with 3, 4 or 5 inputs and causal == false.
+ auto scaled_q = optional({q, scale_q});
+ auto scaled_k = optional({k, scale_k});
+ auto sdpa_mask_scale =
+ makePattern({scaled_q, scaled_k, v, mask, sdpa_scale},
+ {{"causal", false}});
+ auto sdpa_mask =
+ makePattern({scaled_q, scaled_k, v, mask}, {{"causal", false}});
+ auto sdpa_simple =
+ makePattern({scaled_q, scaled_k, v}, {{"causal", false}});
+ auto sdpa = sdpa_simple | sdpa_mask | sdpa_mask_scale;
+
+ ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+ const auto& pattern_map = m.get_pattern_value_map();
+ if (transformation_callback(m.get_match_root())) {
+ return false;
+ }
+
+ auto sdpa = m.get_match_root();
+
+ const bool has_q_scale = pattern_map.count(scaled_q);
+ const bool has_k_scale = pattern_map.count(scaled_k);
+
+ // Nothing to do when neither Q nor K is multiplied by a scale
+ if (!has_q_scale && !has_k_scale)
+ return false;
+
+ auto prev_scale_value = 1.0f;
+ auto scale_q_value = 1.0f;
+ auto scale_k_value = 1.0f;
+ auto scale_et = sdpa->get_output_element_type(0);
+
+ Output q_input = sdpa->get_input_source_output(0);
+ Output k_input = sdpa->get_input_source_output(1);
+
+ std::shared_ptr scale_q_node = nullptr;
+ std::shared_ptr scale_k_node = nullptr;
+ // Start from the matched SDPA scale constant; otherwise from 1/sqrt(head_size), which needs a static Q dim 3.
+ if (pattern_map.find(sdpa_scale) != pattern_map.end()) {
+ auto prev_scale_node =
+ ov::as_type_ptr(pattern_map.at(sdpa_scale).get_node_shared_ptr());
+ prev_scale_value = prev_scale_node->cast_vector()[0];
+ scale_et = prev_scale_node->get_output_element_type(0);
+ } else {
+ auto head_size = q_input.get_partial_shape()[3];
+ if (head_size.is_dynamic())
+ return false;
+
+ prev_scale_value = 1.0f / std::sqrt(static_cast(head_size.get_length()));
+ }
+
+ // Extract scalar scale values for Q and K if those are constant and set new inputs for SDPA
+ if (has_q_scale) {
+ scale_q_node = pattern_map.at(scale_q).get_node_shared_ptr();
+ if (ov::is_type(scale_q_node)) {
+ scale_q_value = ov::as_type_ptr(scale_q_node)->cast_vector()[0];
+ q_input = pattern_map.at(q);
+ }
+ }
+ if (has_k_scale) {
+ scale_k_node = pattern_map.at(scale_k).get_node_shared_ptr();
+ if (ov::is_type(scale_k_node)) {
+ scale_k_value = ov::as_type_ptr(scale_k_node)->cast_vector()[0];
+ k_input = pattern_map.at(k);
+ }
+ }
+
+ Output new_scale_node;
+ auto new_scale_val = prev_scale_value * scale_q_value * scale_k_value;
+
+ // If new scale is 1 and we have non-constant scale node for either Q or K, then we can make it a scale of SDPA
+ if (new_scale_val == 1.0f) {
+ if (has_q_scale && !ov::is_type(scale_q_node)) {
+ new_scale_node = pattern_map.at(scale_q);
+ q_input = pattern_map.at(q);
+ } else if (has_k_scale && !ov::is_type(scale_k_node)) {
+ new_scale_node = pattern_map.at(scale_k);
+ k_input = pattern_map.at(k);
+ } else {
+ new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val});
+ }
+ } else {
+ new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val});
+ }
+ // Inputs are always rebuilt as {Q, K, V, mask, scale}; a missing mask becomes a scalar zero constant.
+ OutputVector new_inputs = {q_input, k_input, pattern_map.at(v)};
+ if (pattern_map.find(mask) != pattern_map.end()) {
+ new_inputs.push_back(pattern_map.at(mask));
+ } else {
+ new_inputs.push_back(
+ ov::op::v0::Constant::create(new_scale_node.get_element_type(), ov::Shape{}, std::vector{0.0f}));
+ }
+
+ new_inputs.push_back(new_scale_node);
+
+ auto new_sdpa = sdpa->clone_with_new_inputs(new_inputs);
+ new_sdpa->set_friendly_name(sdpa->get_friendly_name());
+ ov::copy_runtime_info(sdpa, new_sdpa);
+ ov::replace_node(sdpa, new_sdpa);
+
+ return true;
+ };
+
+ auto m = std::make_shared(sdpa, "SDPAScaleFusion");
+ this->register_matcher(m, callback);
+}
+
+} // namespace pass
+} // namespace ov
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
index a72a49fb4832eb..397746c75bb84d 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
@@ -7,11 +7,18 @@
#include "openvino/cc/pass/itt.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/matmul.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/shape_of.hpp"
+#include "openvino/op/slice.hpp"
+#include "openvino/op/squeeze.hpp"
+#include "openvino/op/unsqueeze.hpp"
#include "openvino/pass/pattern/op/optional.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/utils/utils.hpp"
using namespace ov::op;
+using namespace ov::pass::pattern;
// TODO: Instead of using the following transformation that matches quite a specific place in a model graph in case when
// position_ids parameter is missing, consider replacing always existing attention_mask parameter with a sub-graph using
@@ -19,25 +26,90 @@ using namespace ov::op;
ov::pass::PositionIDsReplacer::PositionIDsReplacer(const Output& position_ids) {
MATCHER_SCOPE(PositionIDsReplacer);
- auto input_ids = pattern::any_input();
- auto input_embed = pattern::wrap_type({pattern::any_input(), input_ids, pattern::any_input()});
+ auto input_ids = any_input();
+ auto input_embed = wrap_type({any_input(), input_ids, any_input()});
- auto position_ids_pattern = pattern::any_input();
- auto offset = pattern::wrap_type();
- auto add_offset = pattern::wrap_type({position_ids_pattern, offset});
- auto convert = pattern::wrap_type({add_offset});
- auto position_embed = pattern::wrap_type({pattern::any_input(), convert, pattern::any_input()});
+ auto position_ids_pattern = any_input();
+ auto offset = wrap_type();
+ auto add_offset = wrap_type({position_ids_pattern, offset});
+ auto convert = wrap_type({add_offset});
+ auto position_embed = wrap_type({any_input(), convert, any_input()});
- auto mul = pattern::optional({input_embed, pattern::any_input()});
+ auto mul = optional({input_embed, any_input()});
- auto add = pattern::wrap_type({mul, position_embed});
+ auto add = wrap_type({mul, position_embed});
- ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+ ov::matcher_pass_callback callback = [=](Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();
replace_node(pattern_map.at(position_ids_pattern).get_node_shared_ptr(), position_ids.get_node_shared_ptr());
return true;
};
- auto m = std::make_shared(add, matcher_name);
+ auto m = std::make_shared(add, matcher_name);
register_matcher(m, callback);
-}
\ No newline at end of file
+}
+
+ov::pass::PositionIDsReplacerQwen::PositionIDsReplacerQwen(const Output& position_ids) {
+ MATCHER_SCOPE(PositionIDsReplacerQwen);
+ // Replaces the matched "rotary_emb -> Slice -> Slice" chain with Gather(by position_ids) followed by Reshape.
+ auto _const = []() {
+ return wrap_type();
+ };
+
+ // total seq len:
+ auto p_max_context_len = wrap_type();
+ auto p_opt_convert = optional(p_max_context_len);
+ auto p_opt_reshape = optional({p_opt_convert, any_input()});
+
+ // current seq len:
+ // it might be present in 2 different ways:
+ // input_ids -> unsqueeze -> reshape -> convert -> shape_of -> gather
+ // QKV -> variadic_split(Q or K) -> rope Q/K -> shape_of -> gather
+ // Probably we can use the symbols to re-use one of these ways.
+ // Currently, "any_input" is used to detect both places.
+ auto p_shape_of = wrap_type({any_input()});
+ auto p_current_len = wrap_type({p_shape_of, _const(), _const()});
+
+ auto p_neg_const = wrap_type();
+ auto p_neg_mul = wrap_type({p_current_len, p_neg_const});
+ // the rotary_emb_cos/rotary_emb_sin are sliced by the total length [1,..4096,1,128]
+ auto p_rotary_emb_sincos = wrap_type();
+ auto p_slice_1 = wrap_type({p_rotary_emb_sincos, _const(), p_opt_reshape, _const(), _const()});
+ auto p_slice_2 = wrap_type({p_slice_1, p_neg_mul, _const(), _const(), _const()});
+
+ ov::matcher_pass_callback callback = [=](Matcher& m) {
+ const auto& pattern_map = m.get_pattern_value_map();
+ auto max_context_len = pattern_map.at(p_max_context_len).get_node_shared_ptr();
+ if (max_context_len->get_friendly_name() != "max_context_len") {
+ return false;
+ }
+ auto rotary_emb_sincos = pattern_map.at(p_rotary_emb_sincos).get_node_shared_ptr();
+ auto slice_1 = pattern_map.at(p_slice_1).get_node_shared_ptr();
+ auto slice_2 = pattern_map.at(p_slice_2).get_node_shared_ptr();
+
+ auto pshape = rotary_emb_sincos->get_output_partial_shape(0);  // validated before any new node is created
+ if (pshape.rank().is_dynamic() || pshape.rank().get_length() != 4 || pshape[3].is_dynamic()) {
+ return false;  // head_size (dim 3) is read via get_length() below, so it has to be static
+ }
+
+ auto axis = v0::Constant::create(element::i64, Shape{}, {1});
+ // in case of PagedAttention (Continuous batching) the rotary_emb_cos/rotary_emb_sin
+ // are used not in the sequential order, so we need to use position_ids to get the expected values.
+ auto gather = std::make_shared(slice_1->input_value(0), position_ids, axis);
+ gather->set_friendly_name(slice_2->get_friendly_name());
+ gather->validate_and_infer_types();
+
+ // PagedAttention expects the following layout for Q,K,V:
+ // [batch_size_in_tokens, num_kv_heads * head_size]
+ // so here we need to reshape the output tensor to move the seq dim (num tokens) to the batch
+ // num_kv_heads * head_size are already handled in the StateManagementPattern transformation
+ auto head_size = static_cast(pshape[3].get_length());
+ auto new_shape = v0::Constant::create(element::i64, Shape{4}, std::vector{-1, 1, 1, head_size});
+ auto reshape = std::make_shared(gather, new_shape, false);
+ replace_node(slice_2, reshape);
+ return true;
+ };
+
+ auto m = std::make_shared(p_slice_2, matcher_name);
+ register_matcher(m, callback);
+}
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp
index 36d9d88975b2e0..55d7af822c3857 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp
@@ -14,8 +14,9 @@
using namespace ov::op;
-ov::pass::PrevSequenceLengthPattern::PrevSequenceLengthPattern(std::shared_ptr prev_max_seq_len,
- std::shared_ptr batch_dim) {
+ov::pass::PrevSequenceLengthPattern::PrevSequenceLengthPattern(const std::shared_ptr& unsqueezed_input_ids,
+ const std::shared_ptr& max_context_len,
+ const std::shared_ptr& position_ids) {
MATCHER_SCOPE(PrevSequenceLengthPattern);
// The transformation addresses two cases that look similar: (1) previous sequence length, (2) batch size in
// kv-cache state In first case it should replace it by prev_max_seq_len. For the second case, connect to batch_dim.
@@ -40,8 +41,16 @@ ov::pass::PrevSequenceLengthPattern::PrevSequenceLengthPattern(std::shared_ptrget_output_element_type(0);
std::shared_ptr replacement;
if (kv_init_shape[axis].is_static() && kv_init_shape[axis].get_length() == 0) {
+ auto cur_seq_len = std::make_shared(std::make_shared(unsqueezed_input_ids),
+ v0::Constant::create(element::i64, Shape{}, {1}),
+ v0::Constant::create(element::i64, Shape{}, {0}));
+ auto cur_seq_len_i32 = std::make_shared(cur_seq_len, element::i32);
+ auto prev_max_seq_len = std::make_shared(max_context_len, cur_seq_len_i32);
replacement = prev_max_seq_len;
} else {
+ // it is not always required, so will be disposed if not needed
+ auto batch_dim = std::make_shared(position_ids);
+
// assumption that any other axis should point to batch dimension, precise reasoning is too complex
// TODO: provide more reliable check
replacement = batch_dim;
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp
index b55c3d73316120..a36085c34237a4 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp
@@ -437,6 +437,7 @@ ov::pass::StateManagementPattern::StateManagementPattern(ParameterVector& kv_par
parameters_to_remove.push_back(param);
}
+ pa_transpose->set_friendly_name(sdpa_node->get_friendly_name());
replace_node(m.get_match_root(), pa_transpose);
return true;
};
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp
index 18387d5ca1ae04..cbf9426a0c82c5 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp
@@ -6,27 +6,49 @@
#include "openvino/cc/pass/itt.hpp"
#include "openvino/core/validation_util.hpp"
+#include "openvino/op/add.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/gather.hpp"
+#include "openvino/op/reshape.hpp"
#include "openvino/op/shape_of.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/op/unsqueeze.hpp"
+#include "openvino/pass/pattern/op/optional.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/utils/utils.hpp"
using namespace ov::op;
+using namespace ov::pass::pattern;
+
+namespace {
+// Aligns `replacement` (updated in place) with the requested element type and, if its rank is static, the required rank.
+void align_replacement(std::shared_ptr& replacement,
+ const ov::PartialShape& required_shape,
+ ov::element::Type target_type) {
+ if (replacement->get_output_element_type(0) != target_type) {
+ replacement = std::make_shared(replacement, target_type);
+ }
+ // On a shape mismatch the node is reshaped to an all-ones shape whose rank equals the required rank.
+ if (replacement->get_output_partial_shape(0) != required_shape && required_shape.rank().is_static()) {
+ replacement = ov::op::util::reshapeTo(replacement, ov::Shape(required_shape.rank().get_length(), 1));
+ }
+}
+
+} // namespace
ov::pass::TotalSequenceLengthPattern::TotalSequenceLengthPattern(
const std::shared_ptr& max_context_len) {
MATCHER_SCOPE(TotalSequenceLengthPattern);
- auto kv_past = pattern::wrap_type({pattern::any_input()});
- auto kv_gather = pattern::wrap_type({kv_past, pattern::any_input(), pattern::any_input()});
- auto kv_current = pattern::any_input();
- auto kv_concat = pattern::wrap_type({kv_gather, kv_current});
- auto kv_shape = pattern::wrap_type({kv_concat});
- auto gather_idx_label = pattern::wrap_type();
- auto seq = pattern::wrap_type({kv_shape, gather_idx_label, pattern::any_input()});
+ auto kv_past = wrap_type({any_input()});
+ auto kv_gather = wrap_type({kv_past, any_input(), any_input()});
+ auto kv_current = any_input();
+ auto kv_concat = wrap_type({kv_gather, kv_current});  // joins the gathered past with kv_current
+ auto kv_shape = wrap_type({kv_concat});
+ auto gather_idx_label = wrap_type();
+ auto seq = wrap_type({kv_shape, gather_idx_label, any_input()});  // root of the pattern handed to the matcher below
- ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+ ov::matcher_pass_callback callback = [=](Matcher& m) {
// TODO: Check that seq has axis that really takes sequence len but not any other dimension --
// use symbolic infra or look at the constant input
const auto& pattern_map = m.get_pattern_value_map();
@@ -71,16 +93,8 @@ ov::pass::TotalSequenceLengthPattern::TotalSequenceLengthPattern(
if (concat_axis_to_compare == gather_idx_to_compare) {
auto target_type = gather->get_output_element_type(0);
-
- if (replacement->get_output_element_type(0) != target_type) {
- replacement = std::make_shared(replacement, target_type);
- }
-
auto required_shape = gather->get_output_partial_shape(0);
-
- if (replacement->get_output_partial_shape(0) != required_shape && required_shape.rank().is_static()) {
- replacement = op::util::reshapeTo(replacement, Shape(required_shape.rank().get_length(), 1));
- }
+ align_replacement(replacement, required_shape, target_type);  // make replacement agree with gather's output type and rank
} else {
// TODO: change in the future when we start supporting dynamic shapes here
replacement = ov::util::get_constant_from_source(gather->output(0));
@@ -94,6 +108,41 @@ ov::pass::TotalSequenceLengthPattern::TotalSequenceLengthPattern(
return true;
};
- auto m = std::make_shared(seq, matcher_name);
+ auto m = std::make_shared(seq, matcher_name);
+ register_matcher(m, callback);
+}
+
+ov::pass::TotalSequenceLengthPatternQwen::TotalSequenceLengthPatternQwen(
+ const std::shared_ptr& max_context_len) {
+ MATCHER_SCOPE(TotalSequenceLengthPatternQwen);
+ // Matches a two-input root fed by two chains and replaces it with max_context_len aligned to the root's type/rank.
+ auto p_input_ids = wrap_type();  // first chain starts here; op types are template arguments not visible in this text
+ auto p_unsqueeze = wrap_type({p_input_ids, any_input()});
+ auto p_opt_reshape_1 = optional({p_unsqueeze, any_input()});  // may be absent in the matched graph
+ auto p_opt_convert_1 = optional(p_opt_reshape_1);  // may be absent in the matched graph
+ auto p_kv_shape_current = wrap_type({p_opt_convert_1});
+ auto p_seq_current = wrap_type({p_kv_shape_current, any_input(), any_input()});
+ auto p_opt_convert_2 = optional(p_seq_current);  // end of the first chain
+
+ auto p_max_context_len = wrap_type();  // second chain starts here
+ auto p_prev_max_seq_len = wrap_type({p_max_context_len, any_input()});
+ auto p_opt_convert_3 = optional(p_prev_max_seq_len);
+ auto p_opt_reshape_2 = optional({p_opt_convert_3, any_input()});  // end of the second chain
+ auto p_total_seq = wrap_type({p_opt_convert_2, p_opt_reshape_2});  // pattern root: combines both chains
+
+ ov::matcher_pass_callback callback = [=](Matcher& m) {  // [=] copies the pattern nodes and max_context_len into the closure
+ const auto& pattern_map = m.get_pattern_value_map();
+ auto total_seq = pattern_map.at(p_total_seq).get_node_shared_ptr();  // the matched root node
+ std::shared_ptr replacement = max_context_len;  // start from the node passed to the constructor
+
+ auto target_type = total_seq->get_output_element_type(0);
+ auto required_shape = total_seq->get_output_partial_shape(0);
+ align_replacement(replacement, required_shape, target_type);  // may reassign replacement to a converted/reshaped node
+
+ replace_node(total_seq, replacement);
+ return true;
+ };
+
+ auto m = std::make_shared(p_total_seq, matcher_name);
register_matcher(m, callback);
-}
\ No newline at end of file
+}
diff --git a/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp
new file mode 100644
index 00000000000000..52c10ba5967bd8
--- /dev/null
+++ b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp
@@ -0,0 +1,234 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common_test_utils/ov_test_utils.hpp"
+#include "openvino/op/matmul.hpp"
+#include "openvino/op/softmax.hpp"
+#include "openvino/op/transpose.hpp"
+
+using namespace testing;
+using namespace ov::pass;
+using namespace ov;
+
+TEST_F(TransformationTestsF, SDPAFusionTest1) {  // f32 case: qk -> softmax -> qkv chain vs. a single reference node
+ const PartialShape query_shape{1, 32, -1, 32};  // -1 is a dynamic dimension
+ const PartialShape key_shape{1, 32, -1, 32};
+ const PartialShape value_shape{1, 32, -1, 32};
+
+ const auto query = std::make_shared(element::f32, query_shape);
+ const auto key = std::make_shared(element::f32, key_shape);
+ const auto value = std::make_shared(element::f32, value_shape);
+ const auto casual = false;  // last argument of the reference node below ("casual" spelling as in source)
+ {  // model under test and pass registration
+ const auto qk = std::make_shared(query, key, false, true);  // 4th argument true: presumably transposes key -- TODO confirm
+ const auto softmax = std::make_shared(qk, -1);  // axis -1
+ const auto qkv = std::make_shared(softmax, value, false, false);
+
+ model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value});
+ manager.register_pass();  // pass type is a template argument not visible in this text
+ }
+
+ {  // reference model: scalar scale 1.0 and scalar mask 0.0
+ const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{1.0f});
+ const auto mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f});
+ const auto sdpa = std::make_shared(query,
+ key,
+ value,
+ mask_const,
+ scale_const,
+ casual);
+ model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value});
+ }
+
+ comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);  // also compare constant values
+ comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);  // also compare node attributes
+}
+
+TEST_F(TransformationTestsF, SDPAFusionTest2) {  // same graph as SDPAFusionTest1 but with f16 inputs and constants
+ const PartialShape query_shape{1, 32, -1, 32};  // -1 is a dynamic dimension
+ const PartialShape key_shape{1, 32, -1, 32};
+ const PartialShape value_shape{1, 32, -1, 32};
+
+ const auto query = std::make_shared(element::f16, query_shape);
+ const auto key = std::make_shared(element::f16, key_shape);
+ const auto value = std::make_shared(element::f16, value_shape);
+ const auto casual = false;  // last argument of the reference node below ("casual" spelling as in source)
+ {  // model under test and pass registration
+ const auto qk = std::make_shared(query, key, false, true);  // 4th argument true: presumably transposes key -- TODO confirm
+ const auto softmax = std::make_shared(qk, -1);  // axis -1
+ const auto qkv = std::make_shared(softmax, value, false, false);
+
+ model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value});
+ manager.register_pass();  // pass type is a template argument not visible in this text
+ }
+
+ {  // reference model: scalar scale 1.0 and scalar mask 0.0, both f16
+ const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f});
+ const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f});
+ const auto sdpa = std::make_shared(query,
+ key,
+ value,
+ mask_const,
+ scale_const,
+ casual);
+ model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value});
+ }
+
+ comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);  // also compare constant values
+ comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);  // also compare node attributes
+}
+
+TEST_F(TransformationTestsF, SDPAFusionTest3) {  // f16 case where key goes through an explicit key_t node before qk
+ const PartialShape query_shape{1, 32, -1, 32};  // -1 is a dynamic dimension
+ const PartialShape key_shape{1, 32, -1, 32};
+ const PartialShape value_shape{1, 32, -1, 32};
+
+ const auto query = std::make_shared(element::f16, query_shape);
+ const auto key = std::make_shared(element::f16, key_shape);
+ const auto value = std::make_shared(element::f16, value_shape);
+ const auto casual = false;  // last argument of the reference node below ("casual" spelling as in source)
+ {  // model under test and pass registration
+ const auto key_t =
+ std::make_shared(key,
+ op::v0::Constant::create(element::i64, Shape{4}, {0, 1, 3, 2}));  // order swaps the last two axes; presumably a Transpose
+ const auto qk = std::make_shared(query, key_t, false, false);  // both flags false here, unlike Test1/Test2
+ const auto softmax = std::make_shared(qk, -1);  // axis -1
+ const auto qkv = std::make_shared(softmax, value, false, false);
+
+ model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value});
+ manager.register_pass();  // pass type is a template argument not visible in this text
+ }
+
+ {  // reference model takes key directly (not key_t), with scalar scale 1.0 and scalar mask 0.0
+ const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f});
+ const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f});
+ const auto sdpa = std::make_shared(query,
+ key,
+ value,
+ mask_const,
+ scale_const,
+ casual);
+ model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value});
+ }
+
+ comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);  // also compare constant values
+ comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);  // also compare node attributes
+}
+
+TEST_F(TransformationTestsF, SDPAFusionTest4) {  // reference is a clone of the input model, i.e. no change is expected
+ const PartialShape query_shape{1, 32, -1, 32};  // -1 is a dynamic dimension
+ const PartialShape key_shape{1, 32, 32, -1};  // dynamic dimension is last here, unlike query/value
+ const PartialShape value_shape{1, 32, -1, 32};
+
+ const auto query = std::make_shared(element::f16, query_shape);
+ const auto key = std::make_shared(element::f16, key_shape);
+ const auto value = std::make_shared(element::f16, value_shape);
+ {  // model under test and pass registration
+ const auto qk = std::make_shared(query, key, false, false);  // both flags false
+ const auto softmax = std::make_shared(qk, -1);  // axis -1
+ const auto qkv = std::make_shared(softmax, value, false, false);
+
+ model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value});
+ manager.register_pass();  // pass type is a template argument not visible in this text
+ }
+
+ model_ref = model->clone();  // expected result equals the original graph
+
+ comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);  // also compare constant values
+ comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);  // also compare node attributes
+}
+
+TEST_F(TransformationTestsF, SDPAFusionTest5) {  // f16 case with an extra mask parameter combined with qk before softmax
+ const PartialShape query_shape{1, 32, -1, 32};  // -1 is a dynamic dimension
+ const PartialShape key_shape{1, 32, -1, 32};
+ const PartialShape value_shape{1, 32, -1, 32};
+ const PartialShape attention_mask_shape{1, 32, -1, -1};  // last two dimensions dynamic
+
+ const auto query = std::make_shared(element::f16, query_shape);
+ const auto key = std::make_shared(element::f16, key_shape);
+ const auto value = std::make_shared(element::f16, value_shape);
+ const auto mask = std::make_shared(element::f16, attention_mask_shape);
+ const auto casual = false;  // last argument of the reference node below ("casual" spelling as in source)
+ {  // model under test and pass registration
+ const auto qk = std::make_shared(query, key, false, true);  // 4th argument true: presumably transposes key -- TODO confirm
+ const auto mask_add = std::make_shared(qk, mask);  // node type not visible here; presumably an Add, per the name
+ const auto softmax = std::make_shared(mask_add, -1);  // axis -1
+ const auto qkv = std::make_shared(softmax, value, false, false);
+
+ model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask});
+ manager.register_pass();  // pass type is a template argument not visible in this text
+ }
+
+ {  // reference model: mask parameter is passed straight to the single node, scale is scalar 1.0
+ const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f});
+ const auto sdpa =
+ std::make_shared(query, key, value, mask, scale_const, casual);
+ model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask});
+ }
+
+ comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);  // also compare constant values
+ comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);  // also compare node attributes
+}
+
+TEST_F(TransformationTestsF, SDPAFusionTest6) {  // like SDPAFusionTest5 but with fully static shapes
+ const PartialShape query_shape{1, 32, 10, 32};
+ const PartialShape key_shape{1, 32, 10, 32};
+ const PartialShape value_shape{1, 32, 10, 32};
+ const PartialShape attention_mask_shape{1, 1, 10, 10};  // second dimension is 1 while query/key/value have 32
+
+ const auto query = std::make_shared(element::f16, query_shape);
+ const auto key = std::make_shared(element::f16, key_shape);
+ const auto value = std::make_shared(element::f16, value_shape);
+ const auto mask = std::make_shared(element::f16, attention_mask_shape);
+ const auto casual = false;  // last argument of the reference node below ("casual" spelling as in source)
+ {  // model under test and pass registration
+ const auto qk = std::make_shared(query, key, false, true);  // 4th argument true: presumably transposes key -- TODO confirm
+ const auto mask_add = std::make_shared(qk, mask);  // node type not visible here; presumably an Add, per the name
+ const auto softmax = std::make_shared(mask_add, -1);  // axis -1
+ const auto qkv = std::make_shared(softmax, value, false, false);
+
+ model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask});
+ manager.register_pass();  // pass type is a template argument not visible in this text
+ }
+
+ {  // reference model: mask parameter is passed straight to the single node, scale is scalar 1.0
+ const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f});
+ const auto sdpa =
+ std::make_shared(query, key, value, mask, scale_const, casual);
+ model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask});
+ }
+
+ comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);  // also compare constant values
+ comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);  // also compare node attributes
+}
+
+TEST_F(TransformationTestsF, SDPAFusionTest7) {  // key has a different layout and is permuted with order {1, 2, 3, 0}
+ const PartialShape query_shape{1, 8, -1, 32};  // -1 is a dynamic dimension
+ const PartialShape key_shape{-1, 1, 8, 32};  // dynamic dimension comes first here
+ const PartialShape value_shape{1, 8, -1, 32};
+
+ const auto query = std::make_shared(element::f16, query_shape);
+ const auto key = std::make_shared(element::f16, key_shape);
+ const auto value = std::make_shared(element::f16, value_shape);
+ {  // model under test and pass registration
+ const auto key_t =
+ std::make_shared(key,
+ op::v0::Constant::create(element::i64, Shape{4}, {1, 2, 3, 0}));  // moves axis 0 to the end; presumably a Transpose
+ const auto qk = std::make_shared(query, key_t, false, false);  // both flags false
+ const auto softmax = std::make_shared(qk, -1);  // axis -1
+ const auto qkv = std::make_shared(softmax, value, false, false);
+
+ model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value});
+ manager.register_pass();  // pass type is a template argument not visible in this text
+ }
+}  // NOTE(review): no model_ref or comparator flags are set in this test -- confirm the fixture's default reference is intended
diff --git a/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp
new file mode 100644
index 00000000000000..f922f030a9c43b
--- /dev/null
+++ b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp
@@ -0,0 +1,228 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common_test_utils/ov_test_utils.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/scaled_dot_product_attention.hpp"
+
+using namespace testing;
+using namespace ov::pass;
+using namespace ov;
+
+TEST_F(TransformationTestsF, SDPAScaleFusionTest1) {
+ const PartialShape query_shape{1, 32, -1, 32};
+ const PartialShape key_shape{1, 32, -1, 32};
+ const PartialShape value_shape{1, 32, -1, 32};
+
+ const auto query = std::make_shared(element::f32, query_shape);
+ const auto key = std::make_shared(element::f32, key_shape);
+ const auto value = std::make_shared(element::f32, value_shape);
+ const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f});
+ const auto v_scaled = std::make_shared(value, scale_const);
+ const auto casual = false;
+ {
+ const auto q_scaled = std::make_shared(query, scale_const);
+ const auto k_scaled = std::make_shared(key, scale_const);
+ const auto sdpa =
+ std::make_shared(q_scaled, k_scaled, v_scaled, casual);
+
+ model = std::make_shared