diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json
index b9cac8f17adaa6..d8fe6ac2df03d2 100644
--- a/.github/scripts/workflow_rerun/errors_to_look_for.json
+++ b/.github/scripts/workflow_rerun/errors_to_look_for.json
@@ -86,5 +86,25 @@
{
"error_text": "because the GET request got Content-Type",
"ticket": 158400
+ },
+ {
+ "error_text": "Unable to make request:",
+ "ticket": 158401
+ },
+ {
+ "error_text": "Failed to make request",
+ "ticket": 158401
+ },
+ {
+ "error_text": "Failure when receiving data from the peer",
+ "ticket": 159323
+ },
+ {
+ "error_text": "HTTP response code said error",
+ "ticket": 159398
+ },
+ {
+ "error_text": "download failed after attempts",
+ "ticket": 159547
}
]
\ No newline at end of file
diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst
index d877cb1768d44d..f4ec275491fa32 100644
--- a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst
+++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst
@@ -6,16 +6,14 @@ models from OpenVINO-supported frameworks may also work properly but have not be
**AI Models that run on Intel® Core Ultra™ Processors with OpenVINO™ toolkit:**
-.. raw:: html
-
-
-
-
-.. csv-table::
+.. data-table::
:class: modeldata stripe
:name: supportedModelsTable
:header-rows: 1
:file: ../../_static/download/supported_models.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
| Marked cells indicate models that passed inference with no errors. Empty cells indicate
diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst
index d27f7626391f46..1bd8f5dae7c634 100644
--- a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst
+++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst
@@ -41,27 +41,36 @@ Data as of OpenVINO 2024.4, 18 Oct. 2024.
.. tab-item:: PyTorch
- .. csv-table::
+ .. data-table::
:class: modeldata stripe
- :name: TensorFlow ops
+   :name: PyTorch_ops
:header-rows: 1
:file: ../../_static/conformance_files/pytorch_ops.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: TensorFlow
- .. csv-table::
+ .. data-table::
:class: modeldata stripe
- :name: TensorFlow ops
+ :name: TensorFlow_ops_v2
:header-rows: 1
:file: ../../_static/conformance_files/tensorflow_ops.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: PaddlePaddle
- .. csv-table::
+ .. data-table::
:class: modeldata stripe
- :name: Paddle ops
+ :name: Paddle_ops
:header-rows: 1
:file: ../../_static/conformance_files/paddlepaddle_ops.csv
+ :data-column-hidden: []
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: ONNX
diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst
index 085a1ff8449151..83581d465df92e 100644
--- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst
+++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst
@@ -8,10 +8,6 @@ The current data is as of OpenVINO 2024.4, 20 Nov. 2024.
The tables below list the key performance indicators for inference on built-in GPUs.
-.. raw:: html
-
-
-
.. tab-set::
@@ -22,7 +18,9 @@ The tables below list the key performance indicators for inference on built-in G
:name: supportedModelsTable_V1
:header-rows: 1
:file: ../../_static/benchmarks_files/llm_models_9-288V.csv
- :hidden: [3,4,6]
+ :data-column-hidden: [3,4,6]
+ :data-order: [[ 0, "asc" ]]
+ :data-page-length: 10
.. tab-item:: 7-268V
@@ -31,7 +29,8 @@ The tables below list the key performance indicators for inference on built-in G
:name: supportedModelsTable_V2
:header-rows: 1
:file: ../../_static/benchmarks_files/llm_models_7-258V.csv
- :hidden: [3,4,6]
+ :data-column-hidden: [3,4,6]
+ :data-order: [[ 0, "asc" ]]
.. tab-item:: 7-155H
@@ -40,7 +39,8 @@ The tables below list the key performance indicators for inference on built-in G
:name: supportedModelsTable_V3
:header-rows: 1
:file: ../../_static/benchmarks_files/llm_models_7-155H.csv
- :hidden: [3,4,6]
+ :data-column-hidden: [3,4,6]
+ :data-order: [[ 0, "asc" ]]
.. grid:: 1 1 2 2
diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py b/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py
index c3e0e81eec3b3a..814517289ce114 100644
--- a/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py
+++ b/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py
@@ -11,7 +11,7 @@
import requests
import re
import json
-
+import html
import csv
logger = logging.getLogger(__name__)
@@ -147,7 +147,9 @@ class DataTable(Directive):
'file': directives.path,
'class': directives.unchanged,
'name': directives.unchanged,
- 'hidden': directives.unchanged
+ 'data-column-hidden': directives.unchanged,
+ 'data-page-length': directives.unchanged,
+ 'data-order': directives.unchanged
}
def run(self) -> List[Node]:
@@ -159,10 +161,12 @@ def run(self) -> List[Node]:
csv_node = []
with open(csv_file, 'r') as j:
csv_data = list(csv.reader(j))
- class_table_tag = ' class="' + "".join(c for c in str(self.options['class']) + '"') if 'class' in self.options is not None else ""
- id_table_tag = ' id="' + "".join(c for c in str(self.options['name']) + '"') if 'name' in self.options is not None else ""
- hidden_table_tag = ' data-columns-hidden="' + "".join(c for c in str(self.options['hidden']) + '"') if 'hidden' in self.options is not None else ""
-        csv_table_html = '<table' + class_table_tag + id_table_tag + hidden_table_tag + '>'
+ class_table_tag = f' class="{html.escape(self.options["class"])}"' if "class" in self.options else ""
+ id_table_tag = f' id="{html.escape(self.options["name"])}"' if "name" in self.options else ""
+ data_column_hidden_tag = f' data-column-hidden="{html.escape(self.options["data-column-hidden"])}"' if "data-column-hidden" in self.options else ""
+ data_order_tag = f' data-order="{html.escape(self.options["data-order"])}"' if "data-order" in self.options else ""
+ data_page_length_tag = f' data-page-length="{html.escape(self.options["data-page-length"])}"' if "data-page-length" in self.options else ""
+        csv_table_html = f'<table{class_table_tag}{id_table_tag}{data_column_hidden_tag}{data_order_tag}{data_page_length_tag}>'
head_rows = 0
head_rows += self.options.get('header-rows', 0)
row_count = 0
diff --git a/docs/sphinx_setup/_static/css/custom.css b/docs/sphinx_setup/_static/css/custom.css
index de8a05732a4d06..1679f7309da044 100644
--- a/docs/sphinx_setup/_static/css/custom.css
+++ b/docs/sphinx_setup/_static/css/custom.css
@@ -69,7 +69,7 @@ a#wap_dns {
/* Sphinx-design tabs override */
.sd-tab-set>input:checked+label {
color: var(--sd-color-black) !important;
- background-color: #f8f8f8 !important;
+ background-color: white !important;
border: solid 1px #bdbdbd;
border-bottom: solid 0px;
margin-bottom: -1px;
@@ -96,7 +96,7 @@ a#wap_dns {
cursor: pointer;
font-size: var(--sd-fontsize-tabs-label);
font-weight: 400 !important;
- padding: 5px 16px 2px !important;
+ padding: 5px 16px 0px !important;
transition: color 250ms;
width: auto;
z-index: 1;
@@ -110,7 +110,6 @@ a#wap_dns {
box-shadow: 0 0 0 0;
border: solid 1px var(--sd-color-tabs-overline);
border-color: #bdbdbd;
- background-color: #f8f8f8;
padding-right: 4px;
padding-left: 4px;
padding-bottom: 6px;
diff --git a/docs/sphinx_setup/_static/css/openVinoDataTables.css b/docs/sphinx_setup/_static/css/openVinoDataTables.css
index 526aabb6abe15d..bedc0f5206e260 100644
--- a/docs/sphinx_setup/_static/css/openVinoDataTables.css
+++ b/docs/sphinx_setup/_static/css/openVinoDataTables.css
@@ -6,8 +6,7 @@ div.dt-buttons>.dt-button, div.dt-buttons>div.dt-button-split .dt-button {
}
div.dt-container .dt-paging .dt-paging-button:hover {
- color: white !important;
- border: 1px solid #aaa;
+ border: 1px solid #aaa !important;
background:none !important;
background-color: var(--bttn-act-bg-hover) !important
}
@@ -190,10 +189,9 @@ div.dt-container .dt-paging .dt-paging-button {
div.dt-container .dt-paging .dt-paging-button.current, div.dt-container .dt-paging .dt-paging-button.current:hover {
background: none !important;
- background-color: var(--bttn-act-bg-active) !important;
+ background-color: var(--bttn-sec-border-color) !important;
border-color: var(--bttn-act-bg-active) !important;
border-radius: 0px !important;
- color: white !important;
border: 1px !important
}
table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order:before, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order:after {
diff --git a/docs/sphinx_setup/_static/js/openVinoDataTables.js b/docs/sphinx_setup/_static/js/openVinoDataTables.js
index bd56a71533786c..fb3a57d959020c 100644
--- a/docs/sphinx_setup/_static/js/openVinoDataTables.js
+++ b/docs/sphinx_setup/_static/js/openVinoDataTables.js
@@ -1,16 +1,15 @@
$(document).ready(function () {
var columnDefs = [];
-
var tables = $('table.modeldata');
for (let table of tables) {
- var hidden = table.getAttribute('data-columns-hidden');
+ var hidden = table.getAttribute('data-column-hidden');
columnDefs = [{ "visible": false, "targets": JSON.parse(hidden) }]
$(table).DataTable({
responsive: true,
"autoWidth": false,
language: {
buttons: {
- colvisRestore: "Restore default"
+ colvisRestore: "Restore default selection"
}
},
lengthMenu: [
diff --git a/docs/sphinx_setup/_templates/layout.html b/docs/sphinx_setup/_templates/layout.html
index 0d2331b2c83fe3..a791091e1f13a4 100644
--- a/docs/sphinx_setup/_templates/layout.html
+++ b/docs/sphinx_setup/_templates/layout.html
@@ -9,6 +9,7 @@
+
diff --git a/src/bindings/python/src/openvino/__init__.py b/src/bindings/python/src/openvino/__init__.py
index 7643f742e0067d..69c678909b1c9e 100644
--- a/src/bindings/python/src/openvino/__init__.py
+++ b/src/bindings/python/src/openvino/__init__.py
@@ -7,7 +7,7 @@
# Required for Windows OS platforms
# Note: always top-level
try:
- from openvino.package_utils import _add_openvino_libs_to_search_path
+ from openvino.utils import _add_openvino_libs_to_search_path
_add_openvino_libs_to_search_path()
except ImportError:
pass
@@ -17,47 +17,6 @@
# # This __init__.py forces checking of runtime modules to propagate errors.
# # It is not compared with init files from openvino-dev package.
# #
-
-# Openvino pybind bindings
-from openvino._pyopenvino import AxisSet
-from openvino._pyopenvino import AxisVector
-from openvino._pyopenvino import ConstOutput
-from openvino._pyopenvino import Coordinate
-from openvino._pyopenvino import CoordinateDiff
-from openvino._pyopenvino import DiscreteTypeInfo
-from openvino._pyopenvino import Extension
-from openvino._pyopenvino import ProfilingInfo
-from openvino._pyopenvino import RTMap
-from openvino._pyopenvino import Version
-from openvino._pyopenvino import Symbol
-from openvino._pyopenvino import Dimension
-from openvino._pyopenvino import Input
-from openvino._pyopenvino import Output
-from openvino._pyopenvino import Node
-from openvino._pyopenvino import Strides
-from openvino._pyopenvino import PartialShape
-from openvino._pyopenvino import Shape
-from openvino._pyopenvino import Layout
-from openvino._pyopenvino import Type
-from openvino._pyopenvino import Tensor
-from openvino._pyopenvino import OVAny
-from openvino._pyopenvino import get_batch
-from openvino._pyopenvino import set_batch
-from openvino._pyopenvino import serialize
-from openvino._pyopenvino import shutdown
-from openvino._pyopenvino import save_model
-from openvino._pyopenvino import layout_helpers
-from openvino._pyopenvino import RemoteContext
-from openvino._pyopenvino import RemoteTensor
-from openvino._pyopenvino import Op
-
-# Import public classes from _ov_api
-from openvino._ov_api import Model
-from openvino._ov_api import Core
-from openvino._ov_api import CompiledModel
-from openvino._ov_api import InferRequest
-from openvino._ov_api import AsyncInferQueue
-
# Import all public modules
from openvino import runtime as runtime
from openvino import frontend as frontend
@@ -67,10 +26,36 @@
from openvino import utils as utils
from openvino import properties as properties
+# Import most important classes and functions from openvino.runtime
+from openvino._ov_api import Model
+from openvino._ov_api import Core
+from openvino._ov_api import CompiledModel
+from openvino._ov_api import InferRequest
+from openvino._ov_api import AsyncInferQueue
+
+from openvino.runtime import Symbol
+from openvino.runtime import Dimension
+from openvino.runtime import Strides
+from openvino.runtime import PartialShape
+from openvino.runtime import Shape
+from openvino.runtime import Layout
+from openvino.runtime import Type
+from openvino.runtime import Tensor
+from openvino.runtime import OVAny
+
# Helper functions for openvino module
-from openvino.utils.data_helpers import tensor_from_file
+from openvino.runtime.utils.data_helpers import tensor_from_file
from openvino._ov_api import compile_model
+from openvino.runtime import get_batch
+from openvino.runtime import set_batch
+from openvino.runtime import serialize
+from openvino.runtime import shutdown
+from openvino.runtime import save_model
+from openvino.runtime import layout_helpers
+from openvino._pyopenvino import RemoteContext
+from openvino._pyopenvino import RemoteTensor
+from openvino._pyopenvino import Op
# Import opsets
from openvino import opset1
@@ -95,7 +80,7 @@
from openvino._pyopenvino import VASurfaceTensor
# Set version for openvino package
-from openvino._pyopenvino import get_version
+from openvino.runtime import get_version
__version__ = get_version()
# Tools
diff --git a/src/bindings/python/src/openvino/_ov_api.py b/src/bindings/python/src/openvino/_ov_api.py
index da31fab4c95d8e..53d0fa5316498b 100644
--- a/src/bindings/python/src/openvino/_ov_api.py
+++ b/src/bindings/python/src/openvino/_ov_api.py
@@ -5,7 +5,9 @@
from types import TracebackType
from typing import Any, Iterable, Union, Optional, Dict, Type
from pathlib import Path
+import warnings
+import numpy as np
from openvino._pyopenvino import Model as ModelBase
from openvino._pyopenvino import Core as CoreBase
@@ -14,7 +16,7 @@
from openvino._pyopenvino import Tensor
from openvino._pyopenvino import Node
-from openvino.utils.data_helpers import (
+from openvino.runtime.utils.data_helpers import (
OVDict,
_InferRequestWrapper,
_data_dispatch,
diff --git a/src/bindings/python/src/openvino/frontend/frontend.py b/src/bindings/python/src/openvino/frontend/frontend.py
index 6a16d5a573b7d7..4d549d24b4ef7c 100644
--- a/src/bindings/python/src/openvino/frontend/frontend.py
+++ b/src/bindings/python/src/openvino/frontend/frontend.py
@@ -7,7 +7,7 @@
from openvino._pyopenvino import FrontEnd as FrontEndBase
from openvino._pyopenvino import FrontEndManager as FrontEndManagerBase
from openvino._pyopenvino import InputModel
-from openvino import Model
+from openvino.runtime import Model
class FrontEnd(FrontEndBase):
diff --git a/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py b/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py
index 9072598f824939..914f6b2e2ee548 100644
--- a/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py
@@ -6,7 +6,7 @@
import jax.core
from openvino.frontend.jax.py_jax_frontend import _FrontEndJaxDecoder as Decoder
-from openvino import PartialShape, Type as OVType, OVAny
+from openvino.runtime import PartialShape, Type as OVType, OVAny
from openvino.frontend.jax.utils import jax_array_to_ov_const, get_ov_type_for_value, \
ivalue_to_constant, param_to_constants
diff --git a/src/bindings/python/src/openvino/frontend/jax/utils.py b/src/bindings/python/src/openvino/frontend/jax/utils.py
index 659677b11d5af8..4535265d6de082 100644
--- a/src/bindings/python/src/openvino/frontend/jax/utils.py
+++ b/src/bindings/python/src/openvino/frontend/jax/utils.py
@@ -8,7 +8,7 @@
import jax.numpy as jnp
import numpy as np
from openvino.frontend.jax.passes import filter_element, filter_ivalue, filter_param
-from openvino import op, Type as OVType, Shape, OVAny
+from openvino.runtime import op, Type as OVType, Shape, OVAny
numpy_to_ov_type_map = {
np.float32: OVType.f32,
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py
index 81a2764ee1188d..c448571f1ac17a 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py
@@ -10,7 +10,7 @@
from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder
from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
-from openvino import PartialShape, Type as OVType, OVAny, Shape
+from openvino.runtime import PartialShape, Type as OVType, OVAny, Shape
from openvino.frontend.pytorch.utils import make_constant, fetch_attr, pt_to_ov_type_map, torch_tensor_to_ov_const
logger = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py
index a9a65781dcb254..9f2ef019769875 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py
@@ -18,7 +18,7 @@
from torch._decomp import decomposition_table, get_decompositions
from openvino.frontend import FrontEndManager
-from openvino import Core, Type, PartialShape
+from openvino.runtime import Core, Type, PartialShape
from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder
from openvino.frontend.pytorch.torchdynamo import decompositions
from openvino.frontend.pytorch.torchdynamo.decompositions import get_aot_decomposition_list, get_inf_decomposition_list
@@ -27,7 +27,7 @@
from openvino.frontend.pytorch.torchdynamo.compile import cached_model_name, openvino_compile_cached_model
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_model_caching, _get_decompositions, _get_aot_autograd
-from openvino import Core, Type, PartialShape
+from openvino.runtime import Core, Type, PartialShape
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py
index c9a772b3feac42..47b3b82806b18b 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py
@@ -5,7 +5,7 @@
# mypy: ignore-errors
from typing import Optional, Any
-from openvino import Core
+from openvino.runtime import Core
def _get_device(options) -> Optional[Any]:
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py
index ca8d5478e76c15..fa446893a05d07 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py
@@ -14,7 +14,7 @@
from openvino.frontend import FrontEndManager
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
-from openvino import Core, Type, PartialShape, serialize
+from openvino.runtime import Core, Type, PartialShape, serialize
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_config, _is_cache_dir_in_config
from typing import Callable, Optional
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py
index 7527ad7acb37a4..4f41f7b5a6a9de 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py
@@ -20,7 +20,7 @@
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.frontend.pytorch.torchdynamo.compile import openvino_compile
-from openvino import Core, Type, PartialShape
+from openvino.runtime import Core, Type, PartialShape
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_aot_autograd
from typing import Callable, Optional, Any
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
index 7bb8073167a654..6d8fdb1658793e 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
@@ -6,7 +6,7 @@
from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder
from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
-from openvino import op, PartialShape, Type as OVType, OVAny
+from openvino.runtime import op, PartialShape, Type as OVType, OVAny
from openvino.frontend.pytorch.utils import (
ivalue_to_constant,
get_value_from_getattr,
@@ -15,7 +15,7 @@
convert_quantized_tensor,
graph_has_ops,
)
-from openvino import opset11 as ops
+from openvino.runtime import opset11 as ops
from openvino.frontend.pytorch import quantized, patch_model
from openvino.frontend.pytorch.module_extension import ModuleExtension
diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py
index 9ba36707037c9e..826d766505fa79 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py
@@ -7,8 +7,8 @@
import torch
import numpy as np
-from openvino import op, Type as OVType, Shape, Tensor
-from openvino import opset11 as ops
+from openvino.runtime import op, Type as OVType, Shape, Tensor
+from openvino.runtime import opset11 as ops
def make_constant(*args, **kwargs):
diff --git a/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py b/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py
index d15262cbc30366..fcedd7a74c2b51 100644
--- a/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py
@@ -7,7 +7,7 @@
import numpy as np
import tensorflow as tf
from openvino.frontend.tensorflow.py_tensorflow_frontend import _FrontEndDecoderBase as DecoderBase
-from openvino import PartialShape, Type, OVAny, Tensor
+from openvino.runtime import PartialShape, Type, OVAny, Tensor
def tf_type_to_ov_type(tf_type_int):
diff --git a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py
index 7de5dc950be53e..74c0dfff92297e 100644
--- a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py
+++ b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py
@@ -8,7 +8,7 @@
import logging as log
import numpy as np
import sys
-from openvino import PartialShape, Dimension, Type
+from openvino.runtime import PartialShape, Dimension, Type
from packaging.version import parse, Version
from typing import List, Dict, Union
diff --git a/src/bindings/python/src/openvino/helpers/packing.py b/src/bindings/python/src/openvino/helpers/packing.py
index d0956e09fc6261..796af87402f3a6 100644
--- a/src/bindings/python/src/openvino/helpers/packing.py
+++ b/src/bindings/python/src/openvino/helpers/packing.py
@@ -5,7 +5,7 @@
import numpy as np
from typing import Union
-from openvino import Type, Shape
+from openvino.runtime import Type, Shape
def pack_data(array: np.ndarray, type: Type) -> np.ndarray:
diff --git a/src/bindings/python/src/openvino/opset1/ops.py b/src/bindings/python/src/openvino/opset1/ops.py
index e264aea304fb1f..edca6c62a0b246 100644
--- a/src/bindings/python/src/openvino/opset1/ops.py
+++ b/src/bindings/python/src/openvino/opset1/ops.py
@@ -8,17 +8,17 @@
import numpy as np
from functools import partial
-from openvino import Node, PartialShape, Type
+from openvino.runtime import Node, PartialShape, Type
from openvino.op import Constant, Parameter, tensor_iterator
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset10/ops.py b/src/bindings/python/src/openvino/opset10/ops.py
index d0bc3cbf1cba4a..c7b75777484a59 100644
--- a/src/bindings/python/src/openvino/opset10/ops.py
+++ b/src/bindings/python/src/openvino/opset10/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import List, Optional
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
as_node,
diff --git a/src/bindings/python/src/openvino/opset11/ops.py b/src/bindings/python/src/openvino/opset11/ops.py
index 95767b4800db1c..575c99501d2d6c 100644
--- a/src/bindings/python/src/openvino/opset11/ops.py
+++ b/src/bindings/python/src/openvino/opset11/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import List, Optional
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
)
diff --git a/src/bindings/python/src/openvino/opset12/ops.py b/src/bindings/python/src/openvino/opset12/ops.py
index 4b354b1fcff973..928bf4f71a9773 100644
--- a/src/bindings/python/src/openvino/opset12/ops.py
+++ b/src/bindings/python/src/openvino/opset12/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import Optional
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
as_node,
diff --git a/src/bindings/python/src/openvino/opset13/ops.py b/src/bindings/python/src/openvino/opset13/ops.py
index 5c6863740120f8..12f0d06b1a28e6 100644
--- a/src/bindings/python/src/openvino/opset13/ops.py
+++ b/src/bindings/python/src/openvino/opset13/ops.py
@@ -11,12 +11,12 @@
log = logging.getLogger(__name__)
-from openvino import Node, Shape, Type, Output, Tensor
+from openvino.runtime import Node, Shape, Type, Output, Tensor
from openvino.op import Constant, Result
from openvino.opset1 import convert_like
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op, unary_op, overloading
-from openvino.utils.types import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op, overloading
+from openvino.runtime.utils.types import (
NumericData,
NodeInput,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset14/ops.py b/src/bindings/python/src/openvino/opset14/ops.py
index 59e1bfd3e89c6f..fa872d24eb7f1a 100644
--- a/src/bindings/python/src/openvino/opset14/ops.py
+++ b/src/bindings/python/src/openvino/opset14/ops.py
@@ -7,11 +7,11 @@
from typing import Union, Optional, List
-from openvino import Node, Type
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.types import TensorShape
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import NodeInput, as_node, as_nodes
+from openvino.runtime import Node, Type
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.types import TensorShape
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import NodeInput, as_node, as_nodes
_get_node_factory_opset14 = partial(_get_node_factory, "opset14")
diff --git a/src/bindings/python/src/openvino/opset15/ops.py b/src/bindings/python/src/openvino/opset15/ops.py
index 97d4419fc4834b..8e6b8bd46d5f7c 100644
--- a/src/bindings/python/src/openvino/opset15/ops.py
+++ b/src/bindings/python/src/openvino/opset15/ops.py
@@ -7,12 +7,12 @@
from typing import List, Literal, Optional
import numpy as np
-from openvino import Node, Type
+from openvino.runtime import Node, Type
from openvino.opset1 import convert_like
from openvino.opset14 import constant
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op
-from openvino.utils.types import NodeInput, as_nodes
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op
+from openvino.runtime.utils.types import NodeInput, as_nodes
_get_node_factory_opset15 = partial(_get_node_factory, "opset15")
diff --git a/src/bindings/python/src/openvino/opset16/ops.py b/src/bindings/python/src/openvino/opset16/ops.py
index e5ebdc7a2a11d6..60656f6d993b6a 100644
--- a/src/bindings/python/src/openvino/opset16/ops.py
+++ b/src/bindings/python/src/openvino/opset16/ops.py
@@ -6,10 +6,10 @@
from functools import partial
from typing import Optional
-from openvino import Node
-from openvino.utils.decorators import nameable_op
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.types import NodeInput, as_nodes
+from openvino.runtime import Node
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.types import NodeInput, as_nodes
_get_node_factory_opset16 = partial(_get_node_factory, "opset16")
diff --git a/src/bindings/python/src/openvino/opset2/ops.py b/src/bindings/python/src/openvino/opset2/ops.py
index f76f608fe9a5c7..45b33f5bc0288b 100644
--- a/src/bindings/python/src/openvino/opset2/ops.py
+++ b/src/bindings/python/src/openvino/opset2/ops.py
@@ -9,17 +9,18 @@
from functools import partial
import warnings
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset3/ops.py b/src/bindings/python/src/openvino/opset3/ops.py
index 1c2c7e309fe919..989f5819acb685 100644
--- a/src/bindings/python/src/openvino/opset3/ops.py
+++ b/src/bindings/python/src/openvino/opset3/ops.py
@@ -8,17 +8,18 @@
import numpy as np
from functools import partial
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset4/ops.py b/src/bindings/python/src/openvino/opset4/ops.py
index e6f3a3a1550937..4f6ba016852b02 100644
--- a/src/bindings/python/src/openvino/opset4/ops.py
+++ b/src/bindings/python/src/openvino/opset4/ops.py
@@ -8,17 +8,18 @@
import numpy as np
from functools import partial
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset5/ops.py b/src/bindings/python/src/openvino/opset5/ops.py
index 9217830752b1d8..20057b78c7c31d 100644
--- a/src/bindings/python/src/openvino/opset5/ops.py
+++ b/src/bindings/python/src/openvino/opset5/ops.py
@@ -8,17 +8,18 @@
import numpy as np
from functools import partial
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter, loop
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset6/ops.py b/src/bindings/python/src/openvino/opset6/ops.py
index 340d0405b4ba23..8020715f20dea3 100644
--- a/src/bindings/python/src/openvino/opset6/ops.py
+++ b/src/bindings/python/src/openvino/opset6/ops.py
@@ -9,13 +9,13 @@
from functools import partial, singledispatch
-from openvino import Node, Type, PartialShape, Output, Shape
+from openvino.runtime import Node, Type, PartialShape, Output, Shape
from openvino.op import assign, Constant, Parameter
from openvino.op import read_value as _read_value
from openvino.op.util import VariableInfo, Variable
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op, overloading
-from openvino.utils.types import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op, overloading
+from openvino.runtime.utils.types import (
NodeInput,
NumericType,
TensorShape,
diff --git a/src/bindings/python/src/openvino/opset7/ops.py b/src/bindings/python/src/openvino/opset7/ops.py
index e33d266debedf1..59e09b64888eb1 100644
--- a/src/bindings/python/src/openvino/opset7/ops.py
+++ b/src/bindings/python/src/openvino/opset7/ops.py
@@ -7,17 +7,18 @@
from typing import Callable, Iterable, List, Optional, Set, Union
import numpy as np
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
assert_list_of_ints,
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
NodeInput,
NumericData,
NumericType,
diff --git a/src/bindings/python/src/openvino/opset8/ops.py b/src/bindings/python/src/openvino/opset8/ops.py
index a9a868e7b541d8..6995d55a28a776 100644
--- a/src/bindings/python/src/openvino/opset8/ops.py
+++ b/src/bindings/python/src/openvino/opset8/ops.py
@@ -9,15 +9,15 @@
import numpy as np
from openvino.exceptions import UserInputError
from openvino.op import Constant, Parameter, if_op
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.input_validation import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.input_validation import (
check_valid_attributes,
is_non_negative_value,
is_positive_value,
)
-from openvino.utils.types import (
+from openvino.runtime.utils.types import (
NodeInput,
TensorShape,
as_node,
diff --git a/src/bindings/python/src/openvino/opset9/ops.py b/src/bindings/python/src/openvino/opset9/ops.py
index e2264845e058dc..a6d45cfd0be2cc 100644
--- a/src/bindings/python/src/openvino/opset9/ops.py
+++ b/src/bindings/python/src/openvino/opset9/ops.py
@@ -7,10 +7,10 @@
from typing import Optional
import numpy as np
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
NodeInput,
as_nodes,
as_node,
diff --git a/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py b/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
index 717e945217468c..c14635cc118208 100644
--- a/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
+++ b/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
@@ -5,7 +5,7 @@
from typing import Callable, Any, Union
import logging
-import openvino as ov
+import openvino.runtime as ov
class PreprocessConverter():
diff --git a/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py b/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
index 5dad42b47da44a..f8b51afd546f57 100644
--- a/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
+++ b/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
@@ -20,10 +20,10 @@
import torchvision.transforms as transforms
from torchvision.transforms import InterpolationMode
-import openvino as ov
-import openvino.opset11 as ops
-from openvino import Layout, Type
-from openvino.utils.decorators import custom_preprocess_function
+import openvino.runtime as ov
+import openvino.runtime.opset11 as ops
+from openvino.runtime import Layout, Type
+from openvino.runtime.utils.decorators import custom_preprocess_function
from openvino.preprocess import PrePostProcessor, ResizeAlgorithm, ColorFormat
diff --git a/src/bindings/python/src/openvino/runtime/opset_utils.py b/src/bindings/python/src/openvino/runtime/opset_utils.py
new file mode 100644
index 00000000000000..475750e71f87c5
--- /dev/null
+++ b/src/bindings/python/src/openvino/runtime/opset_utils.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional
+import numpy as np
+
+from openvino.runtime import Node
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
+ as_node,
+ NodeInput,
+)
+
+
+def _get_node_factory(opset_version: Optional[str] = None) -> NodeFactory:
+ """Return NodeFactory configured to create operators from specified opset version."""
+ if opset_version:
+ return NodeFactory(opset_version)
+ else:
+ return NodeFactory()
diff --git a/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py b/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py
deleted file mode 100644
index 6fb3e5f6f0c950..00000000000000
--- a/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.node_factory import _get_node_factory
diff --git a/src/bindings/python/src/openvino/runtime/utils/__init__.py b/src/bindings/python/src/openvino/runtime/utils/__init__.py
index 8447e93a907277..73399ccbed2598 100644
--- a/src/bindings/python/src/openvino/runtime/utils/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/utils/__init__.py
@@ -4,4 +4,4 @@
"""Generic utilities. Factor related functions out to separate files."""
-from openvino.utils import numpy_to_c, replace_node, replace_output_update_name
+from openvino._pyopenvino.util import numpy_to_c, replace_node, replace_output_update_name
diff --git a/src/bindings/python/src/openvino/utils/broadcasting.py b/src/bindings/python/src/openvino/runtime/utils/broadcasting.py
similarity index 87%
rename from src/bindings/python/src/openvino/utils/broadcasting.py
rename to src/bindings/python/src/openvino/runtime/utils/broadcasting.py
index 01549625e2c628..9fd13da7728e29 100644
--- a/src/bindings/python/src/openvino/utils/broadcasting.py
+++ b/src/bindings/python/src/openvino/runtime/utils/broadcasting.py
@@ -3,11 +3,14 @@
# SPDX-License-Identifier: Apache-2.0
import logging
-from typing import Optional
+from typing import List, Optional
-from openvino import AxisSet
-from openvino.utils.types import (
+from openvino.runtime import AxisSet, Node
+from openvino.runtime.utils.types import (
+ NodeInput,
TensorShape,
+ get_dtype,
+ make_constant_node,
)
log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py b/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py
deleted file mode 100644
index 3219f239f0ab44..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.broadcasting import get_broadcast_axes
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
index 282547dd9df79a..a46105efaaeadb 100644
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
@@ -2,7 +2,7 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
+from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch
+from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file
+from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper
+from openvino.runtime.utils.data_helpers.wrappers import OVDict
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
similarity index 99%
rename from src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py
rename to src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
index d4db7cb07b629c..bce10c9c3774ef 100644
--- a/src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py
+++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
@@ -8,7 +8,7 @@
import numpy as np
from openvino._pyopenvino import ConstOutput, Tensor, Type, RemoteTensor
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict
+from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict
ContainerTypes = Union[dict, list, tuple, OVDict]
ScalarTypes = Union[np.number, int, float]
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py
deleted file mode 100644
index e0a2d022660dd3..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.data_helpers.data_dispatcher import ContainerTypes
-from openvino.utils.data_helpers.data_dispatcher import ScalarTypes
-from openvino.utils.data_helpers.data_dispatcher import ValidKeys
-
-from openvino.utils.data_helpers.data_dispatcher import is_list_simple_type
-from openvino.utils.data_helpers.data_dispatcher import get_request_tensor
-from openvino.utils.data_helpers.data_dispatcher import value_to_tensor
-from openvino.utils.data_helpers.data_dispatcher import to_c_style
-from openvino.utils.data_helpers.data_dispatcher import normalize_arrays
-from openvino.utils.data_helpers.data_dispatcher import create_shared
-from openvino.utils.data_helpers.data_dispatcher import set_request_tensor
-from openvino.utils.data_helpers.data_dispatcher import update_tensor
-from openvino.utils.data_helpers.data_dispatcher import update_inputs
-from openvino.utils.data_helpers.data_dispatcher import create_copied
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/wrappers.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py
similarity index 100%
rename from src/bindings/python/src/openvino/utils/data_helpers/wrappers.py
rename to src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py
deleted file mode 100644
index 22214fd24682da..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
diff --git a/src/bindings/python/src/openvino/utils/decorators.py b/src/bindings/python/src/openvino/runtime/utils/decorators.py
similarity index 98%
rename from src/bindings/python/src/openvino/utils/decorators.py
rename to src/bindings/python/src/openvino/runtime/utils/decorators.py
index 9418c359d129e8..98da1ba4389ef7 100644
--- a/src/bindings/python/src/openvino/utils/decorators.py
+++ b/src/bindings/python/src/openvino/runtime/utils/decorators.py
@@ -6,8 +6,8 @@
from inspect import signature
from typing import Any, Callable, Dict, Optional, Union, get_origin, get_args
-from openvino import Node, Output
-from openvino.utils.types import NodeInput, as_node, as_nodes
+from openvino.runtime import Node, Output
+from openvino.runtime.utils.types import NodeInput, as_node, as_nodes
def _get_name(**kwargs: Any) -> Node:
diff --git a/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py b/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py
deleted file mode 100644
index bb0bac112d2c5f..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.decorators import _get_name
-from openvino.utils.decorators import _set_node_friendly_name
-from openvino.utils.decorators import nameable_op
-from openvino.utils.decorators import unary_op
-from openvino.utils.decorators import binary_op
-from openvino.utils.decorators import custom_preprocess_function
-from openvino.utils.decorators import MultiMethod
-from openvino.utils.decorators import registry
-from openvino.utils.decorators import overloading
diff --git a/src/bindings/python/src/openvino/utils/input_validation.py b/src/bindings/python/src/openvino/runtime/utils/input_validation.py
similarity index 98%
rename from src/bindings/python/src/openvino/utils/input_validation.py
rename to src/bindings/python/src/openvino/runtime/utils/input_validation.py
index 1de08452e1da9f..e79a16c48581b1 100644
--- a/src/bindings/python/src/openvino/utils/input_validation.py
+++ b/src/bindings/python/src/openvino/runtime/utils/input_validation.py
@@ -9,7 +9,7 @@
import numpy as np
-from openvino.exceptions import UserInputError
+from openvino.runtime.exceptions import UserInputError
log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py b/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py
deleted file mode 100644
index 0b49e9ea33c40d..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.input_validation import assert_list_of_ints
-from openvino.utils.input_validation import _check_value
-from openvino.utils.input_validation import check_valid_attribute
-from openvino.utils.input_validation import check_valid_attributes
-from openvino.utils.input_validation import is_positive_value
-from openvino.utils.input_validation import is_non_negative_value
diff --git a/src/bindings/python/src/openvino/utils/node_factory.py b/src/bindings/python/src/openvino/runtime/utils/node_factory.py
similarity index 92%
rename from src/bindings/python/src/openvino/utils/node_factory.py
rename to src/bindings/python/src/openvino/runtime/utils/node_factory.py
index e999ae6988814a..25daf739223dba 100644
--- a/src/bindings/python/src/openvino/utils/node_factory.py
+++ b/src/bindings/python/src/openvino/runtime/utils/node_factory.py
@@ -2,16 +2,17 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
+import logging as log
-from functools import singledispatchmethod
+from functools import partial, singledispatchmethod
from typing import Any, Dict, List, Optional, Union
from pathlib import Path
from openvino._pyopenvino import NodeFactory as _NodeFactory
-from openvino import Node, Output, Extension
+from openvino.runtime import Node, Output, Extension
-from openvino.exceptions import UserInputError
+from openvino.runtime.exceptions import UserInputError
DEFAULT_OPSET = "opset13"
@@ -124,11 +125,3 @@ def _arguments_as_outputs(arguments: List[Union[Node, Output]]) -> List[Output]:
else:
outputs.extend(argument.outputs())
return outputs
-
-
-def _get_node_factory(opset_version: Optional[str] = None) -> NodeFactory:
- """Return NodeFactory configured to create operators from specified opset version."""
- if opset_version:
- return NodeFactory(opset_version)
- else:
- return NodeFactory()
diff --git a/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py b/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py
deleted file mode 100644
index 945ea8deb7863c..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.node_factory import NodeFactory
diff --git a/src/bindings/python/src/openvino/utils/reduction.py b/src/bindings/python/src/openvino/runtime/utils/reduction.py
similarity index 95%
rename from src/bindings/python/src/openvino/utils/reduction.py
rename to src/bindings/python/src/openvino/runtime/utils/reduction.py
index e6be6d0ac9a104..71d0af8de7376e 100644
--- a/src/bindings/python/src/openvino/utils/reduction.py
+++ b/src/bindings/python/src/openvino/runtime/utils/reduction.py
@@ -4,7 +4,7 @@
from typing import Iterable, Optional
-from openvino import Node
+from openvino.runtime import Node
def get_reduction_axes(node: Node, reduction_axes: Optional[Iterable[int]]) -> Iterable[int]:
diff --git a/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py b/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py
deleted file mode 100644
index a2fbff9e793dca..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.reduction import get_reduction_axes
diff --git a/src/bindings/python/src/openvino/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py
similarity index 97%
rename from src/bindings/python/src/openvino/utils/types.py
rename to src/bindings/python/src/openvino/runtime/utils/types.py
index b3543739741d94..52f1faf8e1e839 100644
--- a/src/bindings/python/src/openvino/utils/types.py
+++ b/src/bindings/python/src/openvino/runtime/utils/types.py
@@ -9,9 +9,9 @@
import numpy as np
-from openvino.exceptions import OVTypeError
-from openvino import Node, Shape, Output, Type
-from openvino.op import Constant
+from openvino.runtime.exceptions import OVTypeError
+from openvino.runtime import Node, Shape, Output, Type
+from openvino.runtime.op import Constant
log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/types/__init__.py b/src/bindings/python/src/openvino/runtime/utils/types/__init__.py
deleted file mode 100644
index 4f88d609988e8d..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/types/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.types import TensorShape
-from openvino.utils.types import NumericData
-from openvino.utils.types import NumericType
-from openvino.utils.types import ScalarData
-from openvino.utils.types import NodeInput
-
-from openvino.utils.types import openvino_to_numpy_types_map
-from openvino.utils.types import openvino_to_numpy_types_str_map
-from openvino.utils.types import get_element_type
-from openvino.utils.types import get_element_type_str
-from openvino.utils.types import get_dtype
-from openvino.utils.types import get_numpy_ctype
-from openvino.utils.types import get_ndarray
-from openvino.utils.types import get_shape
-from openvino.utils.types import make_constant_node
-from openvino.utils.types import as_node
-from openvino.utils.types import as_nodes
diff --git a/src/bindings/python/src/openvino/package_utils.py b/src/bindings/python/src/openvino/utils.py
similarity index 97%
rename from src/bindings/python/src/openvino/package_utils.py
rename to src/bindings/python/src/openvino/utils.py
index 6aa3f3ed39b556..9890ae9b3e6460 100644
--- a/src/bindings/python/src/openvino/package_utils.py
+++ b/src/bindings/python/src/openvino/utils.py
@@ -21,9 +21,9 @@ def _add_openvino_libs_to_search_path() -> None:
if os.path.isdir(os.path.join(os.path.dirname(__file__), "libs")):
# looking for the libs in the pip installation path.
openvino_libs.append(os.path.join(os.path.dirname(__file__), "libs"))
- elif os.path.isdir(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, "Library", "bin")):
+ elif os.path.isdir(os.path.join(os.path.dirname(__file__), "..", "..", "..", "Library", "bin")):
# looking for the libs in the conda installation path
- openvino_libs.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, "Library", "bin"))
+ openvino_libs.append(os.path.join(os.path.dirname(__file__), "..", "..", "..", "Library", "bin"))
else:
# setupvars.bat script set all libs paths to OPENVINO_LIB_PATHS environment variable.
openvino_libs_installer = os.getenv("OPENVINO_LIB_PATHS")
diff --git a/src/bindings/python/src/openvino/utils/__init__.py b/src/bindings/python/src/openvino/utils/__init__.py
deleted file mode 100644
index 2ccc79d20cce84..00000000000000
--- a/src/bindings/python/src/openvino/utils/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""Generic utilities. Factor related functions out to separate files."""
-
-from openvino._pyopenvino.util import numpy_to_c, replace_node, replace_output_update_name
-
-from openvino.package_utils import get_cmake_path
-from openvino.package_utils import deprecated
-from openvino.package_utils import classproperty
-from openvino.package_utils import deprecatedclassproperty
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/utils/data_helpers/__init__.py
deleted file mode 100644
index 282547dd9df79a..00000000000000
--- a/src/bindings/python/src/openvino/utils/data_helpers/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
diff --git a/src/common/transformations/include/transformations/op_conversions/fake_convert_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/fake_convert_decomposition.hpp
new file mode 100644
index 00000000000000..e149152b2bcf6d
--- /dev/null
+++ b/src/common/transformations/include/transformations/op_conversions/fake_convert_decomposition.hpp
@@ -0,0 +1,32 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+class TRANSFORMATIONS_API FakeConvertDecomposition;
+
+} // namespace pass
+} // namespace ov
+
+/**
+ * @ingroup ov_transformation_common_api
+ * @brief FakeConvertDecomposition transformation decomposes FakeConvert layer.
+ * f8: f8e4m3, f8e5m2
+ * downconvert: f32->f8, f16->f8, bf16->f8
+ * upconvert: f8->f32, f8->f16, f8->bf16
+ * output = (upconvert(downconvert(input * scale - shift)) + shift) / scale
+ *
+ */
+
+class ov::pass::FakeConvertDecomposition : public ov::pass::MatcherPass {
+public:
+ OPENVINO_MATCHER_PASS_RTTI("FakeConvertDecomposition");
+ FakeConvertDecomposition();
+};
diff --git a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
new file mode 100644
index 00000000000000..7f0a44df6a151d
--- /dev/null
+++ b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
@@ -0,0 +1,76 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/fake_convert_decomposition.hpp"
+
+#include "itt.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/divide.hpp"
+#include "openvino/op/fake_convert.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+
+ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
+ MATCHER_SCOPE(FakeConvertDecomposition);
+ auto data = pattern::any_input();
+
+ auto fake_convert = ov::pass::pattern::wrap_type<ov::op::v13::FakeConvert>();
+
+ matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
+ auto& pattern_to_output = m.get_pattern_value_map();
+ const auto fake_convert_node =
+ ov::as_type_ptr<ov::op::v13::FakeConvert>(pattern_to_output.at(fake_convert).get_node_shared_ptr());
+
+ if (fake_convert_node == nullptr || transformation_callback(fake_convert_node)) {
+ return false;
+ }
+
+ Output<Node> data{fake_convert_node->input_value(0)};
+ const Output<Node> input_scale{fake_convert_node->input_value(1)};
+ auto input_type = data.get_element_type();
+
+ ov::pass::NodeRegistry decomp_ops;
+ if (input_type != input_scale.get_element_type()) {
+ input_type = input_scale.get_element_type();
+ data = std::make_shared<ov::op::v0::Convert>(data, input_type);
+ data = decomp_ops.add(data.get_node_shared_ptr());
+ }
+
+ std::shared_ptr<Node> result;
+ const auto scale = decomp_ops.make<ov::op::v1::Multiply>(data, input_scale);
+ if (fake_convert_node->get_input_size() == 2) {
+ const auto downconvert =
+ decomp_ops.make<ov::op::v0::Convert>(scale, fake_convert_node->get_destination_element_type());
+ const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);
+
+ result = decomp_ops.make<ov::op::v1::Divide>(upconvert, input_scale);
+ } else {
+ const Output<Node> input_shift{fake_convert_node->input_value(2)};
+ const auto shift = decomp_ops.make<ov::op::v1::Subtract>(scale, input_shift);
+
+ const auto downconvert =
+ decomp_ops.make<ov::op::v0::Convert>(shift, fake_convert_node->get_destination_element_type());
+ const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);
+
+ const auto deshift = decomp_ops.make<ov::op::v1::Add>(upconvert, input_shift);
+ result = decomp_ops.make<ov::op::v1::Divide>(deshift, input_scale);
+ }
+
+ if (result->get_output_element_type(0) != fake_convert_node->get_output_element_type(0)) {
+ result = decomp_ops.make<ov::op::v0::Convert>(result, fake_convert_node->get_output_element_type(0));
+ }
+
+ result->set_friendly_name(m.get_match_root()->get_friendly_name());
+ ov::copy_runtime_info(fake_convert_node, decomp_ops.get());
+ ov::replace_node(m.get_match_root(), result);
+ return true;
+ };
+
+ auto m = std::make_shared<ov::pass::pattern::Matcher>(fake_convert, matcher_name);
+ register_matcher(m, callback);
+}
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
index 1cc9be37606950..397746c75bb84d 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
@@ -61,16 +61,19 @@ ov::pass::PositionIDsReplacerQwen::PositionIDsReplacerQwen(const Output& p
auto p_opt_convert = optional(p_max_context_len);
auto p_opt_reshape = optional({p_opt_convert, any_input()});
- // current seg len
- auto p_input_ids = wrap_type();
- auto p_unsqueeze = wrap_type({p_input_ids, _const()});
- auto p_shape_of = wrap_type({p_unsqueeze});
+ // current seq len:
+ // it might be present in 2 different ways:
+ // input_ids -> unsqueeze -> reshape -> convert -> shape_of -> gather
+ // QKV -> variadic_split(Q or K) -> rope Q/K -> shape_of -> gather
+ // Symbols could potentially be used to unify these two patterns.
+ // Currently, "any_input" is used to match both places.
+ auto p_shape_of = wrap_type({any_input()});
auto p_current_len = wrap_type({p_shape_of, _const(), _const()});
- auto p_rotary_emb_sincos = wrap_type();
auto p_neg_const = wrap_type();
auto p_neg_mul = wrap_type({p_current_len, p_neg_const});
// the rotary_emb_cos/rotary_emb_sin are sliced by the total length [1,..4096,1,128]
+ auto p_rotary_emb_sincos = wrap_type();
auto p_slice_1 = wrap_type({p_rotary_emb_sincos, _const(), p_opt_reshape, _const(), _const()});
auto p_slice_2 = wrap_type({p_slice_1, p_neg_mul, _const(), _const(), _const()});
diff --git a/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp
new file mode 100644
index 00000000000000..33b167ace11e24
--- /dev/null
+++ b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp
@@ -0,0 +1,149 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/fake_convert_decomposition.hpp"
+
+#include
+
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/ov_test_utils.hpp"
+#include "openvino/opsets/opset1.hpp"
+#include "openvino/opsets/opset13.hpp"
+
+using namespace ov;
+
+using FakeConvertDecompositionParams = std::tuple; // default shift
+
+class FakeConvertDecompositionTest : public ov::test::TestsCommon,
+ public ::testing::WithParamInterface {
+public:
+ static std::string getTestCaseName(::testing::TestParamInfo obj) {
+ FakeConvertDecompositionParams params = obj.param;
+
+ Shape data_shape, scale_shape, shift_shape;
+ element::Type_t data_prec, dst_prec;
+ bool default_shift;
+ std::tie(data_shape, scale_shape, shift_shape, data_prec, dst_prec, default_shift) = params;
+
+ std::ostringstream result;
+ result << "dataShape=" << ov::test::utils::vec2str(data_shape) << "_";
+ result << "scaleShape=" << ov::test::utils::vec2str(scale_shape) << "_";
+ result << "shiftShape=" << ov::test::utils::vec2str(shift_shape) << "_";
+ result << "dataPrecision=" << element::Type(data_prec) << "_";
+ result << "destinationPrecision=" << element::Type(dst_prec) << "_";
+ if (default_shift)
+ result << "defaultShift=true";
+ else
+ result << "defaultShift=false";
+ return result.str();
+ }
+};
+
+TEST_P(FakeConvertDecompositionTest, CompareFunctions) {
+ FakeConvertDecompositionParams params = this->GetParam();
+
+ Shape data_shape, scale_shape, shift_shape;
+ element::Type_t data_prec, dst_prec;
+ bool default_shift;
+ std::tie(data_shape, scale_shape, shift_shape, data_prec, dst_prec, default_shift) = params;
+
+ std::shared_ptr model(nullptr);
+ {
+ const auto data = std::make_shared(data_prec, PartialShape(data_shape));
+ const auto scale = std::make_shared(data_prec, scale_shape);
+ const auto shift = std::make_shared(data_prec, shift_shape);
+
+ const auto fake_convert = default_shift ? std::make_shared(data, scale, dst_prec)
+ : std::make_shared(data, scale, shift, dst_prec);
+ model = std::make_shared(NodeVector{fake_convert}, ParameterVector{data});
+
+ pass::Manager manager;
+ manager.register_pass();
+ manager.register_pass();
+ manager.run_passes(model);
+
+ OV_ASSERT_NO_THROW(check_rt_info(model));
+ }
+
+ std::shared_ptr model_ref(nullptr);
+ {
+ const auto input_data = std::make_shared(data_prec, PartialShape(data_shape));
+ const auto input_scale = std::make_shared(data_prec, scale_shape);
+ const auto input_shift = std::make_shared(data_prec, shift_shape);
+ ParameterVector params;
+ params.push_back(input_data);
+ std::shared_ptr data = input_data;
+
+ std::shared_ptr result;
+ const auto scale = std::make_shared(data, input_scale);
+ if (default_shift) {
+ const auto downconvert = std::make_shared(scale, dst_prec);
+ const auto upconvert = std::make_shared(downconvert, data_prec);
+
+ result = std::make_shared(upconvert, input_scale);
+ } else {
+ const auto shift = std::make_shared(scale, input_shift);
+
+ const auto downconvert = std::make_shared(shift, dst_prec);
+ const auto upconvert = std::make_shared(downconvert, data_prec);
+
+ const auto deshift = std::make_shared(upconvert, input_shift);
+ result = std::make_shared(deshift, input_scale);
+ }
+
+ model_ref = std::make_shared(NodeVector{result}, params);
+ }
+
+ const auto res = compare_functions(model, model_ref);
+ ASSERT_TRUE(res.first) << res.second;
+}
+
+const std::vector data_precisions = {element::Type_t::f32,
+ element::Type_t::f16,
+ element::Type_t::bf16};
+
+const std::vector destination_precisions = {element::Type_t::f8e4m3, element::Type_t::f8e5m2};
+
+const std::vector default_shift = {true, false};
+
+const auto simple_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}),
+ ::testing::Values(Shape{1}),
+ ::testing::Values(Shape{1}),
+ ::testing::ValuesIn(data_precisions),
+ ::testing::ValuesIn(destination_precisions),
+ ::testing::ValuesIn(default_shift));
+
+const auto broadcast_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}),
+ ::testing::Values(Shape{2, 3, 1, 1}),
+ ::testing::Values(Shape{2, 3, 1, 1}),
+ ::testing::ValuesIn(data_precisions),
+ ::testing::ValuesIn(destination_precisions),
+ ::testing::ValuesIn(default_shift));
+
+const auto elementwise_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}),
+ ::testing::Values(Shape{2, 3, 4, 5}),
+ ::testing::Values(Shape{2, 3, 4, 5}),
+ ::testing::ValuesIn(data_precisions),
+ ::testing::ValuesIn(destination_precisions),
+ ::testing::ValuesIn(default_shift));
+
+INSTANTIATE_TEST_SUITE_P(SimpleFakeConvert_Decomposition,
+ FakeConvertDecompositionTest,
+ simple_fake_convert_params,
+ FakeConvertDecompositionTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(BroadcastFakeConvert_Decomposition,
+ FakeConvertDecompositionTest,
+ broadcast_fake_convert_params,
+ FakeConvertDecompositionTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(ElementwiseFakeConvert_Decomposition,
+ FakeConvertDecompositionTest,
+ elementwise_fake_convert_params,
+ FakeConvertDecompositionTest::getTestCaseName);
diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py
index ef8cebfa361e3f..fdf1295dfd1dbe 100644
--- a/src/frontends/onnx/tests/__init__.py
+++ b/src/frontends/onnx/tests/__init__.py
@@ -147,7 +147,7 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True):
skip_dynamic_model = pytest.mark.skip(reason="CPU plug-in can't load a model with dynamic output shapes via legacy API")
# ONNX 1.14
-xfail_issue_119896 = xfail_test(reason="Unsupported element type: FLOAT8")
+xfail_issue_119896 = xfail_test(reason="Unsupported element type: FLOAT8", strict=False)
xfail_issue_119900 = xfail_test(reason="While validating ONNX node '': "
"half_pixel_symmetric - this type of coordinate transformation mode "
"is not supported. Choose one of the following modes: "
diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp
index 64da4cb0ac836a..29c8bfddbd1ca4 100644
--- a/src/inference/src/os/lin/lin_system_conf.cpp
+++ b/src/inference/src/os/lin/lin_system_conf.cpp
@@ -219,14 +219,16 @@ CPU::CPU() {
} else if (valid_cpu_mapping_table.size() == (unsigned)_processors) {
return 0;
} else {
- std::lock_guard lock{_cpu_mutex};
_processors = valid_cpu_mapping_table.size();
_cpu_mapping_table.swap(valid_cpu_mapping_table);
- update_valid_processor_linux(std::move(phy_core_list),
- _numa_nodes,
- _cores,
- _proc_type_table,
- _cpu_mapping_table);
+ {
+ std::lock_guard lock{_cpu_mutex};
+ update_valid_processor_linux(std::move(phy_core_list),
+ _numa_nodes,
+ _cores,
+ _proc_type_table,
+ _cpu_mapping_table);
+ }
return 0;
}
};
@@ -235,7 +237,7 @@ CPU::CPU() {
if (!get_info_linux(cache_info_mode)) {
parse_cache_info_linux(system_info_table,
- node_info_table,
+ std::move(node_info_table),
_processors,
_numa_nodes,
_sockets,
@@ -249,7 +251,7 @@ CPU::CPU() {
(_proc_type_table[0][ALL_PROC] != _proc_type_table[0][EFFICIENT_CORE_PROC]))) {
if (!get_info_linux(freq_info_mode)) {
parse_freq_info_linux(system_info_table,
- node_info_table,
+ std::move(node_info_table),
_processors,
_numa_nodes,
_sockets,
diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
index 457f8368f734dd..1c5598b6d55e26 100644
--- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
+++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -36,6 +36,8 @@ uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
case dnnl::memory::data_type::s4:
case dnnl::memory::data_type::u4:
case dnnl::memory::data_type::f8_e8m0:
+ case dnnl::memory::data_type::f8_e4m3:
+ case dnnl::memory::data_type::f8_e5m2:
case dnnl::memory::data_type::f4_e2m1:
return 1;
case dnnl::memory::data_type::undef:
@@ -70,6 +72,10 @@ dnnl::memory::data_type DnnlExtensionUtils::ElementTypeToDataType(const ov::elem
return memory::data_type::u4;
case ov::element::f8e8m0:
return memory::data_type::f8_e8m0;
+ case ov::element::f8e4m3:
+ return memory::data_type::f8_e4m3;
+ case ov::element::f8e5m2:
+ return memory::data_type::f8_e5m2;
case ov::element::f4e2m1:
return memory::data_type::f4_e2m1;
case ov::element::undefined:
@@ -106,6 +112,10 @@ ov::element::Type DnnlExtensionUtils::DataTypeToElementType(const dnnl::memory::
return ov::element::u4;
case memory::data_type::f8_e8m0:
return ov::element::f8e8m0;
+ case memory::data_type::f8_e4m3:
+ return ov::element::f8e4m3;
+ case memory::data_type::f8_e5m2:
+ return ov::element::f8e5m2;
case memory::data_type::f4_e2m1:
return ov::element::f4e2m1;
case memory::data_type::undef:
diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
index 0c8cddd905dc2e..f6aabe376d6eec 100644
--- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
@@ -9,6 +9,7 @@
#include "utils/bfloat16.hpp"
#if defined(OPENVINO_ARCH_X86_64)
+# include "cpu/x64/jit_avx512_core_fp8cvt.hpp"
# include "nodes/kernels/x64/jit_kernel.hpp"
#else
# include "cpu_memory.h"
@@ -27,6 +28,18 @@ using namespace dnnl::impl::utils;
using namespace dnnl::impl::cpu::x64;
using namespace Xbyak;
+enum f8_type { none, f8e4m3, f8e5m2 };
+
+template
+f8_type get_f8_type() {
+ if (std::is_same::value || std::is_same::value) {
+ return f8_type::f8e4m3;
+ } else if (std::is_same::value || std::is_same::value) {
+ return f8_type::f8e5m2;
+ }
+ return f8_type::none;
+}
+
template
void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst);
@@ -50,12 +63,14 @@ void convert_vec(jit_generator& gen, const RegExp& src, cons
gen.movdqu(gen.xword[dst], f16vec);
}
+template
class jit_convert_array : public jit_kernel {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array)
void generate() override {
- constexpr size_t vlen = 8u;
- constexpr size_t vlen_log2 = 3;
+ bool is_fp8 = f8_e4m3_emu_ || f8_e5m2_emu_;
+ size_t vlen = is_fp8 ? 16u : 8u;
+ size_t vlen_log2 = is_fp8 ? 4 : 3;
preamble();
@@ -84,17 +99,24 @@ class jit_convert_array : public jit_kernel {
auto tail_size = var();
tail_size = size;
- tail_size <<= static_cast(std::logb(_src_size)) - 1;
- copy(tmp.pointer(), src, tail_size);
+ tail_size <<= static_cast(std::logb(_src_size));
+ copy(tmp.pointer(), src, tail_size);
_convert_vec(*this, tmp.pointer(), tmp.pointer());
tail_size = size;
- tail_size <<= static_cast(std::logb(_dst_size)) - 1;
- copy(dst, tmp.pointer(), tail_size);
+ tail_size <<= static_cast(std::logb(_dst_size));
+ copy(dst, tmp.pointer(), tail_size);
});
postamble();
+
+ if (f8_e4m3_emu_)
+ f8_e4m3_emu_->prepare_table();
+ if (f8_e5m2_emu_)
+ f8_e5m2_emu_->prepare_table();
+ if (uni_vcvtneps2bf16_)
+ uni_vcvtneps2bf16_->emit_data();
}
public:
@@ -108,16 +130,37 @@ class jit_convert_array : public jit_kernel {
typedef void (*convert_vec_t)(jit_generator&, const RegExp&, const RegExp&);
- jit_convert_array(convert_vec_t convert_vec, size_t src_size, size_t dst_size)
+ jit_convert_array(convert_vec_t convert_vec)
: jit_kernel(jit_name()),
_convert_vec(convert_vec),
- _src_size(src_size),
- _dst_size(dst_size) {}
+ _src_size(sizeof(src_t)),
+ _dst_size(sizeof(dst_t)) {
+ const auto type = get_f8_type();
+ if (type == f8_type::f8e4m3) {
+ f8_e4m3_emu_ = std::make_shared(this,
+ fp8_emu_reserv_1_,
+ fp8_emu_reserv_2_,
+ fp8_emu_reserv_3_,
+ fp8_emu_reserv_4_,
+ fp8_emu_reserv_5_,
+ fp8_emu_scratch_);
+ } else if (type == f8_type::f8e5m2) {
+ f8_e5m2_emu_ = std::make_shared(this,
+ fp8_emu_reserv_1_,
+ fp8_emu_reserv_2_,
+ fp8_emu_reserv_3_,
+ fp8_emu_kmask_aux_,
+ fp8_emu_scratch_);
+ }
+ const bool is_dst_bf16 = std::is_same::value;
+ if (is_dst_bf16 && mayiuse(cpu_isa_t::avx512_core)) {
+ uni_vcvtneps2bf16_ = std::make_shared(this, cpu_isa_t::avx512_core);
+ }
+ }
- template
static fn_t get() {
if (mayiuse(cpu_isa_t::avx2) && dnnl::impl::cpu::x64::cpu().has(Xbyak::util::Cpu::tF16C)) {
- static jit_convert_array converter(convert_vec, sizeof(src_t), sizeof(dst_t));
+ static jit_convert_array converter(convert_vec);
auto& generator = static_cast(converter);
generator.create_kernel();
return (fn_t)generator.jit_ker();
@@ -125,16 +168,192 @@ class jit_convert_array : public jit_kernel {
return nullptr;
}
+ std::shared_ptr get_f8_e4m3_emu() const {
+ return f8_e4m3_emu_;
+ }
+
+ std::shared_ptr get_f8_e5m2_emu() const {
+ return f8_e5m2_emu_;
+ }
+
+ std::shared_ptr get_uni_vcvtneps2bf16() const {
+ return uni_vcvtneps2bf16_;
+ }
+
private:
convert_vec_t _convert_vec;
size_t _src_size;
size_t _dst_size;
+
+ std::shared_ptr f8_e4m3_emu_;
+ std::shared_ptr f8_e5m2_emu_;
+ std::shared_ptr uni_vcvtneps2bf16_;
+
+ const Reg64 fp8_emu_scratch_ = rax;
+ const Zmm fp8_emu_reserv_1_ = Zmm(9);
+ const Zmm fp8_emu_reserv_2_ = Zmm(10);
+ const Zmm fp8_emu_reserv_3_ = Zmm(11);
+ const Zmm fp8_emu_reserv_4_ = Zmm(12);
+ const Zmm fp8_emu_reserv_5_ = Zmm(13);
+ const Opmask fp8_emu_kmask_aux_ = Opmask(1);
};
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f32vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovups(f32vec, gen.zword[src]);
+ cvt.get_f8_e4m3_emu()->vcvt_f32_to_f8(f8vec, f32vec);
+ gen.vmovdqu(gen.xword[dst], f8vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f32vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f8vec, gen.xword[src]);
+ cvt.get_f8_e4m3_emu()->vcvt_f8_to_f32(f32vec, f8vec);
+ gen.vmovups(gen.zword[dst], f32vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.ymm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f16vec, gen.yword[src]);
+ cvt.get_f8_e4m3_emu()->vcvt_f16_to_f8(f8vec, f16vec);
+ gen.vmovdqu(gen.xword[dst], f8vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.ymm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f8vec, gen.xword[src]);
+ cvt.get_f8_e4m3_emu()->vcvt_f8_to_f16(f16vec, f8vec);
+ gen.vmovdqu(gen.yword[dst], f16vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vpmovzxwd(f16vec, gen.yword[src]);
+ gen.vpslld(f16vec, f16vec, 16);
+ cvt.get_f8_e4m3_emu()->vcvt_f32_to_f8(f8vec, f16vec);
+ gen.vmovdqu(gen.xword[dst], f8vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.ymm4;
+ auto const& f32vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f8vec, gen.xword[src]);
+ cvt.get_f8_e4m3_emu()->vcvt_f8_to_f32(f32vec, f8vec);
+ cvt.get_uni_vcvtneps2bf16()->emit_code({static_cast(f32vec.getIdx())},
+ {static_cast(f16vec.getIdx())});
+ gen.vmovdqu(gen.yword[dst], f16vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f32vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovups(f32vec, gen.zword[src]);
+ cvt.get_f8_e5m2_emu()->vcvt_f32_to_f8(f8vec, f32vec);
+ gen.vmovdqu(gen.xword[dst], f8vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f32vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f8vec, gen.xword[src]);
+ cvt.get_f8_e5m2_emu()->vcvt_f8_to_f32(f32vec, f8vec);
+ gen.vmovups(gen.zword[dst], f32vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.ymm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f16vec, gen.yword[src]);
+ cvt.get_f8_e5m2_emu()->vcvt_f16_to_f8(f8vec, f16vec);
+ gen.vmovdqu(gen.xword[dst], f8vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.ymm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f8vec, gen.xword[src]);
+ cvt.get_f8_e5m2_emu()->vcvt_f8_to_f16(f16vec, f8vec);
+ gen.vmovdqu(gen.yword[dst], f16vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vpmovzxwd(f16vec, gen.yword[src]);
+ gen.vpslld(f16vec, f16vec, 16);
+ cvt.get_f8_e5m2_emu()->vcvt_f32_to_f8(f8vec, f16vec);
+ gen.vmovdqu(gen.xword[dst], f8vec);
+}
+
+template <>
+void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) {
+ auto const& f8vec = gen.xmm3;
+ auto const& f16vec = gen.ymm4;
+ auto const& f32vec = gen.zmm4;
+
+ auto& cvt = dynamic_cast&>(gen);
+
+ gen.vmovdqu(f8vec, gen.xword[src]);
+ cvt.get_f8_e5m2_emu()->vcvt_f8_to_f32(f32vec, f8vec);
+ cvt.get_uni_vcvtneps2bf16()->emit_code({static_cast(f32vec.getIdx())},
+ {static_cast(f16vec.getIdx())});
+ gen.vmovdqu(gen.yword[dst], f16vec);
+}
+
template
void jit_convert(const TI* arg, TO* out, size_t count) {
- using jit_impl = jit_convert_array;
- static auto converter = jit_impl::get();
+ using jit_impl = jit_convert_array;
+ static auto converter = jit_impl::get();
if (converter) {
typename jit_impl::args_t args = {arg, out, count};
@@ -185,6 +404,12 @@ const std::tuple& Range::fit(const ov::element::Type& prec) {
if (prec.is_real()) {
double lbound, ubound;
switch (prec) {
+ case ov::element::f8e4m3:
+ lbound = static_cast(std::numeric_limits::lowest());
+ ubound = static_cast(std::numeric_limits::max());
+ case ov::element::f8e5m2:
+ lbound = static_cast(std::numeric_limits::lowest());
+ ubound = static_cast(std::numeric_limits::max());
case ov::element::bf16:
lbound = static_cast(std::numeric_limits::lowest());
ubound = static_cast(std::numeric_limits::max());
@@ -293,6 +518,18 @@ struct ConvertPrecision> {
src_t lbound, ubound;
std::tie(lbound, ubound) = ctx.range();
+ // Align with the behavior of ngraph ref and jit implementation. Conversion from f8e4m3-inf
+ // to float should output float-inf instead of f8e4m3-max. Proper handling of special values
+ // (nan, inf, overflow) has already been assured by the conversion process.
+ if (std::is_same::value || std::is_same::value ||
+ std::is_same::value || std::is_same::value) {
+ parallel_for(ctx.size, [&](size_t i) {
+ dst[i] = static_cast(src[i]);
+ });
+ ctx.converted = true;
+ return;
+ }
+
if (std::is_integral::value || ctx.interimPrc.is_real() || std::is_integral::value) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast(std::max(std::min(src[i], ubound), lbound));
@@ -492,6 +729,12 @@ struct ConvertPrecision> {
PrecisionInfo::value_type, \
PrecisionInfo::value_type)
+#define INTEL_CPU_CVT_FP8_LIST \
+ INTEL_CPU_CVT(f32, f8e4m3), INTEL_CPU_CVT(f16, f8e4m3), INTEL_CPU_CVT(bf16, f8e4m3), INTEL_CPU_CVT(f8e4m3, f32), \
+ INTEL_CPU_CVT(f8e4m3, f16), INTEL_CPU_CVT(f8e4m3, bf16), INTEL_CPU_CVT(f32, f8e5m2), \
+ INTEL_CPU_CVT(f16, f8e5m2), INTEL_CPU_CVT(bf16, f8e5m2), INTEL_CPU_CVT(f8e5m2, f32), \
+ INTEL_CPU_CVT(f8e5m2, f16), INTEL_CPU_CVT(f8e5m2, bf16)
+
#define INTEL_CPU_CVT_LIST \
INTEL_CPU_CVT(u8, i8), INTEL_CPU_CVT(u8, u16), INTEL_CPU_CVT(u8, i16), INTEL_CPU_CVT(u8, u32), \
INTEL_CPU_CVT(u8, i32), INTEL_CPU_CVT(u8, u64), INTEL_CPU_CVT(u8, i64), INTEL_CPU_CVT(u8, f32), \
@@ -535,7 +778,8 @@ struct ConvertPrecision> {
INTEL_CPU_CVT(boolean, f16), INTEL_CPU_CVT(boolean, bf16), INTEL_CPU_CVT(boolean, f64), INTEL_CPU_CVT(u8, u8), \
INTEL_CPU_CVT(i8, i8), INTEL_CPU_CVT(u16, u16), INTEL_CPU_CVT(i16, i16), INTEL_CPU_CVT(u32, u32), \
INTEL_CPU_CVT(i32, i32), INTEL_CPU_CVT(u64, u64), INTEL_CPU_CVT(i64, i64), INTEL_CPU_CVT(f32, f32), \
- INTEL_CPU_CVT(f16, f16), INTEL_CPU_CVT(bf16, bf16), INTEL_CPU_CVT(f64, f64), INTEL_CPU_CVT(boolean, boolean)
+ INTEL_CPU_CVT(f16, f16), INTEL_CPU_CVT(bf16, bf16), INTEL_CPU_CVT(f64, f64), INTEL_CPU_CVT(boolean, boolean), \
+ INTEL_CPU_CVT_FP8_LIST
#define INTEL_CPU_CVT_FROM_BIN_LIST \
INTEL_CPU_CVT(u1, f32), INTEL_CPU_CVT(u1, f16), INTEL_CPU_CVT(u1, bf16), INTEL_CPU_CVT(u1, f64), \
@@ -667,6 +911,35 @@ struct ConvertFromByteFPPrecision> {
}
};
+#if defined(OPENVINO_ARCH_X86_64)
+struct ConvertFP8Context {
+ const void* srcPtr;
+ void* dstPtr;
+ size_t size;
+ bool converted;
+};
+
+template
+struct ConvertFP8Precision;
+
+template
+struct ConvertFP8Precision> {
+ void operator()(ConvertFP8Context& ctx) {
+ auto src = static_cast(ctx.srcPtr);
+ auto dst = static_cast(ctx.dstPtr);
+ constexpr size_t batch = 64;
+ const size_t iterations = ov::intel_cpu::div_up(ctx.size, batch);
+ parallel_for(iterations, [&](size_t i) {
+ const size_t offset = i * batch;
+ const size_t current_batch_size = std::min(ctx.size - offset, batch);
+ jit_convert(src + offset, dst + offset, current_batch_size);
+ });
+
+ ctx.converted = true;
+ }
+};
+#endif
+
void cpu_convert(const void* srcPtr,
void* dstPtr,
ov::element::Type srcPrc,
@@ -728,7 +1001,7 @@ void cpu_convert(const void* srcPtr,
OV_SWITCH(intel_cpu, ConvertFrom4BitPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FROM_4BIT_LIST);
if (!ctx.converted)
OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc);
- } else if (srcPrc.bitwidth() == 8u && srcPrc.is_real()) {
+ } else if (srcPrc == ov::element::f8e8m0) {
ConvertFromByteFPContext ctx{srcPrc, srcPtr, dstPtr, size, false};
OV_SWITCH(intel_cpu,
ConvertFromByteFPPrecision,
@@ -737,6 +1010,15 @@ void cpu_convert(const void* srcPtr,
INTEL_CPU_CVT_FROM_BYTE_FP_LIST);
if (!ctx.converted)
OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc);
+#if defined(OPENVINO_ARCH_X86_64)
+ } else if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16) &&
+ (one_of(srcPrc, ov::element::f8e4m3, ov::element::f8e5m2) ||
+ one_of(dstPrc, ov::element::f8e4m3, ov::element::f8e5m2))) {
+ ConvertFP8Context ctx{srcPtr, dstPtr, size, false};
+ OV_SWITCH(intel_cpu, ConvertFP8Precision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FP8_LIST);
+ if (!ctx.converted)
+ OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc);
+#endif
} else {
ConvertContext ctx{srcPtr, dstPtr, size, interimPrc, dstPrc, false};
OV_SWITCH(intel_cpu, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_LIST);
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index db55c728df725e..b3c2aa0b298a5a 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -218,6 +218,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
ov::element::Type_t::i4,
ov::element::Type_t::u8,
ov::element::Type_t::i8,
+ ov::element::Type_t::f8e4m3,
+ ov::element::Type_t::f8e5m2,
ov::element::Type_t::u16,
ov::element::Type_t::i16,
ov::element::Type_t::u32,
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index fb9e0925bc89e2..4d7df9a335e98a 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -80,6 +80,7 @@
#include "transformations/op_conversions/detection_output_downgrade.hpp"
#include "transformations/op_conversions/detection_output_upgrade.hpp"
#include "transformations/op_conversions/eye_decomposition.hpp"
+#include "transformations/op_conversions/fake_convert_decomposition.hpp"
#include "transformations/op_conversions/fq_decomposition.hpp"
#include "transformations/op_conversions/gelu7_downgrade.hpp"
#include "transformations/op_conversions/group_normalization_decomposition.hpp"
@@ -1293,6 +1294,7 @@ void Transformations::PostSnippets(void) {
return node::FakeQuantize::isSupportedOperation(node, errMsg);
},
ov::pass::FakeQuantizeDecomposition);
+ CPU_REGISTER_PASS_COMMON(postSnippetsManager, ov::pass::FakeConvertDecomposition);
CPU_REGISTER_PASS_COMMON(postSnippetsManager, ov::pass::ConstantFolding);
postSnippetsManager.run_passes(model);
}
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp
index 4989fb3a0f04b7..a3c1f9ef7d3544 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp
@@ -16,11 +16,45 @@ using namespace CPUTestUtils;
namespace ov {
namespace test {
+static std::string special_value_to_string(const ov::test::SpecialValue& value) {
+ if (value == SpecialValue::none) {
+ return "none";
+ } else if (value == SpecialValue::nan) {
+ return "nan";
+ } else if (value == SpecialValue::inf) {
+ return "inf";
+ } else if (value == SpecialValue::overflow) {
+ return "overflow";
+ }
+ return "unknown";
+}
+
+template
+static T set_special_value(T& value, const ov::test::SpecialValue& special_value) {
+ if (special_value == ov::test::SpecialValue::nan) {
+ value = NAN;
+ } else if (special_value == ov::test::SpecialValue::inf) {
+ value = INFINITY;
+ } else if (special_value == ov::test::SpecialValue::overflow) {
+ value = value + std::numeric_limits::max();
+ }
+ return value;
+}
+
+template
+static void modify_value(ov::Tensor& tensor, const ov::test::SpecialValue& special_value) {
+ T* dataPtr = static_cast(tensor.data());
+ for (size_t i = 0; i < tensor.get_size(); i++) {
+ set_special_value(dataPtr[i], special_value);
+ }
+}
+
std::string ConvertCPULayerTest::getTestCaseName(testing::TestParamInfo obj) {
InputShape inputShape;
ov::element::Type inPrc, outPrc;
+ ov::test::SpecialValue special_value;
CPUSpecificParams cpuParams;
- std::tie(inputShape, inPrc, outPrc, cpuParams) = obj.param;
+ std::tie(inputShape, inPrc, outPrc, special_value, cpuParams) = obj.param;
std::ostringstream result;
@@ -30,6 +64,7 @@ std::string ConvertCPULayerTest::getTestCaseName(testing::TestParamInfo(inPrc, shape));
@@ -101,6 +146,31 @@ void ConvertCPULayerTest::SetUp() {
function = makeNgraphFunction(inPrc, params, conversion, "ConversionCPU");
}
+void ConvertCPULayerTest::generate_inputs(const std::vector& targetInputStaticShapes) {
+ inputs.clear();
+ const auto& funcInputs = function->inputs();
+ for (size_t i = 0; i < funcInputs.size(); ++i) {
+ const auto& funcInput = funcInputs[i];
+ ov::Tensor tensor =
+ ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+ if (special_value != ov::test::SpecialValue::none) {
+ if (inPrc == ov::element::f32) {
+ modify_value(tensor, special_value);
+ } else if (inPrc == ov::element::f16) {
+ modify_value(tensor, special_value);
+ } else if (inPrc == ov::element::bf16) {
+ modify_value(tensor, special_value);
+ } else if (inPrc == ov::element::f8e4m3) {
+ modify_value(tensor, special_value);
+ } else if (inPrc == ov::element::f8e5m2) {
+ modify_value(tensor, special_value);
+ }
+ }
+
+ inputs.insert({funcInput.get_node_shared_ptr(), tensor});
+ }
+}
+
void ConvertCPULayerTest::validate_out_prc() const {
if (outPrc == ov::element::boolean)
FAIL() << "ConvertCPULayerTest supports only non boolean output prc";
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp
index a53f56f873151c..a4f4e0fc56c238 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp
@@ -13,9 +13,12 @@ using namespace CPUTestUtils;
namespace ov {
namespace test {
+enum SpecialValue { none, nan, inf, overflow };
+
using convertLayerTestParamsSet = std::tuple;
class ConvertCPULayerTest : public testing::WithParamInterface,
@@ -25,9 +28,12 @@ class ConvertCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override;
virtual void validate_out_prc() const;
ov::element::Type inPrc, outPrc;
+private:
+ ov::test::SpecialValue special_value;
};
class ConvertToBooleanCPULayerTest : public ConvertCPULayerTest {
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp
index 11e0440b2e3618..e5d87f5cb2f3dd 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp
@@ -16,6 +16,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_7D_Dynamic, ConvertCPULayerTe
::testing::ValuesIn(inShapes_7D_dynamic()),
::testing::ValuesIn(precisions()),
::testing::ValuesIn(precisions()),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::Values(CPUSpecificParams({}, {}, {}, {}))),
ConvertCPULayerTest::getTestCaseName);
@@ -24,6 +25,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_7D_Static, ConvertCPULayerTes
::testing::ValuesIn(inShapes_7D_static()),
::testing::ValuesIn(precisions()),
::testing::ValuesIn(precisions()),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::Values(CPUSpecificParams({}, {}, {}, {}))),
ConvertCPULayerTest::getTestCaseName);
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp
index 59ca1065bf78d9..8181304bf95e7d 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp
@@ -31,6 +31,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4D_Dynamic, ConvertCPULayerTe
::testing::ValuesIn(inShapes_4D_dynamic()),
::testing::ValuesIn(precisions()),
::testing::ValuesIn(precisions()),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::ValuesIn(memForm4D_dynamic)),
ConvertCPULayerTest::getTestCaseName);
@@ -39,6 +40,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4bit_Dynamic, ConvertCPULayer
::testing::Combine(::testing::ValuesIn(inShapes_4D_dynamic()),
::testing::ValuesIn({ov::element::u4, ov::element::i4}),
::testing::ValuesIn({ov::element::f32, ov::element::bf16, ov::element::u8, ov::element::i8}),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {"ref"}))),
ConvertCPULayerTest::getTestCaseName);
@@ -52,9 +54,69 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4D_Static, ConvertCPULayerTes
::testing::ValuesIn(inShapes_4D_static()),
::testing::ValuesIn(precisions()),
::testing::ValuesIn(precisions()),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::ValuesIn(memForm4D_static_common)),
ConvertCPULayerTest::getTestCaseName);
+const std::vector<ov::element::Type> float_precisions = {
+ ov::element::f32,
+ ov::element::f16,
+ ov::element::bf16,
+};
+
+const std::vector<ov::element::Type> f8_precisions = {
+ ov::element::f8e4m3,
+ ov::element::f8e5m2,
+};
+
+const std::vector<ov::test::SpecialValue> specialValue = {
+ ov::test::SpecialValue::none,
+ ov::test::SpecialValue::nan,
+ ov::test::SpecialValue::inf,
+ ov::test::SpecialValue::overflow,
+};
+
+std::vector<CPUSpecificParams> memForm4D_fp8 = {
+ CPUSpecificParams({nchw}, {nchw}, {}, expectedPrimitiveType()),
+ CPUSpecificParams({nhwc}, {nhwc}, {}, expectedPrimitiveType()),
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_from_fp8_Static, ConvertCPULayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes_4D_static()),
+ ::testing::ValuesIn(f8_precisions),
+ ::testing::ValuesIn(float_precisions),
+ ::testing::ValuesIn(specialValue),
+ ::testing::ValuesIn(memForm4D_fp8)),
+ ConvertCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_to_fp8_Static, ConvertCPULayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes_4D_static()),
+ ::testing::ValuesIn(float_precisions),
+ ::testing::ValuesIn(f8_precisions),
+ ::testing::ValuesIn(specialValue),
+ ::testing::ValuesIn(memForm4D_fp8)),
+ ConvertCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_from_fp8_Dynamic, ConvertCPULayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes_4D_dynamic()),
+ ::testing::ValuesIn(f8_precisions),
+ ::testing::ValuesIn(float_precisions),
+ ::testing::ValuesIn(specialValue),
+ ::testing::ValuesIn(memForm4D_fp8)),
+ ConvertCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_to_fp8_Dynamic, ConvertCPULayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes_4D_dynamic()),
+ ::testing::ValuesIn(float_precisions),
+ ::testing::ValuesIn(f8_precisions),
+ ::testing::ValuesIn(specialValue),
+ ::testing::ValuesIn(memForm4D_fp8)),
+ ConvertCPULayerTest::getTestCaseName);
+
} // namespace Conversion
} // namespace test
} // namespace ov
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp
index 9c34d6220d4b2d..ab1e06639c5a3e 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp
@@ -23,6 +23,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_blocked_Dynamic, ConvertCPULa
::testing::ValuesIn(inShapes_4D_dynamic()),
::testing::ValuesIn(precisions()),
::testing::ValuesIn(precisions()),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::ValuesIn(memForm4D_dynamic)),
ConvertCPULayerTest::getTestCaseName);
@@ -44,6 +45,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Blocked, ConvertCPULayerTest,
::testing::ValuesIn(inShapes_4D_blocked),
::testing::ValuesIn(precisions()),
::testing::ValuesIn(precisions()),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::ValuesIn(filterCPUSpecificParams(memForm4D_static_blocked))),
ConvertCPULayerTest::getTestCaseName);
@@ -52,6 +54,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Static, ConvertToBoolean
::testing::ValuesIn(inShapes_4D_static()),
::testing::ValuesIn(precisions_floating_point),
::testing::Values(ov::element::boolean),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {}))),
ConvertToBooleanCPULayerTest::getTestCaseName);
@@ -60,6 +63,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Dynamic, ConvertToBoolea
::testing::ValuesIn(inShapes_4D_dynamic()),
::testing::ValuesIn(precisions_floating_point),
::testing::Values(ov::element::boolean),
+ ::testing::Values(ov::test::SpecialValue::none),
::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {}))),
ConvertToBooleanCPULayerTest::getTestCaseName);
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp
index 9ff4d0b989fefa..903b8c083b1a1f 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp
@@ -32,6 +32,17 @@ const std::vector types = {
ov::element::f64,
};
+const std::vector<ov::element::Type> floatTypes = {
+ ov::element::f32,
+ ov::element::f16,
+ ov::element::bf16,
+};
+
+const std::vector<ov::element::Type> f8Types = {
+ ov::element::f8e4m3,
+ ov::element::f8e5m2,
+};
+
INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest,
ConversionLayerTest,
::testing::Combine(::testing::ValuesIn(conversionOpTypes),
@@ -49,4 +60,23 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConversionToBooleanLayerTest,
::testing::Values(ov::element::boolean),
::testing::Values(ov::test::utils::DEVICE_CPU)),
ConversionLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConversionToF8LayerTest,
+ ConversionLayerTest,
+ ::testing::Combine(::testing::Values(conversionOpTypes[0]),
+ ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)),
+ ::testing::ValuesIn(floatTypes),
+ ::testing::ValuesIn(f8Types),
+ ::testing::Values(ov::test::utils::DEVICE_CPU)),
+ ConversionLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConversionFromF8LayerTest,
+ ConversionLayerTest,
+ ::testing::Combine(::testing::Values(conversionOpTypes[0]),
+ ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)),
+ ::testing::ValuesIn(f8Types),
+ ::testing::ValuesIn(floatTypes),
+ ::testing::Values(ov::test::utils::DEVICE_CPU)),
+ ConversionLayerTest::getTestCaseName);
+
} // namespace
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/fake_convert.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/fake_convert.cpp
new file mode 100644
index 00000000000000..a2f17ea72cbb3e
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/fake_convert.cpp
@@ -0,0 +1,59 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_op_tests/fake_convert.hpp"
+
+namespace {
+using ov::test::FakeConvertLayerTest;
+
+const std::vector<std::vector<ov::Shape>> shapes = {{{2, 3, 4, 5}}};
+
+const std::vector<ov::element::Type> data_precisions = {ov::element::f32, ov::element::f16, ov::element::bf16};
+
+const std::vector<ov::element::Type> destination_precisions = {ov::element::f8e4m3, ov::element::f8e5m2};
+
+const std::vector<bool> default_shift = {true, false};
+
+const auto simple_fake_convert_params =
+ ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)),
+ ::testing::Values(ov::Shape{1}),
+ ::testing::Values(ov::Shape{1}),
+ ::testing::ValuesIn(data_precisions),
+ ::testing::ValuesIn(destination_precisions),
+ ::testing::ValuesIn(default_shift),
+ ::testing::Values(ov::test::utils::DEVICE_CPU));
+
+const auto broadcast_fake_convert_params =
+ ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)),
+ ::testing::Values(ov::Shape{2, 3, 1, 1}),
+ ::testing::Values(ov::Shape{2, 3, 1, 1}),
+ ::testing::ValuesIn(data_precisions),
+ ::testing::ValuesIn(destination_precisions),
+ ::testing::ValuesIn(default_shift),
+ ::testing::Values(ov::test::utils::DEVICE_CPU));
+
+const auto elementwise_fake_convert_params =
+ ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)),
+ ::testing::Values(ov::Shape{2, 3, 4, 5}),
+ ::testing::Values(ov::Shape{2, 3, 4, 5}),
+ ::testing::ValuesIn(data_precisions),
+ ::testing::ValuesIn(destination_precisions),
+ ::testing::ValuesIn(default_shift),
+ ::testing::Values(ov::test::utils::DEVICE_CPU));
+
+INSTANTIATE_TEST_SUITE_P(smoke_FakeConvert_simple,
+ FakeConvertLayerTest,
+ simple_fake_convert_params,
+ FakeConvertLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_FakeConvert_broadcast,
+ FakeConvertLayerTest,
+ broadcast_fake_convert_params,
+ FakeConvertLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_FakeConvert_elementwise,
+ FakeConvertLayerTest,
+ elementwise_fake_convert_params,
+ FakeConvertLayerTest::getTestCaseName);
+} // namespace
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index 7af707df602bfc..4c34b3fd2506ac 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -173,6 +173,8 @@ std::vector disabledTestPatterns() {
R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=21_.*_sort=value_modelType=f16_trgDev=CPU.*)",
// Issue: 121812
R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)",
+ // Issue: MFDNN-12917. The oneDNN emitter of conversion from fp32 to fp8 has rounding issue.
+ R"(.*ConvertCPULayerTest.*(\[1.1.1080.1920\]|\(2.17.5.4\))_.*_inputPRC=f32_targetPRC=f8e4m3_.*)",
// Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling.
R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)",
// Issue: 123815 (Tests are sensintive to available thread count on testing machines)
@@ -529,6 +531,7 @@ std::vector disabledTestPatterns() {
retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)");
retVector.emplace_back(R"(.*ConcatSDPTest.*f16.*)");
+ retVector.emplace_back(R"(.*ConvertCPULayerTest.*f16.*)");
}
#elif defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)
if (!ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
@@ -536,6 +539,7 @@ std::vector disabledTestPatterns() {
retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
retVector.emplace_back(R"(.*Prc=f16.*)");
retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*HasShapeOf=1.*)");
+ retVector.emplace_back(R"(.*ConvertCPULayerTest.*f16.*)");
} else {
// Issue 117407
retVector.emplace_back(
diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp
index f87f608597a6bb..2638f2ad60cf26 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp
@@ -24,6 +24,10 @@ struct paged_attention : public primitive_base {
OPENVINO_ASSERT(inputs.size() == 13, "[GPU] Unexpected inputs number for PagedAttention primitive: ", inputs.size());
}
+ bool has_scores_output() const {
+ return num_outputs == 2;
+ }
+
bool operator==(const primitive& rhs) const override {
return compare_common_params(rhs);
}
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp
index 9cf1a252564934..2bc377f2c1459a 100644
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp
@@ -63,6 +63,7 @@ struct paged_attention_impl : multi_stage_primitive {
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
+ ib >> make_data(&has_scores_output, sizeof(bool));
if (is_dynamic()) {
auto& kv_cache_update_kernel_selector = kv_cache_update_kernel_selector_t::Instance();
auto kv_cache_update_kernel_impl = kv_cache_update_kernel_selector.GetImplementation(_kernels_data[Stage::KV_CACHE_UPDATE].kernelName);
@@ -78,7 +79,45 @@ struct paged_attention_impl : multi_stage_primitive {
}
}
+ void save(BinaryOutputBuffer& ob) const override {
+ parent::save(ob);
+ ob << make_data(&has_scores_output, sizeof(bool));
+ }
+
std::vector get_internal_buffer_layouts_impl() const override {
+ /*
+ * Internal buffers allocation owners and users:
+ * +--------------------------------------+--------------------+--------------------+
+ * | Stage | Allocates & uses | Reuses |
+ * +--------------------------------------+--------------------+--------------------+
+ * | KV_CACHE_UPDATE | [0, 1, 2] | |
+ * +--------------------------------------+--------------------+--------------------+
+ * | SDPA (1st token) | | [0, 1, 2] |
+ * +--------------------------------------+--------------------+--------------------+
+ * | PA_SDPA (2nd+ token) | [5, 6, 7] | |
+ * +--------------------------------------+--------------------+--------------------+
+ * | PA_SDPA (mixed mode) | [5, 6, 7, 8] | |
+ * +--------------------------------------+--------------------+--------------------+
+ * | SDPA (1st token) + scores output | | [0, 1, 2, 3, 4] |
+ * +--------------------------------------+--------------------+--------------------+
+ * | PA_SDPA (2nd+ token) + scores output | [3, 4, 5, 6, 7] | |
+ * +--------------------------------------+--------------------+--------------------+
+ * | PA_SDPA (mixed mode) + scores output | [3, 4, 5, 6, 7, 8] | |
+ * +--------------------------------------+--------------------+--------------------+
+ *
+ * Description:
+ * 0, 1, 2 - Buffers used for proper blocks distribution for kv_cache_update and
+ * sdpa_opt (1st token calculation) block configuration over target_seq_len dimension.
+ * Filled in paged_attention_inst::on_execute() call.
+ * 3, 4 - Optional buffers used for PA scores output calculation, storing intermediate
+ * softmax values by partitions (filled in PA/SDPA kernels) and sequence length offsets
+ * for each subsequence (filled in paged_attention_inst::on_execute() call).
+ * 5, 6, 7 - Used for 2nd+ PA calculation (for softmax exp_sums, max_logits, and intermediate output).
+ * Filled in PA/SDPA kernels.
+ * 8 - Optional buffer used for mixed PA execution mode, mapping gws idx to subsequence id.
+ * Filled in paged_attention_inst::on_execute() call.
+ */
+
auto add_internal_buffers = [](std::vector& layouts, const kernel_selector::KernelData& kd) {
if (kd.internalBufferSizes.empty())
return;
@@ -133,6 +172,7 @@ struct paged_attention_impl : multi_stage_primitive {
args.outputs = { instance.output_memory_ptr(0) };
} else if (stage == Stage::PA_SDPA) {
if (kernel_idx == 0 || kernel_idx == 1) {
+ // 2nd+ token calculation or mixed stage tokens calculation
args.shape_info = instance.shape_info_memory_ptr();
args.inputs = { instance.input_memory_ptr(0),
@@ -155,7 +195,8 @@ struct paged_attention_impl : multi_stage_primitive {
if (desc->has_alibi) {
args.inputs.push_back(instance.alibi_memory_ptr());
}
- } else {
+ } else if (kernel_idx == 2 || kernel_idx == 3) {
+ // Finalization kernel or mixed stage finalization kernel
args.inputs = { instance.past_lens_memory_ptr() };
if (is_mixed_mode) {
@@ -163,17 +204,31 @@ struct paged_attention_impl : multi_stage_primitive {
// dependency
args.inputs.push_back(instance.subsequence_begins_memory_ptr());
}
+ } else if (kernel_idx == 4) {
+ // Output scores calculation kernel
+ args.inputs = { instance.past_lens_memory_ptr(),
+ instance.subsequence_begins_memory_ptr() };
}
args.outputs = { instance.output_memory_ptr(0) };
+
+ if (kernel_idx == 4) {
+ args.outputs.push_back(instance.output_memory_ptr(1));
+ }
}
return args;
}
std::set<size_t> get_lockable_internal_buffers() const override {
- return std::set<size_t>{ 0, 1, 2, /* SDPA and KV_CACHE_UPDATE indexes configuration */
- 6, /* PA_SDPA multiple tokens mode */ };
+ size_t mixed_mode_buffer = has_scores_output ? 8 : 6;
+
+ std::set<size_t> lockable_ids = { 0, 1, 2, /* SDPA and KV_CACHE_UPDATE indexes configuration */
+ mixed_mode_buffer /* PA_SDPA multiple tokens mode */ };
+ if (has_scores_output)
+ lockable_ids.insert(4 /* Precalculated accumulated sequence length offsets for each subsequence */);
+
+ return lockable_ids;
};
void execute_stage(const std::vector& events,
@@ -194,8 +249,17 @@ struct paged_attention_impl : multi_stage_primitive {
if (stage == Stage::PA_SDPA) {
internal_buffers_offset = _kernels_data[Stage::KV_CACHE_UPDATE].internalBufferSizes.size();
internal_buffers_count = _kernels_data[Stage::PA_SDPA].internalBufferSizes.size();
- } else {
+ } else if (stage == Stage::KV_CACHE_UPDATE) {
+ internal_buffers_count = _kernels_data[Stage::KV_CACHE_UPDATE].internalBufferSizes.size();
+ } else if (stage == Stage::SDPA) {
internal_buffers_count = _kernels_data[Stage::KV_CACHE_UPDATE].internalBufferSizes.size();
+
+ const auto desc = instance.get_node().as().get_primitive();
+ if (desc->has_scores_output()) {
+ // Add intermediate buffers for PagedAttention scores calculation:
+ // softmax_results, subsequence_offsets, exp_sums, max_logits, tmp_out
+ internal_buffers_count += 5;
+ }
}
for (size_t kd_idx = 0; kd_idx < _kernels_data[stage].kernels.size(); ++kd_idx) {
@@ -216,6 +280,23 @@ struct paged_attention_impl : multi_stage_primitive {
intermediate_memories.begin() + internal_buffers_offset,
intermediate_memories.begin() + internal_buffers_offset + internal_buffers_count);
+ GPU_DEBUG_TRACE_DETAIL << "Execute stage=" << stage << " kernel=" << kd_idx << " " << _kernels_data[stage].kernelName << " start_offset="
+ << internal_buffers_offset << " count=" << internal_buffers_count << "\n";
+
+ GPU_DEBUG_TRACE_DETAIL << "Configured kernel arguments:\n";
+ for (size_t i = 0; i < _kernels_data[stage].kernels[kd_idx].params.arguments.size(); i++) {
+ GPU_DEBUG_TRACE_DETAIL << "\t" << i << ": type=" << static_cast(_kernels_data[stage].kernels[kd_idx].params.arguments[i].t) << " "
+ << "index=" << _kernels_data[stage].kernels[kd_idx].params.arguments[i].index << "\n";
+ }
+
+ GPU_DEBUG_TRACE_DETAIL << "Memory buffers:"
+ << "shape_info=" << args.shape_info << " "
+ << "inputs=" << args.inputs.size() << " "
+ << "outputs=" << args.outputs.size() << " "
+ << "intermediates=" << args.intermediates.size() << " "
+ << "weights=" << args.weights << " "
+ << "scalars=" << (args.scalars ? args.scalars->size() : 0) << "\n";
+
stream.set_arguments(*_kernels[idx_final], _kernels_data[stage].kernels[kd_idx].params, args);
const auto& gws = params.workGroups.global;
@@ -242,10 +323,13 @@ struct paged_attention_impl : multi_stage_primitive {
execute_stage(events, instance, res_events, Stage::KV_CACHE_UPDATE, is_mixed_mode);
- std::vector<event::ptr> dep_events(res_events.begin(), res_events.end());
if (stage == PagedAttentionStage::PREFILL) {
+ std::vector<event::ptr> dep_events(res_events.begin(), res_events.end());
execute_stage(dep_events, instance, res_events, Stage::SDPA, is_mixed_mode);
- } else if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED) {
+ }
+
+ if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED || has_scores_output) {
+ std::vector<event::ptr> dep_events(res_events.begin(), res_events.end());
execute_stage(dep_events, instance, res_events, Stage::PA_SDPA, is_mixed_mode);
}
@@ -338,7 +422,7 @@ struct paged_attention_impl : multi_stage_primitive {
return aligned_seq_len;
}
- static kernel_selector::sdpa_configuration get_sdpa_configuration(const kernel_impl_params& impl_param) {
+ static kernel_selector::sdpa_configuration get_sdpa_configuration(const kernel_impl_params& impl_param, bool is_dynamic = true) {
kernel_selector::sdpa_configuration config;
const auto desc = impl_param.typed_desc();
@@ -362,37 +446,45 @@ struct paged_attention_impl : multi_stage_primitive {
config.group_size = desc->heads_num / desc->kv_heads_num;
}
+ if (desc->has_scores_output() && !is_dynamic) {
+ const auto& input_mem = impl_param.memory_deps;
+ const auto max_context_len = input_mem.at(12);
+ mem_lock<int32_t, mem_lock_type::read> max_context_len_mem_lock(max_context_len, *impl_param.strm);
+ config.paged_attention_max_len = max_context_len_mem_lock[0];
+ }
+
return config;
}
static kv_cache_update_kernel_params_t get_kv_cache_update_kernel_params(const kernel_impl_params& impl_param,
const PagedAttentionStage& stage,
+ const kernel_selector::MultiDataTensor& input_tensors,
bool is_dynamic = false) {
auto params = get_default_params(impl_param, is_dynamic);
- const auto& key_layout = impl_param.get_input_layout(1);
- const auto& value_layout = impl_param.get_input_layout(2);
- const auto& key_cache_layout = impl_param.get_input_layout(3);
- const auto& value_cache_layout = impl_param.get_input_layout(4);
- const auto& past_lens_layout = impl_param.get_input_layout(5);
- const auto& block_indices_layout = impl_param.get_input_layout(7);
- const auto& block_indices_begins_layout = impl_param.get_input_layout(8);
- const auto& subsequence_begins_layout = impl_param.get_input_layout(6);
+ const auto& key_tensor = input_tensors[1];
+ const auto& value_tensor = input_tensors[2];
+ const auto& key_cache_tensor = input_tensors[3];
+ const auto& value_cache_tensor = input_tensors[4];
+ const auto& past_lens_tensor = input_tensors[5];
+ const auto& block_indices_tensor = input_tensors[7];
+ const auto& block_indices_begins_tensor = input_tensors[8];
+ const auto& subsequence_begins_tensor = input_tensors[6];
const auto inputs_number = 6;
const auto outputs_number = 2;
params.inputs.resize(inputs_number);
params.outputs.resize(outputs_number);
- params.inputs[0] = convert_data_tensor(key_layout);
- params.inputs[1] = convert_data_tensor(value_layout);
- params.inputs[2] = convert_data_tensor(past_lens_layout);
- params.inputs[3] = convert_data_tensor(block_indices_layout);
- params.inputs[4] = convert_data_tensor(block_indices_begins_layout);
- params.inputs[5] = convert_data_tensor(subsequence_begins_layout);
- params.outputs[0] = convert_data_tensor(key_cache_layout);
- params.outputs[1] = convert_data_tensor(value_cache_layout);
+ params.inputs[0] = key_tensor;
+ params.inputs[1] = value_tensor;
+ params.inputs[2] = past_lens_tensor;
+ params.inputs[3] = block_indices_tensor;
+ params.inputs[4] = block_indices_begins_tensor;
+ params.inputs[5] = subsequence_begins_tensor;
+ params.outputs[0] = key_cache_tensor;
+ params.outputs[1] = value_cache_tensor;
- params.conf = get_sdpa_configuration(impl_param);
+ params.conf = get_sdpa_configuration(impl_param, is_dynamic);
params.is_prefill = stage == PagedAttentionStage::PREFILL || stage == PagedAttentionStage::MIXED;
@@ -418,18 +510,23 @@ struct paged_attention_impl : multi_stage_primitive {
return params;
}
- static sdpa_kernel_params_t get_sdpa_kernel_params(const kernel_impl_params& impl_param, const PagedAttentionStage& stage, bool is_dynamic = false) {
+ static sdpa_kernel_params_t get_sdpa_kernel_params(const kernel_impl_params& impl_param,
+ const PagedAttentionStage& stage,
+ const kernel_selector::MultiDataTensor& input_tensors,
+ bool is_dynamic = false) {
const auto desc = impl_param.typed_desc();
auto params = get_default_params(impl_param, is_dynamic);
- const auto& query_layout = impl_param.get_input_layout(0);
- const auto& key_layout = impl_param.get_input_layout(1);
- const auto& value_layout = impl_param.get_input_layout(2);
- const auto& subsequence_begins_layout = impl_param.get_input_layout(6);
- const auto& scale_layout = impl_param.get_input_layout(9);
- const auto& alibi_layout = impl_param.get_input_layout(11);
- const auto has_alibi = alibi_layout.count() > 0;
+ const auto& query_tensor = input_tensors[0];
+ const auto& key_tensor = input_tensors[1];
+ const auto& value_tensor = input_tensors[2];
+ const auto& subsequence_begins_tensor = input_tensors[6];
+ const auto& scale_tensor = input_tensors[9];
+ const auto& alibi_tensor = input_tensors[11];
+
+ const auto has_alibi = impl_param.get_input_layout(11).count() > 0;
const auto has_scale_input = !desc->scale_val.has_value();
+ const auto has_scores_output = desc->has_scores_output();
auto inputs_number = 4;
if (has_scale_input)
@@ -440,18 +537,23 @@ struct paged_attention_impl : multi_stage_primitive {
auto input_idx = 0;
params.inputs.resize(inputs_number);
- params.inputs[input_idx++] = convert_data_tensor(query_layout);
- params.inputs[input_idx++] = convert_data_tensor(key_layout);
- params.inputs[input_idx++] = convert_data_tensor(value_layout);
- params.inputs[input_idx++] = convert_data_tensor(subsequence_begins_layout);
+ params.inputs[input_idx++] = query_tensor;
+ params.inputs[input_idx++] = key_tensor;
+ params.inputs[input_idx++] = value_tensor;
+ params.inputs[input_idx++] = subsequence_begins_tensor;
if (has_scale_input)
- params.inputs[input_idx++] = convert_data_tensor(scale_layout);
+ params.inputs[input_idx++] = scale_tensor;
if (has_alibi)
- params.inputs[input_idx++] = convert_data_tensor(alibi_layout);
+ params.inputs[input_idx++] = alibi_tensor;
- params.conf = get_sdpa_configuration(impl_param);
+ if (has_scores_output) {
+ params.outputs.resize(2);
+ params.outputs[1] = convert_data_tensor(impl_param.get_output_layout(1));
+ }
+
+ params.conf = get_sdpa_configuration(impl_param, is_dynamic);
const auto& in_offsets_map = impl_param.in_port_to_shape_info_offset;
const auto& out_offsets_map = impl_param.out_port_to_shape_info_offset;
@@ -475,26 +577,34 @@ struct paged_attention_impl : multi_stage_primitive {
if ((stage == PagedAttentionStage::PREFILL || stage == PagedAttentionStage::MIXED) && !is_dynamic)
params.conf.paged_attention_aligned_seq_len = get_aligned_seq_len(impl_param, stage);
+ if (has_scores_output)
+ out_tensor_to_offset_map.insert({1, out_offsets_map.at(1)});
+
params.set_dynamic_shape_offsets(in_tensor_to_offset_map, out_tensor_to_offset_map);
return params;
}
- static pa_sdpa_kernel_params_t get_pa_sdpa_params(const kernel_impl_params& impl_param, const PagedAttentionStage& stage, bool is_dynamic = false) {
+ static pa_sdpa_kernel_params_t get_pa_sdpa_params(const kernel_impl_params& impl_param,
+ const PagedAttentionStage& stage,
+ const kernel_selector::MultiDataTensor& input_tensors,
+ bool is_dynamic = false) {
const auto desc = impl_param.typed_desc();
auto params = get_default_params(impl_param, is_dynamic);
- const auto& query_layout = impl_param.get_input_layout(0);
- const auto& key_cache_layout = impl_param.get_input_layout(3);
- const auto& value_cache_layout = impl_param.get_input_layout(4);
- const auto& past_lens_layout = impl_param.get_input_layout(5);
- const auto& block_indices_layout = impl_param.get_input_layout(7);
- const auto& block_indices_begins_layout = impl_param.get_input_layout(8);
- const auto& subsequence_begins_layout = impl_param.get_input_layout(6);
- const auto& scale_layout = impl_param.get_input_layout(9);
- const auto& alibi_layout = impl_param.get_input_layout(11);
- const auto has_alibi = alibi_layout.count() > 0;
+ const auto& query_tensor = input_tensors[0];
+ const auto& key_cache_tensor = input_tensors[3];
+ const auto& value_cache_tensor = input_tensors[4];
+ const auto& past_lens_tensor = input_tensors[5];
+ const auto& block_indices_tensor = input_tensors[7];
+ const auto& block_indices_begins_tensor = input_tensors[8];
+ const auto& subsequence_begins_tensor = input_tensors[6];
+ const auto& scale_tensor = input_tensors[9];
+ const auto& alibi_tensor = input_tensors[11];
+
+ const auto has_alibi = impl_param.get_input_layout(11).count() > 0;
const auto has_scale_input = !desc->scale_val.has_value();
+ const auto has_scores_output = desc->has_scores_output();
auto inputs_number = 7;
if (has_scale_input)
@@ -505,28 +615,34 @@ struct paged_attention_impl : multi_stage_primitive {
auto input_idx = 0;
params.inputs.resize(inputs_number);
- params.inputs[input_idx++] = convert_data_tensor(query_layout);
- params.inputs[input_idx++] = convert_data_tensor(key_cache_layout);
- params.inputs[input_idx++] = convert_data_tensor(value_cache_layout);
- params.inputs[input_idx++] = convert_data_tensor(past_lens_layout);
- params.inputs[input_idx++] = convert_data_tensor(block_indices_layout);
- params.inputs[input_idx++] = convert_data_tensor(block_indices_begins_layout);
- params.inputs[input_idx++] = convert_data_tensor(subsequence_begins_layout);
- params.conf = get_sdpa_configuration(impl_param);
+ params.inputs[input_idx++] = query_tensor;
+ params.inputs[input_idx++] = key_cache_tensor;
+ params.inputs[input_idx++] = value_cache_tensor;
+ params.inputs[input_idx++] = past_lens_tensor;
+ params.inputs[input_idx++] = block_indices_tensor;
+ params.inputs[input_idx++] = block_indices_begins_tensor;
+ params.inputs[input_idx++] = subsequence_begins_tensor;
+
+ params.conf = get_sdpa_configuration(impl_param, is_dynamic);
if (has_scale_input)
- params.inputs[input_idx++] = convert_data_tensor(scale_layout);
+ params.inputs[input_idx++] = scale_tensor;
if (has_alibi)
- params.inputs[input_idx++] = convert_data_tensor(alibi_layout);
+ params.inputs[input_idx++] = alibi_tensor;
- params.multi_tokens_mode = stage == PagedAttentionStage::MIXED;
+ if (has_scores_output) {
+ params.outputs.resize(2);
+ params.outputs[1] = convert_data_tensor(impl_param.get_output_layout(1));
+ }
- if ((stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED) && !is_dynamic) {
+ params.stage = stage;
+
+ if (!has_scores_output && !is_dynamic) {
const auto& input_mem = impl_param.memory_deps;
const auto max_context_len = input_mem.at(12);
mem_lock<int32_t, mem_lock_type::read> max_context_len_mem_lock(max_context_len, *impl_param.strm);
- params.max_context_len = max_context_len_mem_lock[0];
+ params.conf.paged_attention_max_len = max_context_len_mem_lock[0];
}
const auto& in_offsets_map = impl_param.in_port_to_shape_info_offset;
@@ -552,6 +668,9 @@ struct paged_attention_impl : multi_stage_primitive {
if (has_alibi)
in_tensor_to_offset_map.insert({input_idx++, in_offsets_map.at(11)});
+ if (has_scores_output)
+ out_tensor_to_offset_map.insert({1, out_offsets_map.at(1)});
+
params.set_dynamic_shape_offsets(in_tensor_to_offset_map, out_tensor_to_offset_map);
return params;
@@ -560,14 +679,20 @@ struct paged_attention_impl : multi_stage_primitive {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
const auto stage = get_paged_attention_stage(impl_param);
- auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, impl_param.is_dynamic());
+ kernel_selector::MultiDataTensor input_tensors;
+ for (const auto& input_layout : impl_param.input_layouts)
+ input_tensors.emplace_back(convert_data_tensor(input_layout));
+
+ auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic());
(_kernels_data[Stage::KV_CACHE_UPDATE].update_dispatch_data_func)(kv_cache_update_kernel_params, _kernels_data[Stage::KV_CACHE_UPDATE]);
if (stage == PagedAttentionStage::PREFILL) {
- auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, impl_param.is_dynamic());
+ auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic());
(_kernels_data[Stage::SDPA].update_dispatch_data_func)(sdpa_kernel_params, _kernels_data[Stage::SDPA]);
- } else if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED) {
- auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, impl_param.is_dynamic());
+ }
+
+ if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED || has_scores_output) {
+ auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, input_tensors, impl_param.is_dynamic());
(_kernels_data[Stage::PA_SDPA].update_dispatch_data_func)(pa_sdpa_kernel_params, _kernels_data[Stage::PA_SDPA]);
}
}
@@ -576,20 +701,32 @@ struct paged_attention_impl : multi_stage_primitive {
std::vector kernels_data;
const auto stage = PagedAttentionStage::UNKNOWN;
- auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, impl_param.is_dynamic());
+ kernel_selector::MultiDataTensor input_tensors;
+ for (const auto& input_layout : impl_param.input_layouts)
+ input_tensors.emplace_back(convert_data_tensor(input_layout));
+
+ auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic());
auto& kv_cache_update_kernel_selector = kv_cache_update_kernel_selector_t::Instance();
kernels_data.push_back(kv_cache_update_kernel_selector.get_best_kernel(kv_cache_update_kernel_params));
- auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, impl_param.is_dynamic());
+ auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic());
auto& sdpa_kernel_selector = sdpa_kernel_selector_t::Instance();
kernels_data.push_back(sdpa_kernel_selector.get_best_kernel(sdpa_kernel_params));
- auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, impl_param.is_dynamic());
+ auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, input_tensors, impl_param.is_dynamic());
auto& pa_sdpa_kernel_selector = pa_sdpa_kernel_selector_t::Instance();
kernels_data.push_back(pa_sdpa_kernel_selector.get_best_kernel(pa_sdpa_kernel_params));
- return cldnn::make_unique(kernels_data);
+ auto impl = cldnn::make_unique(kernels_data);
+
+ const auto& desc = impl_param.typed_desc();
+ impl->has_scores_output = desc->has_scores_output();
+
+ return impl;
}
+
+private:
+ bool has_scores_output = false;
};
namespace detail {
diff --git a/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h b/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h
index a7918ba9c3719c..675d77296aa06b 100644
--- a/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h
@@ -7,14 +7,11 @@
#include "intel_gpu/primitives/paged_attention.hpp"
#include "primitive_inst.h"
+#include "sdpa/pa_sdpa_kernel_opt.h"
+
namespace cldnn {
-enum PagedAttentionStage {
- GENERATE = 0,
- PREFILL = 1,
- MIXED = 2,
- UNKNOWN = 3
-};
+using PagedAttentionStage = kernel_selector::PagedAttentionStage;
PagedAttentionStage get_paged_attention_stage(const kernel_impl_params& impl_param);
@@ -61,6 +58,9 @@ class typed_primitive_inst : public typed_primitive_inst_base prefill_network;
diff --git a/src/plugins/intel_gpu/src/graph/paged_attention.cpp b/src/plugins/intel_gpu/src/graph/paged_attention.cpp
index 787fd184f75b6a..c761aaf63799cd 100644
--- a/src/plugins/intel_gpu/src/graph/paged_attention.cpp
+++ b/src/plugins/intel_gpu/src/graph/paged_attention.cpp
@@ -48,14 +48,38 @@ layout paged_attention_inst::calc_output_layout(const paged_attention_node& /*no
template
std::vector paged_attention_inst::calc_output_layouts(paged_attention_node const& /*node*/, kernel_impl_params const& impl_param) {
- auto out_layout = impl_param.get_input_layout(0);
+ auto data_layout = impl_param.get_input_layout(0);
const auto& key_cache_ps = impl_param.get_input_layout(3).get_partial_shape();
bool valid_block_size = key_cache_ps[3].is_dynamic() || key_cache_ps[3].get_length() == paged_attention::block_size;
OPENVINO_ASSERT(valid_block_size, "[GPU] Incorrect block size for Paged Attention operation. "
"Expected ", paged_attention::block_size, ", but got ", key_cache_ps[3].get_length());
- return {out_layout};
+ std::vector output_layouts{ data_layout };
+
+ const auto& desc = impl_param.typed_desc();
+ if (desc->has_scores_output()) {
+ const auto past_lens_idx = 5;
+ const auto output_dt = data_layout.data_type;
+ if (impl_param.get_input_layout(past_lens_idx).is_static()) {
+ const auto& memory_deps = impl_param.memory_deps;
+ const auto past_lens_mem = memory_deps.at(past_lens_idx);
+ mem_lock past_lens_mem_lock(past_lens_mem, *impl_param.strm);
+
+ long int total_size = 0;
+ for (size_t i = 0; i < past_lens_mem_lock.size(); i++) {
+ total_size += past_lens_mem_lock[i];
+ }
+
+ total_size += static_cast(impl_param.get_input_layout(0).get_shape()[0]);
+
+ output_layouts.push_back(layout{ov::PartialShape{total_size}, output_dt, format::bfyx});
+ } else {
+ output_layouts.push_back(layout{ov::PartialShape::dynamic(1), output_dt, format::bfyx});
+ }
+ }
+
+ return output_layouts;
}
template std::vector
@@ -81,45 +105,79 @@ std::string paged_attention_inst::to_string(const paged_attention_node& node) {
}
void paged_attention_inst::on_execute() {
- auto stage = get_paged_attention_stage(*_impl_params);
+ const auto& desc = _impl_params->typed_desc();
+ const bool has_scores_output = desc->has_scores_output();
+ const auto stage = get_paged_attention_stage(*_impl_params);
- if (stage == PagedAttentionStage::UNKNOWN ||
- stage == PagedAttentionStage::GENERATE)
+ if ((stage == PagedAttentionStage::UNKNOWN) ||
+ (stage == PagedAttentionStage::GENERATE && !has_scores_output))
return;
+ auto& stream = get_network().get_stream();
+ const auto past_lens_mem = past_lens_memory_ptr();
+ const auto subsequence_begins_mem = subsequence_begins_memory_ptr();
+ mem_lock past_lens_mem_lock(past_lens_mem, stream);
+ mem_lock subsequence_begins_mem_lock(subsequence_begins_mem, stream);
+ std::unique_ptr> subsequence_offsets_lock = nullptr;
+
+ if (has_scores_output) {
+ const size_t subsequence_offsets_idx = 4;
+
+ OPENVINO_ASSERT(_intermediates_memory.size() > subsequence_offsets_idx,
+ "[GPU] Unexpected number of intermediates buffers for Paged Attention for scores output calculation");
+
+ auto subsequence_offsets_mem = _intermediates_memory[subsequence_offsets_idx];
+ subsequence_offsets_lock.reset(new mem_lock(subsequence_offsets_mem, stream));
+ }
+
+ if (stage == PagedAttentionStage::GENERATE) {
+ // For the generate stage it's not necessary to configure any other intermediate
+ // buffers. Simply calculate the offsets and exit
+ size_t subsequence_offsets_acc = 0;
+ for (size_t i = 0; i < subsequence_begins_mem_lock.size() - 1; i++) {
+ const auto past_len = past_lens_mem_lock[i];
+ const auto seq_start = subsequence_begins_mem_lock[i];
+ const auto seq_end = subsequence_begins_mem_lock[i + 1];
+ const auto seq_length = seq_end - seq_start;
+
+ if (subsequence_offsets_lock) {
+ subsequence_offsets_lock->operator[](i) = static_cast(subsequence_offsets_acc);
+ subsequence_offsets_acc += seq_length + past_len;
+ }
+ }
+
+ return;
+ }
+
OPENVINO_ASSERT(_intermediates_memory.size() >= 3, "Unexpected number of intermediates buffers for Paged Attention at prefill stage");
const auto blocks_indexes_start_idx = 0;
const auto blocks_indexes_end_idx = 1;
const auto blocked_gws_subseq_mapping_idx = 2;
- const auto past_lens_mem = past_lens_memory_ptr();
- auto subsequence_begins_mem = subsequence_begins_memory_ptr();
auto blocks_indexes_start_mem = _intermediates_memory[blocks_indexes_start_idx];
auto blocks_indexes_end_mem = _intermediates_memory[blocks_indexes_end_idx];
auto blocked_gws_subseq_mapping_mem = _intermediates_memory[blocked_gws_subseq_mapping_idx];
OPENVINO_ASSERT(subsequence_begins_mem->get_layout().data_type == data_types::i32);
- auto& stream = get_network().get_stream();
- mem_lock