diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 3783a7e8d5600a..bcfa07fb5c24b3 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-27430 +pr-27597 diff --git a/.github/dockerfiles/ov_test/debian_10_py310/Dockerfile b/.github/dockerfiles/ov_test/debian_10_py310/Dockerfile new file mode 100644 index 00000000000000..e7dbadf5a414ba --- /dev/null +++ b/.github/dockerfiles/ov_test/debian_10_py310/Dockerfile @@ -0,0 +1,76 @@ +ARG REGISTRY="docker.io" +FROM ${REGISTRY}/library/debian:10.13 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install \ + git \ + libc6-dev \ + # parallel gzip + pigz \ + # Python + python3 \ + python3-pip \ + python3-dev \ + python3-venv \ + python3-distutils \ + # To build Python 3.10 from source + build-essential \ + libffi-dev \ + libgdbm-dev \ + libc6-dev \ + libssl-dev \ + zlib1g-dev \ + libbz2-dev \ + libreadline-dev \ + libsqlite3-dev \ + libncurses5-dev \ + libncursesw5-dev \ + xz-utils \ + tk-dev \ + libxml2-dev \ + libxmlsec1-dev \ + liblzma-dev \ + wget \ + curl \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install openvino dependencies +ADD scripts/install_dependencies/install_openvino_dependencies.sh /install_openvino_dependencies.sh +RUN chmod +x /install_openvino_dependencies.sh && \ + /install_openvino_dependencies.sh && \ + rm -rf /var/lib/apt/lists/* + +# Setup Python 3.10 +RUN wget https://www.python.org/ftp/python/3.10.9/Python-3.10.9.tar.xz + +RUN tar -xf Python-3.10.9.tar.xz && \ + cd Python-3.10.9 && \ + ./configure --enable-optimizations && \ + make -j 8 && \ + make altinstall + +# Setup pip +ENV PIP_VERSION="24.0" +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3.10 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + rm -f get-pip.py + +# Use Python 3.10 as default instead of Python 3.7 +# Using venv here 'cause other methods to switch the default Python on Ubuntu 20 break both system and wheels build +RUN python3.10 -m venv venv +ENV PATH="/venv/bin:$PATH" + +ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} +ENV PIP_INSTALL_PATH=/venv/lib/python3.10/site-packages diff --git a/.github/dockerfiles/ov_test/fedora_33/Dockerfile b/.github/dockerfiles/ov_test/fedora_33/Dockerfile index c059c82c7d3cf2..6e0fcc7d35156b 100644 --- a/.github/dockerfiles/ov_test/fedora_33/Dockerfile +++ b/.github/dockerfiles/ov_test/fedora_33/Dockerfile @@ -6,7 +6,13 @@ USER root RUN yum update -y && yum install -y \ git \ curl \ - python3 + python3 \ + findutils \ + ocl-icd \ + ocl-icd-devel \ + # parallel gzip + pigz \ + xz # Install Node ENV NODE_VERSION=21.7.3 diff --git a/.github/dockerfiles/ov_test/ubuntu_20_04_x64_py313/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_20_04_x64_py313/Dockerfile new file mode 100644 index 00000000000000..b6b99f81305dee --- /dev/null +++ b/.github/dockerfiles/ov_test/ubuntu_20_04_x64_py313/Dockerfile @@ -0,0 +1,52 @@ +ARG REGISTRY="docker.io" +FROM ${REGISTRY}/library/ubuntu:20.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> 
/etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install \ + curl \ + git \ + gpg-agent \ + tzdata \ + # parallel gzip + pigz \ + # Python + python3.13-dev \ + python3.13-venv \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install openvino dependencies +ADD scripts/install_dependencies/install_openvino_dependencies.sh /install_openvino_dependencies.sh +RUN chmod +x /install_openvino_dependencies.sh && \ + /install_openvino_dependencies.sh && \ + rm -rf /var/lib/apt/lists/* + +# Setup pip +ENV PIP_VERSION="24.0" +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + python3.13 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + rm -f get-pip.py + +# Use Python 3.13 as default instead of Python 3.8 +# Using venv here 'cause other methods to switch the default Python on Ubuntu 20 break both system and wheels build +RUN python3.13 -m venv venv +ENV PATH="/venv/bin:$PATH" + +ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} +ENV PIP_INSTALL_PATH=/venv/lib/python3.13/site-packages diff --git a/.github/workflows/job_python_api_tests.yml b/.github/workflows/job_python_api_tests.yml index 541a14e2b1b6df..654d634f4f56f3 100644 --- a/.github/workflows/job_python_api_tests.yml +++ b/.github/workflows/job_python_api_tests.yml @@ -101,10 +101,10 @@ jobs: --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ --ignore=${INSTALL_TEST_DIR}/tests/pyopenvino/tests/test_utils/test_utils.py - - name: Python API Tests -- numpy>=2.0.0 + - name: Python API Tests -- numpy<2.0.0 run: | python3 -m pip uninstall -y numpy - python3 -m pip install "numpy~=2.0.0" + python3 -m pip install "numpy~=1.26.0" python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/bindings/python/requirements_test.txt # for 'template' extension export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH diff --git a/.github/workflows/manylinux_2014.yml b/.github/workflows/manylinux_2014.yml index bd5da965226a50..aa0b06b6cf05bd 100644 --- a/.github/workflows/manylinux_2014.yml +++ b/.github/workflows/manylinux_2014.yml @@ -69,6 +69,11 @@ jobs: images: | ov_build/ubuntu_22_04_x64_docker ov_build/manylinux2014_x86_64 + ov_test/ubuntu_20_04_x64_py313 + ov_test/ubuntu_22_04_x64 + ov_test/ubuntu_24_04_x64 + ov_test/fedora_33 + ov_test/debian_10_py310 registry: 'openvinogithubactions.azurecr.io' dockerfiles_root_dir: '.github/dockerfiles' changed_components: ${{ needs.smart_ci.outputs.changed_components }} @@ -92,6 +97,7 @@ jobs: OPENVINO_REPO: ${{ github.workspace }}/src INSTALL_DIR: ${{ github.workspace }}/install/openvino INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests BUILD_DIR: ${{ github.workspace }}/build DOCKER_CONFIG: "/mount/.docker" CMAKE_CXX_COMPILER_LAUNCHER: sccache @@ -135,6 +141,7 @@ jobs: -v ${{ env.OPENVINO_REPO }}:/work/src \ -v ov_build_cache:/work/build \ -v ${{ env.INSTALL_DIR }}:/work/install \ + -v ${{ env.INSTALL_TEST_DIR }}:/work/api_tests \ -e SCCACHE_AZURE_BLOB_CONTAINER \ -e SCCACHE_AZURE_CONNECTION_STRING \ -e SCCACHE_SERVER_PORT \ @@ -148,16 +155,18 @@ jobs: -w /work/src \ ${{ 
fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ /bin/bash -c " - cmake -DENABLE_CPPLINT=OFF -DENABLE_NCC_STYLE=OFF -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF -S /work/src -B /work/build && + python3.12 -m pip install -r /work/src/src/bindings/python/wheel/requirements-dev.txt + cmake -DPython3_EXECUTABLE=/usr/local/bin/python3.12 -DENABLE_CPPLINT=OFF -DENABLE_NCC_STYLE=OFF -DENABLE_TESTS=ON -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_OV_TF_FRONTEND=OFF -DENABLE_OV_TF_LITE_FRONTEND=OFF -DENABLE_OV_PADDLE_FRONTEND=OFF -DENABLE_OV_PYTORCH_FRONTEND=ON -DENABLE_OV_JAX_FRONTEND=OFF -DENABLE_OV_ONNX_FRONTEND=ON -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON -S /work/src -B /work/build && cmake --build /work/build --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} && cmake --install /work/build --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/install + cmake --install /work/build --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/api_tests --component tests " - name: Pack Artifacts run: mkdir -p ${{ env.BUILD_DIR }} && tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz working-directory: ${{ env.INSTALL_DIR }} - - name: Build Python API(Python 3.9-3.13) + - name: Build Python API (Python 3.9-3.13) run: | SUPPORTED_PYTHON_VERSIONS=("39" "310" "311" "312" "313") for PY_VER in "${SUPPORTED_PYTHON_VERSIONS[@]}"; do @@ -190,6 +199,10 @@ jobs: " done + - name: Pack openvino_tests + run: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + working-directory: ${{ env.INSTALL_TEST_DIR }} + # # Upload build artifacts # @@ -208,7 +221,15 @@ jobs: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl if-no-files-found: 'error' - + + - name: Upload openvino tests package + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: openvino_tests + path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + if-no-files-found: 'error' + - name: Store artifacts to a shared drive id: store_artifacts if: ${{ always() }} @@ -220,10 +241,34 @@ jobs: ${{ env.INSTALL_WHEELS_DIR }}/wheels storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} - + + Python_API_Tests: + name: Python API tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_python_api_tests.yml + strategy: + fail-fast: false + matrix: + include: + - python-version: "3.9" + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.fedora_33 }} + - python-version: "3.10" + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_py310 }} + - python-version: "3.11" + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }} + - python-version: "3.12" + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }} + - python-version: "3.13" + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_x64_py313 }} + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ matrix.image }}", "volumes": ["/mount:/mount"]}' + python-version: ${{ matrix.python-version }} + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + Overall_Status: name: ci/gha_overall_status_manylinux2014 - needs: [Smart_CI, Build] + needs: [Smart_CI, Build, Python_API_Tests] if: ${{ always() }} runs-on: ubuntu-latest steps: diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index 
c80dc388568004..6e0e21335e50c8 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -83,7 +83,7 @@ For setting up a relevant configuration, refer to the :doc:`Integrate with Customer Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` topic (step 3 "Configure input and output"). -.. dropdown:: Device support across OpenVINO 2024.5 distributions +.. dropdown:: Device support across OpenVINO 2024.6 distributions =============== ========== ====== =============== ======== ============ ========== ========== ========== Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 9e7673d7d0910d..a168d1c44a10c3 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -16,359 +16,407 @@ OpenVINO Release Notes -2024.5 - 20 November 2024 +2024.6 - 18 December 2024 ############################# :doc:`System Requirements <./release-notes-openvino/system-requirements>` | :doc:`Release policy <./release-notes-openvino/release-policy>` | :doc:`Installation Guides <./../get-started/install-openvino>` - - What's new +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* More GenAI coverage and framework integrations to minimize code changes. - - * New models supported: Llama 3.2 (1B & 3B), Gemma 2 (2B & 9B), and YOLO11. - * LLM support on NPU: Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, Qwen2-7B-Instruct and Phi-3 - Mini-Instruct. - * Noteworthy notebooks added: Sam2, Llama3.2, Llama3.2 - Vision, Wav2Lip, Whisper, and Llava. - * Preview: support for Flax, a high-performance Python neural network library based on JAX. - Its modular design allows for easy customization and accelerated inference on GPUs. - -* Broader Large Language Model (LLM) support and more model compression techniques. - - * Optimizations for built-in GPUs on Intel® Core™ Ultra Processors (Series 1) and Intel® Arc™ - Graphics include KV Cache compression for memory reduction along with improved usability, - and model load time optimizations to improve first token latency for LLMs. - * Dynamic quantization was enabled to improve first token latency for LLMs on built-in - Intel® GPUs without impacting accuracy on Intel® Core™ Ultra Processors (Series 1). Second - token latency will also improve for large batch inference. - * A new method to generate synthetic text data is implemented in the Neural Network - Compression Framework (NNCF). This will allow LLMs to be compressed more accurately using - data-aware methods without datasets. Coming soon: This feature will soon be accessible via - Optimum Intel on Hugging Face. - -* More portability and performance to run AI at the edge, in the cloud, or locally. - - * Support for - `Intel® Xeon® 6 Processors with P-cores `__ - (formerly codenamed Granite Rapids) and - `Intel® Core™ Ultra 200V series processors `__ - (formerly codenamed Arrow Lake-S). - * Preview: GenAI API enables multimodal AI deployment with support for multimodal pipelines - for improved contextual awareness, transcription pipelines for easy audio-to-text - conversions, and image generation pipelines for streamlined text-to-visual conversions. 
- * Speculative decoding feature added to the GenAI API for improved performance and efficient - text generation using a small draft model that is periodically corrected by the full-size - model. - * Preview: LoRA adapters are now supported in the GenAI API for developers to quickly and - efficiently customize image and text generation models for specialized tasks. - * The GenAI API now also supports LLMs on NPU allowing developers to specify NPU as the - target device, specifically for WhisperPipeline (for whisper-base, whisper-medium, and - whisper-small) and LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, - Qwen2-7B-Instruct and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for - best performance. - -Now deprecated ------------------------------------------------------------------------------------------------ +* OpenVINO 2024.6 LTS release includes updates for enhanced stability and improved LLM performance. +* Introduced support for Intel® Arc™ B-Series Graphics (formerly known as Battlemage) +* Memory optimizations implemented to improve the inference time memory and LLM performance on NPUs. +* Improved LLM performance with GenAI API optimizations and bug fixes. -* Python 3.8 is no longer supported: OpenVINO™ Runtime +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -Common ------------------------------------------------------------------------------------------------ - -* Numpy 2.x has been adopted for all currently supported components, including NNCF. -* A new constant constructor has been added, enabling constants to be created from data pointer - as shared memory. Additionally, it can take ownership of a shared, or other, object, avoiding - a two-step process to wrap memory into ``ov::Tensor``. -* Asynchronous file reading with mmap library has been implemented, reducing loading times for - model files, especially for LLMs. -* CPU implementation of SliceScatter operator is now available, used for models such as Gemma, - supporting increased LLM performance. - - CPU Device Plugin ----------------------------------------------------------------------------------------------- -* Gold support of the Intel® Xeon® 6 platform with P-cores (formerly code name Granite Rapids) - has been reached. -* Support of Intel® Core™ Ultra 200V series processors (formerly codenamed Arrow Lake-S) has - been implemented. -* LLM performance has been further improved with Rotary Position Embedding optimization; Query, - Key, and Value; and multi-layer perceptron fusion optimization. -* FP16 support has been extended with SDPA and PagedAttention, improving performance of LLM via - both native APIs and the vLLM integration. -* Models with LoRA adapters are now supported. - +* KV cache now uses asymmetric U8 as the default precision, reducing memory stress for LLMs and + increasing their performance. This option can be controlled by model meta data. +* Quality and accuracy has been improved for selected models with several bug fixes. GPU Device Plugin ----------------------------------------------------------------------------------------------- -* The KV cache INT8 compression mechanism is now available for all supported GPUs. It enables a - significant reduction in memory consumption, increasing performance with a minimal impact to - accuracy (it affects systolic devices slightly more than non-systolic ones). The feature is - activated by default for non-systolic devices. -* LoRA adapters are now functionally supported on GPU. 
-* A new feature of GPU weightless blob caching enables caching model structure only and reusing - the weights from the original model file. Use the new OPTIMIZE_SIZE property to activate. -* Dynamic quantization with INT4 and INT8 precisions has been implemented and enabled by - default on Intel® Core™ Ultra platforms, improving LLM first token latency. - +* Device memory copy optimizations have been introduced for inference with **Intel® Arc™ B-Series + Graphics** (formerly known as Battlemage). Since it does not utilize L2 cache for copying memory + between the device and host, a dedicated `copy` operation is used, if inputs or results are + not expected in the device memory. +* ChatGLM4 inference on GPU has been optimized. NPU Device Plugin ----------------------------------------------------------------------------------------------- -* Models retrieved from the OpenVINO cache have a smaller memory footprint now. The plugin - releases the cached model (blob) after weights are loaded in NPU regions. Model export is not - available in this scenario. Memory consumption is reduced during inference execution with one - blob size. This optimization requires the latest NPU driver: 32.0.100.3104. -* A driver bug for ``ov::intel_npu::device_total_mem_size`` has been fixed. The plugin will now - report 2GB as the maximum allocatable memory for any driver that does not support graph - extension 1.8. Even if older drivers report a larger amount of memory to be available, memory - allocation would fail when 2GB are exceeded. Plugin reports the number that driver exposes - for any driver that supports graph extension 1.8 (or newer). -* A new API is used to initialize the model (available in graph extension 1.8). -* Inference request set_tensors is now supported. -* ``ov::device::LUID`` is now exposed on Windows. -* LLM-related improvements have been implemented in terms of both memory usage and performance. -* AvgPool and MaxPool operator support has been extended, adding support for more PyTorch models. - -* NOTE: for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM may - be required to use larger models, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B - (exceeding 4B parameters) with prompt sizes over 1024 tokens. - - -OpenVINO Python API ------------------------------------------------------------------------------------------------ +* LLM performance and inference time has been improved with memory optimizations. -* Constant now can be created from openvino.Tensor. -* The “release_memory” method has been added for a compiled model, improving control over - memory consumption. -OpenVINO Node.js API ------------------------------------------------------------------------------------------------ -* Querying the best device to perform inference of a model with specific operations - is now available in JavaScript API. -* Contribution guidelines have been improved to make it easier for developers to contribute. -* Testing scope has been extended by inference in end-to-end tests. -* JavaScript API samples have been improved for readability and ease of running. +OpenVINO.GenAI ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +* The encrypted_model_causal_lm sample is now available, showing how to decrypt a model. 
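The encrypted_model_causal_lm sample is the reference for this flow; as a rough illustration only, the sketch below decrypts model files with a user-held key and hands the result to ``openvino_genai.LLMPipeline``. The ``*.enc`` file layout and the use of ``cryptography.fernet`` are assumptions made for this example, not how the sample itself works (the sample keeps decrypted data in memory).

.. code-block:: python

   # Illustrative sketch only: the *.enc naming and Fernet-based decryption are
   # assumptions, not the approach used by encrypted_model_causal_lm.
   import tempfile
   from pathlib import Path

   from cryptography.fernet import Fernet
   import openvino_genai as ov_genai

   key = Path("model.key").read_bytes()   # hypothetical key file kept outside the model folder
   fernet = Fernet(key)

   with tempfile.TemporaryDirectory() as tmp:
       # Decrypt each exported model file (IR, weights, tokenizer) into a temporary folder.
       for enc_file in Path("encrypted_model").glob("*.enc"):
           (Path(tmp) / enc_file.stem).write_bytes(fernet.decrypt(enc_file.read_bytes()))
       # LLMPipeline compiles the model during construction, so the plain-text
       # files can be removed right after this call.
       pipe = ov_genai.LLMPipeline(tmp, "CPU")

   print(pipe.generate("Why is the sky blue?", max_new_tokens=64))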
-TensorFlow Framework Support ------------------------------------------------------------------------------------------------ -* TensorFlow 2.18.0, Keras 3.6.0, NumPy 2.0.2 in Python 3.12, and NumPy 1.26.4 in other Python - versions have been added to validation. -* Out-of-the-box conversion with static ranks has been improved by devising a new shape for - Switch-Merge condition sub-graphs. -* Complex type for the following operations is now supported: ExpandDims, Pack, Prod, Rsqrt, - ScatterNd, Sub. -* The following issues have been fixed: - * the corner case with one element in LinSpace to avoid division by zero, - * support FP16 and FP64 input types for LeakyRelu, - * support non-i32/i64 output index type for ArgMin/Max operations. +Other Changes and Known Issues ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Jupyter Notebooks +----------------------------- +* `Visual-language assistant with GLM-Edge-V and OpenVINO `__ +* `Local AI and OpenVINO `__ +* `Multimodal understanding and generation with Janus and OpenVINO `__ -PyTorch Framework Support ------------------------------------------------------------------------------------------------ -* PyTorch version 2.5 is now supported. -* OpenVINO Model Converter (OVC) now supports TorchScript and ExportedProgram saved on a drive. -* The issue of aten.index.Tensor conversion for indices with “None” values has been fixed, - helping to support the HF Stable Diffusion model in ExportedProgram format. -ONNX Framework Support ------------------------------------------------------------------------------------------------ -* ONNX version 1.17.0 is now used. -* Customers' models with DequantizeLinear-21, com.microsoft.MatMulNBits, and - com.microsoft.QuickGelu operations are now supported. -JAX/Flax Framework Support ------------------------------------------------------------------------------------------------ -* JAX 0.4.35 and Flax 0.10.0 has been added to validation. -* jax._src.core.ClosedJaxpr object conversion is now supported. -* Vision Transformer from google-research/vision_transformer is now supported - (with support for 37 new operations). -OpenVINO Model Server -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* The OpenAI API text embedding endpoint has been added, enabling OVMS to be used as a building - block for AI applications like RAG. - `(read more) `__ -* The rerank endpoint has been added based on Cohere API, enabling easy similarity detection - between a query and a set of documents. It is one of the building blocks for AI applications - like RAG and makes integration with frameworks such as langchain easy. - `(read more) `__ -* The following improvements have been done to LLM text generation: - - * The ``echo`` sampling parameter together with ``logprobs`` in the ``completions`` endpoint - is now supported. - * Performance has been increased on both CPU and GPU. - * Throughput in high-concurrency scenarios has been increased with dynamic_split_fuse for GPU. - * Testing coverage and stability has been improved. - * The procedure for service deployment and model repository preparation has been simplified. - -* An experimental version of a Windows binary package - native model server for Windows OS - is - available. This release includes a set of limitations and has limited tests coverage. It is - intended for testing, while the production-ready release is expected with 2025.0. All feedback - is welcome. 
- - -Neural Network Compression Framework -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* A new nncf.data.generate_text_data() method has been added for generating a synthetic dataset - for LLM compression. This approach helps to compress LLMs more accurately in situations when - the dataset is not available or not sufficient. - `See our example `__ - for more information about the usage. -* Support of data-free and data-aware weight compression methods - nncf.compress_weights() - - has been extended with NF4 per-channel quantization, making compressed LLMs more accurate and - faster on NPU. -* Caching of computed statistics in nncf.compress_weights() is now available, significantly - reducing compression time when performing compression of the same LLM multiple times, with - different compression parameters. To enable it, set the advanced ``statistics_path`` parameter - of nncf.compress_weights() to the desired file path location. -* The ``backup_mode`` optional parameter has been added to nncf.compress_weights(), for - specifying the data type for embeddings, convolutions, and last linear layers during 4-bit - weight compression. Available options are INT8_ASYM (default), INT8_SYM, and NONE (retains - the original floating-point precision of the model weights). In certain situations, - non-default value might give better accuracy of compressed LLMs. -* Preview support is now available for optimizing models in Torch - `FX format `__, nncf.quantize(), and - nncf.compress_weights() methods. After optimization such models can be directly executed - via torch.compile(compressed_model, backend="openvino"). For more details, see - `INT8 quantization example `__. -* Memory consumption of data-aware weight compression methods - nncf.compress_weights() – has - been reduced significantly, with some variation depending on the model and method. -* Support for the following has changed: - - * NumPy 2 added - * PyTorch upgraded to 2.5.1 - * ONNX upgraded to 1.17 - * Python 3.8 discontinued - - - -OpenVINO Tokenizers +Previous 2024 releases +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* Several operations have been introduced and optimized. -* Conversion parameters and environment info have been added to ``rt_info``, improving - reproducibility and debugging. +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. dropdown:: 2024.5 - 20 November 2024 + :animate: fade-in-slide-down + :color: secondary -OpenVINO.GenAI -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + **What's new** -* The following has been added: + * More GenAI coverage and framework integrations to minimize code changes. - * LoRA adapter for the LLMPipeline. - * Text2ImagePipeline with LoRA adapter and text2image samples. - * VLMPipeline and visual_language_chat sample for text generation models with text and image - inputs. - * WhisperPipeline and whisper_speech_recognition sample. + * New models supported: Llama 3.2 (1B & 3B), Gemma 2 (2B & 9B), and YOLO11. + * LLM support on NPU: Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, Qwen2-7B-Instruct and Phi-3 + Mini-Instruct. + * Noteworthy notebooks added: Sam2, Llama3.2, Llama3.2 - Vision, Wav2Lip, Whisper, and Llava. 
+ * Preview: support for Flax, a high-performance Python neural network library based on JAX. + Its modular design allows for easy customization and accelerated inference on GPUs. -* speculative_decoding_lm has been moved to LLMPipeline based implementation and is now - installed as part of the package. -* On NPU, a set of pipelines has been enabled: WhisperPipeline (for whisper-base, - whisper-medium, and whisper-small), LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, - Qwen2-7B-Instruct, and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for - best performance. + * Broader Large Language Model (LLM) support and more model compression techniques. + * Optimizations for built-in GPUs on Intel® Core™ Ultra Processors (Series 1) and Intel® Arc™ + Graphics include KV Cache compression for memory reduction along with improved usability, + and model load time optimizations to improve first token latency for LLMs. + * Dynamic quantization was enabled to improve first token latency for LLMs on built-in + Intel® GPUs without impacting accuracy on Intel® Core™ Ultra Processors (Series 1). Second + token latency will also improve for large batch inference. + * A new method to generate synthetic text data is implemented in the Neural Network + Compression Framework (NNCF). This will allow LLMs to be compressed more accurately using + data-aware methods without datasets. Coming soon: This feature will soon be accessible via + Optimum Intel on Hugging Face. + * More portability and performance to run AI at the edge, in the cloud, or locally. + * Support for + `Intel® Xeon® 6 Processors with P-cores `__ + (formerly codenamed Granite Rapids) and + `Intel® Core™ Ultra 200V series processors `__ + (formerly codenamed Arrow Lake-S). + * Preview: GenAI API enables multimodal AI deployment with support for multimodal pipelines + for improved contextual awareness, transcription pipelines for easy audio-to-text + conversions, and image generation pipelines for streamlined text-to-visual conversions. + * Speculative decoding feature added to the GenAI API for improved performance and efficient + text generation using a small draft model that is periodically corrected by the full-size + model. + * Preview: LoRA adapters are now supported in the GenAI API for developers to quickly and + efficiently customize image and text generation models for specialized tasks. + * The GenAI API now also supports LLMs on NPU allowing developers to specify NPU as the + target device, specifically for WhisperPipeline (for whisper-base, whisper-medium, and + whisper-small) and LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. 
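To make the last bullet above concrete, here is a minimal sketch of the corresponding Python flow. The model folder is a placeholder for one of the models listed above, already exported to OpenVINO IR (for example with ``optimum-cli export openvino``), and the device string can be replaced with ``CPU`` or ``GPU``.

.. code-block:: python

   import openvino_genai as ov_genai

   # Placeholder folder: one of the models listed above, exported to OpenVINO IR.
   model_dir = "Llama-3-8B-Instruct-ov"

   # "NPU" relies on driver 32.0.100.3104 or later, as noted above;
   # "CPU" or "GPU" can be used in exactly the same way.
   pipe = ov_genai.LLMPipeline(model_dir, "NPU")
   print(pipe.generate("What is OpenVINO?", max_new_tokens=100))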
+ *Now deprecated* -Other Changes and Known Issues -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + * Python 3.8 is no longer supported: -Jupyter Notebooks ------------------------------ -* `Text-to-Image generation using OpenVINO GenAI `__ -* `Multi LoRA Image Generation `__ -* `Virtual Try-on using OpenVINO and CatVTON `__ -* `Visual Language Assistant using OpenVINO GenAI `__ -* `Speech recognition using OpenVINO GenAI `__ -* `YoloV11 `__ -* `Llama-3.2-vision `__ -* `Pixtral `__ -* `Segment Anything 2 `__ -* `Video Lips-sync using Wav2Lip `__ -* `Convert JAX to OpenVINO tutorial `__ + **OpenVINO™ Runtime** + *Common* -Known Issues ------------------------------ + * Numpy 2.x has been adopted for all currently supported components, including NNCF. + * A new constant constructor has been added, enabling constants to be created from data pointer + as shared memory. Additionally, it can take ownership of a shared, or other, object, avoiding + a two-step process to wrap memory into ``ov::Tensor``. + * Asynchronous file reading with mmap library has been implemented, reducing loading times for + model files, especially for LLMs. + * CPU implementation of SliceScatter operator is now available, used for models such as Gemma, + supporting increased LLM performance. -| **Component: CPU Plugin** -| ID: 155898 -| Description: -| Description: When using new version of Transformer version to convert some of LLMs - (GPT-J/GPT-NeoX or falcon-7b), the inference accuracy may be impacted on 4th or 5th - generation of Intel® Xeon® processors, due to model structure update triggering inference - precision difference in part of the model. The workaround is to use transformer version of - 4.44.2 or lower. -| **Component: GPU Plugin** -| ID: 154583 -| Description: -| LLM accuracy can be low especially on non-systolic platforms like Intel® Core™ Ultra. When - facing the low accuracy issue, user needs to manually set a config ACTIVATION_SCALING_FACOTR - with a value of 8.0 in the compile_model() function. From the next release, scaling factor - value will be automatically applied through updated IR. + *CPU Device Plugin* -| **Component: GenAI** -| ID: 156437, 148933 -| Description: -| When using Python GenAI APIs, if ONNX 17.0 and later is installed, it may encounter the - error “DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) - initialization routine failed.” It is due to the ONNX dependency issue - `onnx/onnx#6267 `__, - Install - `Microsoft Visual C++ Redistributable `__ - latest supported downloads to fix the issue. + * Gold support of the Intel® Xeon® 6 platform with P-cores (formerly code name Granite Rapids) + has been reached. + * Support of Intel® Core™ Ultra 200V series processors (formerly codenamed Arrow Lake-S) has + been implemented. + * LLM performance has been further improved with Rotary Position Embedding optimization; Query, + Key, and Value; and multi-layer perceptron fusion optimization. + * FP16 support has been extended with SDPA and PagedAttention, improving performance of LLM via + both native APIs and the vLLM integration. + * Models with LoRA adapters are now supported. -| **Component: GenAI** -| ID: 156944 -| Description: -| There were backward incompatible changes resulting in different text generated by LLMs like - Mistralai/Mistral-7B-Instruct-v0.2 and TinyLlama/TinyLlama-1.1B-Chat-v1.0 when using a - tokenizer converted by older openvino_tolenizers. 
A way to resolve the issue is to convert - tokenizer and detokenizer models using the latest openvino_tokenizers. + *GPU Device Plugin* + * The KV cache INT8 compression mechanism is now available for all supported GPUs. It enables a + significant reduction in memory consumption, increasing performance with a minimal impact to + accuracy (it affects systolic devices slightly more than non-systolic ones). The feature is + activated by default for non-systolic devices. + * LoRA adapters are now functionally supported on GPU. + * A new feature of GPU weightless blob caching enables caching model structure only and reusing + the weights from the original model file. Use the new OPTIMIZE_SIZE property to activate. + * Dynamic quantization with INT4 and INT8 precisions has been implemented and enabled by + default on Intel® Core™ Ultra platforms, improving LLM first token latency. + *NPU Device Plugin* + + * Models retrieved from the OpenVINO cache have a smaller memory footprint now. The plugin + releases the cached model (blob) after weights are loaded in NPU regions. Model export is not + available in this scenario. Memory consumption is reduced during inference execution with one + blob size. This optimization requires the latest NPU driver: 32.0.100.3104. + * A driver bug for ``ov::intel_npu::device_total_mem_size`` has been fixed. The plugin will now + report 2GB as the maximum allocatable memory for any driver that does not support graph + extension 1.8. Even if older drivers report a larger amount of memory to be available, memory + allocation would fail when 2GB are exceeded. Plugin reports the number that driver exposes + for any driver that supports graph extension 1.8 (or newer). + * A new API is used to initialize the model (available in graph extension 1.8). + * Inference request set_tensors is now supported. + * ``ov::device::LUID`` is now exposed on Windows. + * LLM-related improvements have been implemented in terms of both memory usage and performance. + * AvgPool and MaxPool operator support has been extended, adding support for more PyTorch models. + + * NOTE: for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM may + be required to use larger models, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B + (exceeding 4B parameters) with prompt sizes over 1024 tokens. + + + *OpenVINO Python API* + + * Constant now can be created from openvino.Tensor. + * The “release_memory” method has been added for a compiled model, improving control over + memory consumption. + + + + *OpenVINO Node.js API* + + * Querying the best device to perform inference of a model with specific operations + is now available in JavaScript API. + * Contribution guidelines have been improved to make it easier for developers to contribute. + * Testing scope has been extended by inference in end-to-end tests. + * JavaScript API samples have been improved for readability and ease of running. + + + + *TensorFlow Framework Support* + + * TensorFlow 2.18.0, Keras 3.6.0, NumPy 2.0.2 in Python 3.12, and NumPy 1.26.4 in other Python + versions have been added to validation. + * Out-of-the-box conversion with static ranks has been improved by devising a new shape for + Switch-Merge condition sub-graphs. + * Complex type for the following operations is now supported: ExpandDims, Pack, Prod, Rsqrt, + ScatterNd, Sub. 
+ * The following issues have been fixed: + + * the corner case with one element in LinSpace to avoid division by zero, + * support FP16 and FP64 input types for LeakyRelu, + * support non-i32/i64 output index type for ArgMin/Max operations. + + + + *PyTorch Framework Support* + + * PyTorch version 2.5 is now supported. + * OpenVINO Model Converter (OVC) now supports TorchScript and ExportedProgram saved on a drive. + * The issue of aten.index.Tensor conversion for indices with “None” values has been fixed, + helping to support the HF Stable Diffusion model in ExportedProgram format. + + + + *ONNX Framework Support* + + * ONNX version 1.17.0 is now used. + * Customers' models with DequantizeLinear-21, com.microsoft.MatMulNBits, and + com.microsoft.QuickGelu operations are now supported. + + *JAX/Flax Framework Support* + + * JAX 0.4.35 and Flax 0.10.0 has been added to validation. + * jax._src.core.ClosedJaxpr object conversion is now supported. + * Vision Transformer from google-research/vision_transformer is now supported + (with support for 37 new operations). + + + **OpenVINO Model Server** + + * The OpenAI API text embedding endpoint has been added, enabling OVMS to be used as a building + block for AI applications like RAG. + `(read more) `__ + * The rerank endpoint has been added based on Cohere API, enabling easy similarity detection + between a query and a set of documents. It is one of the building blocks for AI applications + like RAG and makes integration with frameworks such as langchain easy. + `(read more) `__ + * The following improvements have been done to LLM text generation: + + * The ``echo`` sampling parameter together with ``logprobs`` in the ``completions`` endpoint + is now supported. + * Performance has been increased on both CPU and GPU. + * Throughput in high-concurrency scenarios has been increased with dynamic_split_fuse for GPU. + * Testing coverage and stability has been improved. + * The procedure for service deployment and model repository preparation has been simplified. + + * An experimental version of a Windows binary package - native model server for Windows OS - is + available. This release includes a set of limitations and has limited tests coverage. It is + intended for testing, while the production-ready release is expected with 2025.0. All feedback + is welcome. + + + **Neural Network Compression Framework** + + * A new nncf.data.generate_text_data() method has been added for generating a synthetic dataset + for LLM compression. This approach helps to compress LLMs more accurately in situations when + the dataset is not available or not sufficient. + `See our example `__ + for more information about the usage. + * Support of data-free and data-aware weight compression methods - nncf.compress_weights() - + has been extended with NF4 per-channel quantization, making compressed LLMs more accurate and + faster on NPU. + * Caching of computed statistics in nncf.compress_weights() is now available, significantly + reducing compression time when performing compression of the same LLM multiple times, with + different compression parameters. To enable it, set the advanced ``statistics_path`` parameter + of nncf.compress_weights() to the desired file path location. + * The ``backup_mode`` optional parameter has been added to nncf.compress_weights(), for + specifying the data type for embeddings, convolutions, and last linear layers during 4-bit + weight compression. 
Available options are INT8_ASYM (default), INT8_SYM, and NONE (retains + the original floating-point precision of the model weights). In certain situations, + non-default value might give better accuracy of compressed LLMs. + * Preview support is now available for optimizing models in Torch + `FX format `__, nncf.quantize(), and + nncf.compress_weights() methods. After optimization such models can be directly executed + via torch.compile(compressed_model, backend="openvino"). For more details, see + `INT8 quantization example `__. + * Memory consumption of data-aware weight compression methods - nncf.compress_weights() – has + been reduced significantly, with some variation depending on the model and method. + * Support for the following has changed: + + * NumPy 2 added + * PyTorch upgraded to 2.5.1 + * ONNX upgraded to 1.17 + * Python 3.8 discontinued + + + + **OpenVINO Tokenizers** + + * Several operations have been introduced and optimized. + * Conversion parameters and environment info have been added to ``rt_info``, improving + reproducibility and debugging. + + + + **OpenVINO.GenAI** + + * The following has been added: + + * LoRA adapter for the LLMPipeline. + * Text2ImagePipeline with LoRA adapter and text2image samples. + * VLMPipeline and visual_language_chat sample for text generation models with text and image + inputs. + * WhisperPipeline and whisper_speech_recognition sample. + + * speculative_decoding_lm has been moved to LLMPipeline based implementation and is now + installed as part of the package. + * On NPU, a set of pipelines has been enabled: WhisperPipeline (for whisper-base, + whisper-medium, and whisper-small), LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct, and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. + + + + + + **Other Changes and Known Issues** + + *Jupyter Notebooks* + + * `Text-to-Image generation using OpenVINO GenAI `__ + * `Multi LoRA Image Generation `__ + * `Virtual Try-on using OpenVINO and CatVTON `__ + * `Visual Language Assistant using OpenVINO GenAI `__ + * `Speech recognition using OpenVINO GenAI `__ + * `YoloV11 `__ + * `Llama-3.2-vision `__ + * `Pixtral `__ + * `Segment Anything 2 `__ + * `Video Lips-sync using Wav2Lip `__ + * `Convert JAX to OpenVINO tutorial `__ + + + *Known Issues* + + | **Component: CPU Plugin** + | ID: 155898 + | Description: + | Description: When using new version of Transformer version to convert some of LLMs + (GPT-J/GPT-NeoX or falcon-7b), the inference accuracy may be impacted on 4th or 5th + generation of Intel® Xeon® processors, due to model structure update triggering inference + precision difference in part of the model. The workaround is to use transformer version of + 4.44.2 or lower. + + | **Component: GPU Plugin** + | ID: 154583 + | Description: + | LLM accuracy can be low especially on non-systolic platforms like Intel® Core™ Ultra. When + facing the low accuracy issue, user needs to manually set a config ACTIVATION_SCALING_FACOTR + with a value of 8.0 in the compile_model() function. From the next release, scaling factor + value will be automatically applied through updated IR. 
+ + | **Component: GenAI** + | ID: 156437, 148933 + | Description: + | When using Python GenAI APIs, if ONNX 17.0 and later is installed, it may encounter the + error “DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) + initialization routine failed.” It is due to the ONNX dependency issue + `onnx/onnx#6267 `__, + Install + `Microsoft Visual C++ Redistributable `__ + latest supported downloads to fix the issue. + + | **Component: GenAI** + | ID: 156944 + | Description: + | There were backward incompatible changes resulting in different text generated by LLMs like + Mistralai/Mistral-7B-Instruct-v0.2 and TinyLlama/TinyLlama-1.1B-Chat-v1.0 when using a + tokenizer converted by older openvino_tolenizers. A way to resolve the issue is to convert + tokenizer and detokenizer models using the latest openvino_tokenizers. -Previous 2024 releases -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -472,20 +520,20 @@ Previous 2024 releases *NPU Device Plugin* -* `Remote Tensor API `__ - is now supported. -* You can now query the available number of tiles (ov::intel_npu::max_tiles) and force a - specific number of tiles to be used by the model, per inference request - (ov::intel_npu::tiles). **Note:** ov::intel_npu::tiles overrides the default number of tiles - selected by the compiler based on performance hints (ov::hint::performance_mode). Any tile - number other than 1 may be a problem for cross platform compatibility, if not tested - explicitly versus the max_tiles value. -* You can now bypass the model caching mechanism in the driver - (ov::intel_npu::bypass_umd_caching). Read more about driver and OpenVINO caching. -* Memory footprint at model execution has been reduced by one blob (compiled model) size. - For execution, the plugin no longer retrieves the compiled model from the driver, it uses the - level zero graph handle directly, instead. The compiled model is now retrieved from the driver - only during the export method. + * `Remote Tensor API `__ + is now supported. + * You can now query the available number of tiles (ov::intel_npu::max_tiles) and force a + specific number of tiles to be used by the model, per inference request + (ov::intel_npu::tiles). **Note:** ov::intel_npu::tiles overrides the default number of tiles + selected by the compiler based on performance hints (ov::hint::performance_mode). Any tile + number other than 1 may be a problem for cross platform compatibility, if not tested + explicitly versus the max_tiles value. + * You can now bypass the model caching mechanism in the driver + (ov::intel_npu::bypass_umd_caching). Read more about driver and OpenVINO caching. + * Memory footprint at model execution has been reduced by one blob (compiled model) size. + For execution, the plugin no longer retrieves the compiled model from the driver, it uses the + level zero graph handle directly, instead. The compiled model is now retrieved from the driver + only during the export method. *OpenVINO Python API* @@ -1811,6 +1859,4 @@ Copyright © 2024, Intel Corporation. All rights reserved. For more complete information about compiler optimizations, see our Optimization Notice. -Performance varies by use, configuration and other factors. - - +Performance varies by use, configuration and other factors. 
\ No newline at end of file diff --git a/docs/articles_en/documentation/openvino-extensibility.rst b/docs/articles_en/documentation/openvino-extensibility.rst index 216135009b1806..d166f1390d643d 100644 --- a/docs/articles_en/documentation/openvino-extensibility.rst +++ b/docs/articles_en/documentation/openvino-extensibility.rst @@ -45,7 +45,7 @@ The first part is required for inference. The second part is required for succes Definition of Operation Semantics ################################# -If the custom operation can be mathematically represented as a combination of exiting OpenVINO operations and such decomposition gives desired performance, then low-level operation implementation is not required. Refer to the latest OpenVINO operation set, when deciding feasibility of such decomposition. You can use any valid combination of exiting operations. The next section of this document describes the way to map a custom operation. +If the custom operation can be mathematically represented as a combination of existing OpenVINO operations and such decomposition gives desired performance, then low-level operation implementation is not required. Refer to the latest OpenVINO operation set, when deciding feasibility of such decomposition. You can use any valid combination of existing operations. The next section of this document describes the way to map a custom operation. If such decomposition is not possible or appears too bulky with a large number of constituent operations that do not perform well, then a new class for the custom operation should be implemented, as described in the :doc:`Custom Operation Guide `. diff --git a/docs/articles_en/get-started/configurations/genai-dependencies.rst b/docs/articles_en/get-started/configurations/genai-dependencies.rst index 59d29ef3108da0..4486890c3a40b8 100644 --- a/docs/articles_en/get-started/configurations/genai-dependencies.rst +++ b/docs/articles_en/get-started/configurations/genai-dependencies.rst @@ -4,12 +4,12 @@ OpenVINO™ GenAI Dependencies OpenVINO™ GenAI depends on both `OpenVINO `__ and `OpenVINO Tokenizers `__. During OpenVINO™ GenAI installation from PyPi, the same versions of OpenVINO and OpenVINO Tokenizers -are used (e.g. ``openvino==2024.5.0`` and ``openvino-tokenizers==2024.5.0.0`` are installed for -``openvino-genai==2024.5.0``). +are used (e.g. ``openvino==2024.6.0`` and ``openvino-tokenizers==2024.6.0.0`` are installed for +``openvino-genai==2024.6.0``). -Trying to update any of the dependency packages might result in a version incompatiblibty +Trying to update any of the dependency packages might result in a version incompatibility due to different Application Binary Interfaces (ABIs), which will result in errors while running -OpenVINO GenAI. Having package version in the ``...`` format, allows +OpenVINO GenAI. Having package version in the ``...`` format, enables changing the ```` portion of the full version to ensure ABI compatibility. Changing ````, ```` or ```` part of the version may break ABI. diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index 48ea0a434c5388..68656e554145a4 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -1,4 +1,4 @@ -Install OpenVINO™ 2024.5 +Install OpenVINO™ 2024.6 ========================== @@ -23,10 +23,11 @@ Install OpenVINO™ 2024.5 -OpenVINO 2024.5, described here, is not a Long-Term-Support version! 
+OpenVINO 2024.6, described here, is a Long-Term-Support version! All currently supported versions are: -* 2024.5 (development) +* 2025.0 (in development) +* 2024.6 (LTS) * 2023.3 (LTS) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide.rst index 5846d1a484737c..e1d643648b4be5 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide.rst @@ -20,12 +20,12 @@ Generative AI workflow Generative AI is a specific area of Deep Learning models used for producing new and “original” data, based on input in the form of image, sound, or natural language text. Due to their complexity and size, generative AI pipelines are more difficult to deploy and run efficiently. -OpenVINO simplifies the process and ensures high-performance integrations, with the following +OpenVINO™ simplifies the process and ensures high-performance integrations, with the following options: .. tab-set:: - .. tab-item:: OpenVINO GenAI + .. tab-item:: OpenVINO™ GenAI | - Suggested for production deployment for the supported use cases. | - Smaller footprint and fewer dependencies. @@ -39,6 +39,8 @@ options: text generation loop, tokenization, and scheduling, offering ease of use and high performance. + `Check out the OpenVINO GenAI Quick-start Guide [PDF] `__ + .. tab-item:: Hugging Face integration | - Suggested for prototyping and, if the use case is not covered by OpenVINO GenAI, production. @@ -54,49 +56,34 @@ options: as well as conversion on the fly. For integration with the final product it may offer lower performance, though. -`Check out the GenAI Quick-start Guide [PDF] `__ - -The advantages of using OpenVINO for LLM deployment: - -.. dropdown:: Fewer dependencies and smaller footprint - :animate: fade-in-slide-down - :color: secondary - - Less bloated than frameworks such as Hugging Face and PyTorch, with a smaller binary size and reduced - memory footprint, makes deployments easier and updates more manageable. - -.. dropdown:: Compression and precision management - :animate: fade-in-slide-down - :color: secondary - Techniques such as 8-bit and 4-bit weight compression, including embedding layers, and storage - format reduction. This includes fp16 precision for non-compressed models and int8/int4 for - compressed models, like GPTQ models from `Hugging Face `__. -.. dropdown:: Enhanced inference capabilities - :animate: fade-in-slide-down - :color: secondary +The advantages of using OpenVINO for generative model deployment: - Advanced features like in-place KV-cache, dynamic quantization, KV-cache quantization and - encapsulation, dynamic beam size configuration, and speculative sampling, and more are - available. +| **Fewer dependencies and smaller footprint** +| Less bloated than frameworks such as Hugging Face and PyTorch, with a smaller binary size and reduced + memory footprint, makes deployments easier and updates more manageable. -.. dropdown:: Stateful model optimization - :animate: fade-in-slide-down - :color: secondary +| **Compression and precision management** +| Techniques such as 8-bit and 4-bit weight compression, including embedding layers, and storage + format reduction. This includes fp16 precision for non-compressed models and int8/int4 for + compressed models, like GPTQ models from `Hugging Face `__. 
- Models from the Hugging Face Transformers are converted into a stateful form, optimizing - inference performance and memory usage in long-running text generation tasks by managing past - KV-cache tensors more efficiently internally. This feature is automatically activated for - many supported models, while unsupported ones remain stateless. Learn more about the - :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. +| **Enhanced inference capabilities** +| Advanced features like in-place KV-cache, dynamic quantization, KV-cache quantization and + encapsulation, dynamic beam size configuration, and speculative sampling, and more are + available. -.. dropdown:: Optimized LLM inference - :animate: fade-in-slide-down - :color: secondary +| **Stateful model optimization** +| Models from the Hugging Face Transformers are converted into a stateful form, optimizing + inference performance and memory usage in long-running text generation tasks by managing past + KV-cache tensors more efficiently internally. This feature is automatically activated for + many supported models, while unsupported ones remain stateless. Learn more about the + :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. - Includes a Python API for rapid development and C++ for further optimization, offering - better performance than Python-based runtimes. +| **Optimized LLM inference** +| Includes a Python API for rapid development and C++ for further optimization, offering + better performance than Python-based runtimes. Proceed to guides on: diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 42c1c3fb47aa42..eff30eed054295 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -28,6 +28,10 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi .. dropdown:: Text-to-Image Generation + OpenVINO GenAI introduces the openvino_genai.Text2ImagePipeline for inference of text-to-image + models such as: as Stable Diffusion 1.5, 2.1, XL, LCM, Flex, and more. + See the following usage example for reference. + .. tab-set:: .. tab-item:: Python @@ -130,7 +134,7 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi image_write("baseline.bmp", image) For more information, refer to the - `Python sample `__ + `Python sample `__ .. tab-item:: C++ :sync: cpp @@ -579,8 +583,9 @@ compression is done by NNCF at the model export stage. The exported model contai information necessary for execution, including the tokenizer/detokenizer and the generation config, ensuring that its results match those generated by Hugging Face. -The `LLMPipeline` is the main object used for decoding and handles all the necessary steps. -You can construct it directly from the folder with the converted model. +The `LLMPipeline` is the main object to setup the model for text generation. You can provide the +converted model to this object, specify the device for inference, and provide additional +parameters. .. 
tab-set:: @@ -911,7 +916,7 @@ running the following code: GenAI API ####################################### -The use case described here uses the following OpenVINO GenAI API methods: +The use case described here uses the following OpenVINO GenAI API classes: * generation_config - defines a configuration class for text generation, enabling customization of the generation process such as the maximum length of @@ -921,7 +926,6 @@ The use case described here uses the following OpenVINO GenAI API methods: text generation, and managing outputs with configurable options. * streamer_base - an abstract base class for creating streamers. * tokenizer - the tokenizer class for text encoding and decoding. -* visibility - controls the visibility of the GenAI library. Learn more from the `GenAI API reference `__. diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst index 53b8d5440ca855..e6d15675ea45b8 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst @@ -7,8 +7,8 @@ Generative Model Preparation -Since generative AI models tend to be big and resource-heavy, it is advisable to store them -locally and optimize for efficient inference. This article will show how to prepare +Since generative AI models tend to be big and resource-heavy, it is advisable to +optimize them for efficient inference. This article will show how to prepare LLM models for inference with OpenVINO by: * `Downloading Models from Hugging Face <#download-generative-models-from-hugging-face-hub>`__ diff --git a/docs/dev/ov_dependencies.txt b/docs/dev/ov_dependencies.txt index d9c344d2c3048d..cb64e4d5a6534c 100644 --- a/docs/dev/ov_dependencies.txt +++ b/docs/dev/ov_dependencies.txt @@ -1,6 +1,6 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -#This file provides a comprehensive list of all dependencies of OpenVINO 2024.5 +#This file provides a comprehensive list of all dependencies of OpenVINO 2024.6 #The file is part of the automation pipeline for posting OpenVINO IR models on the HuggingFace Hub, including OneBOM dependency checks. diff --git a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf index 90ad7bd6b000b4..13edfc8f0b7bc2 100644 Binary files a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf and b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf differ diff --git a/docs/sphinx_setup/index.rst b/docs/sphinx_setup/index.rst index ad98be58cde1cd..1e5233ac064d0f 100644 --- a/docs/sphinx_setup/index.rst +++ b/docs/sphinx_setup/index.rst @@ -25,16 +25,16 @@ hardware and environments, on-premises and on-device, in the browser or in the c
  • + New GenAI API
      + Generative AI in only a few lines of code!
      + Check out our guide
  • OpenVINO models on Hugging Face!
      Get pre-optimized OpenVINO models, no need to convert!
      Visit Hugging Face
  • - New Generative AI API
      - Generate text with LLMs in only a few lines of code!
      - Check out our guide
  • Improved model serving
      OpenVINO Model Server has improved parallel inferencing!

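To back the "few lines of code" claim in the new banner entry above, a minimal OpenVINO GenAI sketch (not part of this PR; the model folder, device, and prompt are placeholders) could look like:

.. code-block:: python

   # Minimal text generation with OpenVINO GenAI; folder name, device, and prompt are placeholders.
   import openvino_genai as ov_genai

   pipe = ov_genai.LLMPipeline("TinyLlama-1.1B-ov", "CPU")  # converted model folder, target device
   print(pipe.generate("What is OpenVINO?", max_new_tokens=100))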
    diff --git a/src/bindings/js/node/package.json b/src/bindings/js/node/package.json index 10fc6d38bd51f4..c0e4e03ddc4df6 100644 --- a/src/bindings/js/node/package.json +++ b/src/bindings/js/node/package.json @@ -51,6 +51,17 @@ "host": "https://storage.openvinotoolkit.org" }, "keywords": [ - "OpenVINO" + "OpenVINO", + "openvino", + "openvino-node", + "openvino npm", + "openvino binding", + "openvino node.js", + "openvino library", + "intel openvino", + "openvino toolkit", + "openvino API", + "openvino SDK", + "openvino integration" ] } diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index cc1d4514b7bbfe..4d65603a5323ab 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -1,5 +1,5 @@ # used in multiple components -numpy>=1.16.6,<2.2.0 # Python bindings, frontends +numpy>=1.16.6,<2.3.0 # Python bindings, frontends # pytest pytest>=5.0,<8.4 diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index a2d63161fe764c..febb91d5ecee55 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ -numpy>=1.16.6,<2.2.0 +numpy>=1.16.6,<2.3.0 openvino-telemetry>=2023.2.1 packaging diff --git a/src/bindings/python/src/openvino/__init__.py b/src/bindings/python/src/openvino/__init__.py index e4d1a247520332..69c678909b1c9e 100644 --- a/src/bindings/python/src/openvino/__init__.py +++ b/src/bindings/python/src/openvino/__init__.py @@ -27,11 +27,11 @@ from openvino import properties as properties # Import most important classes and functions from openvino.runtime -from openvino.runtime import Model -from openvino.runtime import Core -from openvino.runtime import CompiledModel -from openvino.runtime import InferRequest -from openvino.runtime import AsyncInferQueue +from openvino._ov_api import Model +from openvino._ov_api import Core +from openvino._ov_api import CompiledModel +from openvino._ov_api import InferRequest +from openvino._ov_api import AsyncInferQueue from openvino.runtime import Symbol from openvino.runtime import Dimension @@ -43,12 +43,13 @@ from openvino.runtime import Tensor from openvino.runtime import OVAny -from openvino.runtime import compile_model +# Helper functions for openvino module +from openvino.runtime.utils.data_helpers import tensor_from_file +from openvino._ov_api import compile_model from openvino.runtime import get_batch from openvino.runtime import set_batch from openvino.runtime import serialize from openvino.runtime import shutdown -from openvino.runtime import tensor_from_file from openvino.runtime import save_model from openvino.runtime import layout_helpers diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/_ov_api.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/ie_api.py rename to src/bindings/python/src/openvino/_ov_api.py diff --git a/src/bindings/python/src/openvino/runtime/exceptions.py b/src/bindings/python/src/openvino/exceptions.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/exceptions.py rename to src/bindings/python/src/openvino/exceptions.py diff --git a/src/bindings/python/src/openvino/opset8/ops.py b/src/bindings/python/src/openvino/opset8/ops.py index 05b97390baa780..6995d55a28a776 100644 --- a/src/bindings/python/src/openvino/opset8/ops.py +++ b/src/bindings/python/src/openvino/opset8/ops.py @@ -7,7 +7,7 @@ from typing import List, Optional, Tuple import numpy as np -from 
openvino.runtime.exceptions import UserInputError +from openvino.exceptions import UserInputError from openvino.op import Constant, Parameter, if_op from openvino.runtime import Node from openvino.runtime.opset_utils import _get_node_factory diff --git a/src/bindings/python/src/openvino/runtime/exceptions/__init__.py b/src/bindings/python/src/openvino/runtime/exceptions/__init__.py new file mode 100644 index 00000000000000..18524a21f7d468 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/exceptions/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.exceptions import OVError +from openvino.exceptions import UserInputError +from openvino.exceptions import OVTypeError diff --git a/src/bindings/python/src/openvino/runtime/ie_api/__init__.py b/src/bindings/python/src/openvino/runtime/ie_api/__init__.py new file mode 100644 index 00000000000000..a861224b67eded --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/ie_api/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino._ov_api import Core +from openvino._ov_api import CompiledModel +from openvino._ov_api import InferRequest +from openvino._ov_api import Model +from openvino._ov_api import AsyncInferQueue + +from openvino._ov_api import tensor_from_file +from openvino._ov_api import compile_model diff --git a/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp b/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp index dee95c6a832d2c..a19f2b2f482337 100644 --- a/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp +++ b/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp @@ -191,7 +191,7 @@ static void regclass_graph_PreProcessSteps(py::module m) { :param pads_end: Number of elements matches the number of indices in data attribute. Specifies the number of padding elements at the ending of each axis. :type pads_end: 1D tensor of type T_INT. :param value: All new elements are populated with this value or with 0 if input not provided. Shouldn’t be set for other pad_mode values. - :type value: scalar tensor of type T. + :type value: scalar tensor of type T. :param mode: pad_mode specifies the method used to generate new element values. :type mode: string :return: Reference to itself, allows chaining of calls in client's code in a builder-like manner. @@ -219,7 +219,7 @@ static void regclass_graph_PreProcessSteps(py::module m) { :param pads_end: Number of elements matches the number of indices in data attribute. Specifies the number of padding elements at the ending of each axis. :type pads_end: 1D tensor of type T_INT. :param value: All new elements are populated with this value or with 0 if input not provided. Shouldn’t be set for other pad_mode values. - :type value: scalar tensor of type T. + :type value: scalar tensor of type T. :param mode: pad_mode specifies the method used to generate new element values. :type mode: string :return: Reference to itself, allows chaining of calls in client's code in a builder-like manner. 
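The two new `__init__.py` shims added earlier in this hunk re-export the public classes and exceptions from their new locations (`openvino._ov_api` and `openvino.exceptions`). A small, illustrative sanity check of that backward compatibility, not a test from this PR, could be:

.. code-block:: python

   # Old and new import paths are expected to resolve to the same objects.
   from openvino import Core
   from openvino.exceptions import UserInputError
   from openvino.runtime.ie_api import Core as LegacyCore                  # compatibility shim
   from openvino.runtime.exceptions import UserInputError as LegacyUserInputError

   assert Core is LegacyCore
   assert UserInputError is LegacyUserInputError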
@@ -308,7 +308,8 @@ static void regclass_graph_InputTensorInfo(py::module m) { }, py::arg("layout"), R"( - Set layout for input tensor info + Set layout for input tensor info + :param layout: layout to be set :type layout: Union[str, openvino.runtime.Layout] )"); @@ -422,7 +423,8 @@ static void regclass_graph_OutputTensorInfo(py::module m) { }, py::arg("layout"), R"( - Set layout for output tensor info + Set layout for output tensor info + :param layout: layout to be set :type layout: Union[str, openvino.runtime.Layout] )"); @@ -475,7 +477,8 @@ static void regclass_graph_OutputModelInfo(py::module m) { }, py::arg("layout"), R"( - Set layout for output model info + Set layout for output model info + :param layout: layout to be set :type layout: Union[str, openvino.runtime.Layout] )"); diff --git a/src/bindings/python/tests/test_runtime/test_input_node.py b/src/bindings/python/tests/test_runtime/test_input_node.py index 5e083051934afb..c12eb085317afc 100644 --- a/src/bindings/python/tests/test_runtime/test_input_node.py +++ b/src/bindings/python/tests/test_runtime/test_input_node.py @@ -75,7 +75,8 @@ def test_input_get_source_output(device): net_input = compiled_model.output(0) input_node = net_input.get_node().inputs()[0] name = input_node.get_source_output().get_node().get_friendly_name() - assert name == "relu" + # Expected ReLu node name can be changed if conversion precision applied (new Convert node added) + assert name in ("relu", "relu.0") def test_input_get_tensor(device): diff --git a/src/bindings/python/tests/test_runtime/test_ovdict.py b/src/bindings/python/tests/test_runtime/test_ovdict.py index e7a5854d66d072..cf332bb0997dfb 100644 --- a/src/bindings/python/tests/test_runtime/test_ovdict.py +++ b/src/bindings/python/tests/test_runtime/test_ovdict.py @@ -9,7 +9,7 @@ import openvino.runtime.opset13 as ops from openvino import Core, CompiledModel, InferRequest, Model from openvino.runtime import ConstOutput -from openvino.runtime.ie_api import OVDict +from openvino.runtime.utils.data_helpers import OVDict def _get_ovdict( diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 8a2985a284769a..d5e96ddafc252f 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -8,6 +8,7 @@ #include #include "itt.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/ops.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" @@ -207,7 +208,8 @@ bool convert_function_precision(const std::shared_ptr& f, bool is_changed, bool is_subgraph, bool convert_input_output_precision, - bool store_original_precision_as_rt_attribute) { + bool store_original_precision_as_rt_attribute, + bool names_compatibility_mode) { bool is_output_precision_changed = false; ov::element::TypeVector orig_result_types; @@ -276,7 +278,8 @@ bool convert_function_precision(const std::shared_ptr& f, is_changed || is_output_precision_changed, true, true, - store_original_precision_as_rt_attribute) || + store_original_precision_as_rt_attribute, + names_compatibility_mode) || is_changed; } } @@ -324,18 +327,21 @@ bool convert_function_precision(const std::shared_ptr& f, if (result->get_input_element_type(0) != orig_result_types[i]) { auto result_input = result->input_value(0); const auto convert = std::make_shared(result_input, orig_result_types[i]); - if 
(result_input.get_node()->get_output_size() > 1) { - convert->set_friendly_name(result_input.get_node()->get_friendly_name() + "." + - std::to_string(result_input.get_index())); + + auto convert_f_name = result_input.get_node()->get_friendly_name(); + if (names_compatibility_mode) { + if (result_input.get_node()->get_output_size() > 1) { + convert_f_name += '.' + std::to_string(result_input.get_index()); + } else { + result_input.get_node()->set_friendly_name(""); + } + + convert->get_output_tensor(0).set_names(result_input.get_names()); } else { - convert->set_friendly_name(result_input.get_node()->get_friendly_name()); - result_input.get_node()->set_friendly_name(""); + convert_f_name += '.' + std::to_string(result_input.get_index()); } + convert->set_friendly_name(convert_f_name); - auto& convert_output_tensor = convert->get_output_tensor(0); - convert_output_tensor.set_names(result_input.get_names()); - - result_input.set_names({}); result->input(0).replace_source_output(convert->output(0)); result->revalidate_and_infer_types(); } @@ -358,6 +364,8 @@ bool convert_precision(ov::pass::PassBase& pass, // changing precision we need to understand which Constant consumers belongs // to the current ov::Model std::unordered_map>> const_to_internal_output; + + const auto names_compatibility_mode = f->has_rt_info("version") && f->get_rt_info("version") < 11; return convert_function_precision(f, type_to_fuse, type_to_extend, @@ -368,7 +376,8 @@ bool convert_precision(ov::pass::PassBase& pass, false, false, convert_input_output_precision, - store_original_precision_as_rt_attribute); + store_original_precision_as_rt_attribute, + names_compatibility_mode); } using precisions_set_t = std::unordered_set; @@ -1405,6 +1414,13 @@ bool fuse_type_to_constant(const std::shared_ptr& node, new_const->validate_and_infer_types(); new_const->set_friendly_name(constant->get_friendly_name()); ov::copy_runtime_info(constant, new_const); + + const auto& rt_info = node->get_rt_info(); + auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); + if (weightless_caching_attr != rt_info.end()) { + new_const->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] = + weightless_caching_attr->second; + } return true; } return false; diff --git a/src/common/transformations/tests/control_flow/unroll_if_test.cpp b/src/common/transformations/tests/control_flow/unroll_if_test.cpp index 9c3ac5ea677802..28a2315ca60c0c 100644 --- a/src/common/transformations/tests/control_flow/unroll_if_test.cpp +++ b/src/common/transformations/tests/control_flow/unroll_if_test.cpp @@ -23,9 +23,15 @@ #include "transformations/init_node_info.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" -using namespace ov; using namespace testing; +namespace ov { +namespace test { +using op::v0::Constant; +using op::v0::Parameter; +using op::v0::Result; +using op::v1::Add; + std::shared_ptr get_then_body() { auto Xt = std::make_shared(ov::element::f32, ov::Shape{3}); Xt->set_friendly_name("Xt"); @@ -350,3 +356,60 @@ TEST(TransformationTests, UnrollIfInsideIf) { auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; } + +TEST(TransformationTests, UnrollIfToParameterResultModel) { + constexpr auto et = element::f32; + std::shared_ptr model, model_ref; + + { + const auto a = std::make_shared(et, PartialShape{5, 7}); + const auto b = std::make_shared(et, PartialShape{1}); + const auto c = std::make_shared(et, PartialShape{5, 7}); + + const auto then_add = 
std::make_shared(a, b); + auto then_result = std::make_shared(then_add); + auto else_result = std::make_shared(c); + + const auto then_body = std::make_shared(OutputVector{then_result}, ParameterVector{a, b}); + const auto else_body = std::make_shared(OutputVector{else_result}, ParameterVector{c}); + + const auto if_input_0 = std::make_shared(et, a->get_output_partial_shape(0)); + const auto if_input_1 = std::make_shared(et, b->get_output_partial_shape(0)); + const auto condition = Constant::create(element::boolean, {1}, {false}); + const auto if_op = std::make_shared(condition); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(if_input_0, a, c); + if_op->set_input(if_input_1, b, nullptr); + const auto if_result = if_op->set_output(then_result, else_result); + + const auto results = ResultVector{std::make_shared(if_result)}; + model = std::make_shared(results, ParameterVector{if_input_0, if_input_1}, "simple_if"); + model->input(0).set_names({"Input.0"}); + model->input(1).set_names({"Input.1"}); + model->output(0).set_names({"Output"}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(model); + + OV_ASSERT_NO_THROW(check_rt_info(model)); + } + { + const auto p = std::make_shared(et, PartialShape{5, 7}); + const auto r = std::make_shared(p); + model_ref = std::make_shared(ResultVector{r}, ParameterVector{p}, "simple_if"); + model_ref->input(0).set_names({"Input.0"}); + model_ref->output(0).set_names({"Output"}); + } + + const auto cmp_result = compare_functions(model, model_ref); + ASSERT_TRUE(cmp_result.first) << cmp_result.second; + + EXPECT_THAT(model->input(0).get_names(), UnorderedElementsAre("Input.0", "Output")); + EXPECT_THAT(model->output(0).get_names(), UnorderedElementsAre("Output")); +} + +} // namespace test +} // namespace ov diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 318f15ab1a64dc..f4bdedf4764604 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -13,6 +13,7 @@ #include "common_test_utils/ov_test_utils.hpp" #include "openvino/core/model.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset10.hpp" #include "openvino/opsets/opset15.hpp" @@ -2196,8 +2197,9 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsForParameterAndResult) auto param_1 = make_shared(element::f64, Shape{3}); auto converted_param = make_shared(param_1, element::f32); auto sin = make_shared(converted_param); + sin->get_output_tensor(0).add_names({"sine:0"}); auto converted_sin = make_shared(sin, element::f64); - converted_sin->get_output_tensor(0).add_names({"sine:0"}); + converted_sin->set_friendly_name("sine.0"); auto result_sin = make_shared(converted_sin); model_ref = make_shared(result_sin, ParameterVector{param_1}); } @@ -2207,7 +2209,7 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsForParameterAndResult) ASSERT_TRUE(result.valid) << result.message; const auto& results = model->get_results(); - ASSERT_EQ("sine", results[0]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("sine.0", results[0]->get_input_node_ptr(0)->get_friendly_name()); } TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiParam) { @@ -2271,8 +2273,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiParam) { 
auto converted_mul = make_shared(mul, element::f64); auto sin = make_shared(convert_1); - converted_add->get_output_tensor(0).add_names({"add:0"}); - converted_mul->get_output_tensor(0).add_names({"mul:0"}); + add->get_output_tensor(0).add_names({"add:0"}); + mul->get_output_tensor(0).add_names({"mul:0"}); sin->get_output_tensor(0).add_names({"sine:0"}); auto result_add = make_shared(converted_add); @@ -2288,8 +2290,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiParam) { ASSERT_TRUE(result.valid) << result.message; const auto& results = model->get_results(); - ASSERT_EQ("add", results[0]->get_input_node_ptr(0)->get_friendly_name()); - ASSERT_EQ("mul", results[1]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("add.0", results[0]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("mul.0", results[1]->get_input_node_ptr(0)->get_friendly_name()); ASSERT_EQ("sine", results[2]->get_input_node_ptr(0)->get_friendly_name()); } @@ -2305,6 +2307,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp split->get_output_tensor(1).add_names({"split:1"}); split->get_output_tensor(2).add_names({"split:2"}); model = make_shared(split->outputs(), ParameterVector{param_1}); + // set version 10 to use names compatibility mode + model->get_rt_info()["version"] = static_cast(10); type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = false; @@ -2321,6 +2325,9 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp auto convert_1 = make_shared(param_1, element::f32); auto axis = opset10::Constant::create(element::i32, Shape{}, {0}); auto split = make_shared(convert_1, axis, 3); + split->get_output_tensor(0).add_names({"split:0"}); + split->get_output_tensor(1).add_names({"split:1"}); + split->get_output_tensor(2).add_names({"split:2"}); auto convert_split_0 = make_shared(split->output(0), element::f64); auto convert_split_1 = make_shared(split->output(1), element::f64); @@ -2389,6 +2396,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { result.get_node()->set_friendly_name("if_result"); result.add_names({"if_result:0"}); model = make_shared(OutputVector{result}, ParameterVector{cond, param_1, param_2}); + // set version 10 to use names compatibility mode + model->get_rt_info()["version"] = static_cast(10); type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = false; @@ -2442,6 +2451,7 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { if_op->set_input(convert_1, param_1_then, param_1_else); if_op->set_input(convert_2, param_2_then, param_2_else); auto result = if_op->set_output(result_then, result_else); + result.add_names({"if_result:0"}); auto converted_result = make_shared(result, element::f64); converted_result->get_output_tensor(0).add_names({"if_result:0"}); @@ -2702,3 +2712,38 @@ TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_orig_types FunctionsComparator::Result result = func_comparator(model_ref, model); ASSERT_TRUE(result.valid) << result.message; } + +TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_weightless_cache_info_as_rt_attribute) { + pass::Manager manager; + + auto some_value = opset10::Constant::create(element::f32, Shape{1}, {2}); + auto& node_rt_info = some_value->get_rt_info(); + ov::WeightlessCacheAttribute attr(element::f32.size(), 0, element::f32); + node_rt_info[ov::WeightlessCacheAttribute::get_type_info_static()] = attr; + + 
ov::ParameterVector inputParams; + ov::ResultVector results; + results.push_back(std::make_shared(some_value->output(0))); + auto model = std::make_shared(results, inputParams); + + type_to_fuse_map empty_type_to_fuse_map = {}; + bool keep_precision_sensitive_in_fp32 = false; + bool convert_input_output_precision = false; + bool store_original_precision_as_rt_attribute = true; + manager.register_pass(precisions_map{{element::f32, element::f16}}, + empty_type_to_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + manager.run_passes(model); + + const auto& ops = model->get_ops(); + auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr& node) { + return ov::op::util::is_constant(node); + }); + + ASSERT_TRUE(it != ops.end()); + const auto& new_rt_info = (*it)->get_rt_info(); + auto weightless_caching_attr_it = new_rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); + ASSERT_TRUE(weightless_caching_attr_it != new_rt_info.end()); +} diff --git a/src/core/dev_api/openvino/core/descriptor_tensor.hpp b/src/core/dev_api/openvino/core/descriptor_tensor.hpp index 9418183a1189fc..cdd9ba4c2bbab8 100644 --- a/src/core/dev_api/openvino/core/descriptor_tensor.hpp +++ b/src/core/dev_api/openvino/core/descriptor_tensor.hpp @@ -1,12 +1,21 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#pragma once -#include "openvino/core/descriptor/tensor.hpp" +#include +#include + +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/type/element_type.hpp" namespace ov { namespace descriptor { +class Tensor; +class Input; +class Output; + // To change Tensor element type please change the Parameter type. OPENVINO_API void set_element_type(Tensor& tensor, const element::Type& elemenet_type); @@ -14,5 +23,70 @@ void set_element_type(Tensor& tensor, const element::Type& elemenet_type); // To change Tensor type please change the Parameter type. OPENVINO_API void set_tensor_type(Tensor& tensor, const element::Type& element_type, const PartialShape& pshape); + +/** + * @brief Set destination tensor names as copy of all names from source tensor all tensor names. + * + * @param dst The tensor descriptor to set names. + * @param src The tensor descriptor as from which names will be copied. + */ +OPENVINO_API +void copy_tensor_names(Tensor& dst, const Tensor& src); + +/** @brief Tensor descriptor interface. */ +class OPENVINO_API ITensorDescriptor { +public: + virtual const element::Type& get_element_type() const = 0; + virtual const PartialShape& get_partial_shape() const = 0; + virtual const Shape& get_shape() const = 0; + virtual void set_type_shape(const element::Type& et, const PartialShape& shape) = 0; + + virtual void set_names(const std::unordered_set& names) = 0; + virtual void add_names(const std::unordered_set& names) = 0; + virtual const std::unordered_set& get_names() const = 0; + virtual const std::unordered_set& get_all_names() const = 0; + virtual const std::string& get_any_name() const = 0; + + virtual RTMap& rt_map() = 0; + virtual const RTMap& rt_map() const = 0; + virtual size_t pointer_hash() const noexcept = 0; + +protected: + virtual ~ITensorDescriptor(); +}; + +/** @brief The TensorExtension defines developer API for ov::descriptor::Tensor. */ +struct OPENVINO_API TensorExtension { + /** + * @brief Get the tensor descriptor object + * + * @param tensor Tensor descriptor to access its implementation. 
+ * @return Reference to Tensor description implementation. + */ + static const ITensorDescriptor& get_descriptor(const Tensor& tensor); + static std::shared_ptr& get_descriptor_ptr(Tensor& tensor); + + /** + * @brief The hasher of shared pointer Tensor descriptor. + */ + struct OPENVINO_API Hasher { + size_t operator()(const std::shared_ptr& tensor) const; + }; + + /** + * @brief The comparator of shared pointer Tensor descriptor. + */ + struct OPENVINO_API Equal { + bool operator()(const std::shared_ptr& lhs, const std::shared_ptr& rhs) const; + }; +}; + +/** + * @brief Set input descriptor as shared tensor on output descriptor. + * + * @param output_descriptor Descriptor to set shared tensor. + * @param input_descriptor Input descriptor to set in output as shared tensor. + */ +OPENVINO_API void set_shared_tensor(Output& output_descriptor, const Input& input_descriptor); } // namespace descriptor } // namespace ov diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp index fedcb030fb52cf..e3cf2609b26c8d 100644 --- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp +++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp @@ -5,6 +5,7 @@ #pragma once #include "openvino/core/core_visibility.hpp" +#include "openvino/core/node.hpp" #include "openvino/core/runtime_attribute.hpp" namespace ov { @@ -25,14 +26,16 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute { WeightlessCacheAttribute() = delete; - WeightlessCacheAttribute(size_t original_size, size_t bin_offset) + WeightlessCacheAttribute(size_t original_size, size_t bin_offset, ov::element::Type original_dtype) : original_size(original_size), - bin_offset(bin_offset) {} + bin_offset(bin_offset), + original_dtype(original_dtype) {} bool is_copyable() const override; size_t original_size; size_t bin_offset; + ov::element::Type original_dtype; }; } // namespace ov diff --git a/src/core/include/openvino/core/descriptor/input.hpp b/src/core/include/openvino/core/descriptor/input.hpp index cbedde6d73a118..2e1335a09ee5c6 100644 --- a/src/core/include/openvino/core/descriptor/input.hpp +++ b/src/core/include/openvino/core/descriptor/input.hpp @@ -13,18 +13,13 @@ namespace ov { class Node; -namespace op { -namespace v0 { -class Result; -} // namespace v0 -} // namespace op + namespace descriptor { class Output; // Describes a tensor that is an input to an op, directly or indirectly via a tuple class OPENVINO_API Input { friend class ov::Node; - friend class ov::op::v0::Result; public: /// \param node The node that owns this input @@ -111,12 +106,6 @@ class OPENVINO_API Input { Input& operator=(const Input&) = default; protected: - /// \return the tensor for the connected output - std::shared_ptr get_tensor_ptr() const; - - /// \return the tensor for the connected output - std::shared_ptr get_tensor_ptr(); - // owner of an argument node (in lieu of m_arguments) std::shared_ptr m_src_node; Node* m_node; // The node we are an input for diff --git a/src/core/include/openvino/core/descriptor/tensor.hpp b/src/core/include/openvino/core/descriptor/tensor.hpp index 9624994d8d612e..13a3826c37f291 100644 --- a/src/core/include/openvino/core/descriptor/tensor.hpp +++ b/src/core/include/openvino/core/descriptor/tensor.hpp @@ -22,99 +22,107 @@ namespace ov { class Node; /// \brief Alias for symbol tensor. using TensorSymbol = std::vector>; -/// \brief Alias for vector of symbol tensors. 
+/// \brief Alias for vector of symbol tensors. using TensorSymbolVector = std::vector; -namespace pass { -class ReverseShapeAndTypeInfer; -} namespace descriptor { - -class Tensor; +class ITensorDescriptor; /// \brief Compile-time descriptor of a first-class value that is a tensor. class OPENVINO_API Tensor { public: + /// \brief Creates Tensor descriptor + /// \param element_type Element type + /// \param pshape Partial shape of tensor + /// \param names Tensor names (optional default empty). Tensor(const element::Type& element_type, const PartialShape& pshape, const std::unordered_set& names = {}); + + OPENVINO_DEPRECATED("This constructor is deprecated. Will be removed in 2026.0") Tensor(const element::Type& element_type, const PartialShape& pshape, Node* node, size_t node_output_number); Tensor(const Tensor&) = delete; Tensor& operator=(const Tensor&) = delete; + /// \brief Gets any tensor name. + /// Throws if tensor has no names. const std::string& get_any_name() const; + + /// \brief Gets tensor names const std::unordered_set& get_names() const; + + /// \brief Set new names. + /// \param names Names to set. void set_names(const std::unordered_set& names); + + /// \brief Adds new names to tensor. + /// \param names new names to be added. void add_names(const std::unordered_set& names); /// \brief sets lower bound value description void set_lower_value(const ov::Tensor& value); + /// \brief sets upper bound value description void set_upper_value(const ov::Tensor& value); + /// \brief sets value symbol description void set_value_symbol(const TensorSymbol& value_symbol); + /// \brief unsets bound value descriptions void invalidate_values(); - const element::Type& get_element_type() const { - return m_element_type; - } + /// \brief Gets element type. + const element::Type& get_element_type() const; + + /// \brief Gets shape. + /// Throw if Tensor's shape is not static. const Shape& get_shape() const; - const PartialShape& get_partial_shape() const { - return m_partial_shape; - } + + /// \brief Gets partial shape. + const PartialShape& get_partial_shape() const; + /// \brief gets lower bound value description - const ov::Tensor& get_lower_value() const { - return m_lower_value; - } + const ov::Tensor& get_lower_value() const; + /// \brief gets upper bound value description - const ov::Tensor& get_upper_value() const { - return m_upper_value; - } + const ov::Tensor& get_upper_value() const; + /// \brief gets symbol value description - TensorSymbol get_value_symbol() const { - return m_value_symbol; - } + TensorSymbol get_value_symbol() const; + /// \brief checks if lower and upper bound are set and point to the same Tensor - bool has_and_set_bound() const { - return m_upper_value && m_lower_value && m_upper_value.data() == m_lower_value.data(); - } + bool has_and_set_bound() const; + + /// \brief Get Tensor size in bytes. + /// \return Size in bytes. size_t size() const; - RTMap& get_rt_info() { - return m_rt_info; - } - const RTMap& get_rt_info() const { - return m_rt_info; - } + /// \brief Gets runtime informations. + /// \return Runtime information map which can be modified. + RTMap& get_rt_info(); - void clone_from(const Tensor& old); + /// \brief Gets runtime informations. + /// \return Read only runtime information map. + const RTMap& get_rt_info() const; -protected: - element::Type m_element_type; + /// \brief Clones Tensor from the other. + /// \param other Tensor used to clone its properties. 
+ void clone_from(const Tensor& other); - PartialShape m_partial_shape; +protected: ov::Tensor m_lower_value, m_upper_value; TensorSymbol m_value_symbol; - - std::unordered_set m_names; - std::unordered_set::const_iterator m_name_it; - RTMap m_rt_info; - - friend OPENVINO_API void set_element_type(Tensor& tensor, const element::Type& elemenet_type); - friend OPENVINO_API void set_tensor_type(Tensor& tensor, - const element::Type& element_type, - const PartialShape& pshape); + std::shared_ptr m_impl; private: - mutable std::atomic m_shape_changing{false}; - mutable bool m_shape_changed{true}; - mutable Shape m_shape; + // hidden extension API for Tensor descriptor + friend struct TensorExtension; }; OPENVINO_API std::ostream& operator<<(std::ostream&, const ov::descriptor::Tensor&); } // namespace descriptor + } // namespace ov diff --git a/src/core/include/openvino/core/graph_util.hpp b/src/core/include/openvino/core/graph_util.hpp index 66c640a62314df..f5694ca89fee51 100644 --- a/src/core/include/openvino/core/graph_util.hpp +++ b/src/core/include/openvino/core/graph_util.hpp @@ -21,6 +21,10 @@ #include "openvino/op/parameter.hpp" #include "openvino/pass/serialize.hpp" +#ifdef OPENVINO_CPP_VER_17 +# include +#endif + namespace ov { OPENVINO_API @@ -288,27 +292,45 @@ bool replace_node_update_name(const std::shared_ptr& target, const std::sh /// \param bin_path Path where .bin file will be saved (optional). /// The same name as for xml_path will be used by default. /// \param version Version of the generated IR (optional). +/// \{ OPENVINO_API void serialize(const std::shared_ptr& m, const std::string& xml_path, const std::string& bin_path = "", ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED); +#ifdef OPENVINO_CPP_VER_17 +template >* = nullptr> +void serialize(const std::shared_ptr& m, + const Path& xml_path, + const Path& bin_path = {""}, + ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED) { + serialize(m, xml_path.string(), bin_path.string(), version); +} +#endif +/// \} + /// \brief Save given model into IR. Floating point weights are compressed to FP16 by default. /// This method saves a model to IR applying all necessary transformations that usually applied -/// in model conversion flow provided by mo tool. Paricularly, floatting point weights are compressed to FP16. +/// in model conversion flow provided by mo tool. Particularly, floating point weights are compressed to FP16. /// \param model Model which will be converted to IR representation. 
/// \param output_model Path to the output model file, must have extension .xml -/// \param compress_to_fp16 Whether to compress floatting point weights to FP16 (true by default) +/// \param compress_to_fp16 Whether to compress floating point weights to FP16 (true by default) OPENVINO_API void save_model(const std::shared_ptr& model, const std::string& output_model, bool compress_to_fp16 = true); - #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) OPENVINO_API void save_model(const std::shared_ptr& model, const std::wstring& output_model, bool compress_to_fp16 = true); #endif -} // namespace ov \ No newline at end of file + +#ifdef OPENVINO_CPP_VER_17 +template >* = nullptr> +void save_model(const std::shared_ptr& model, const Path& output_model, bool compress_to_fp16 = true) { + save_model(model, output_model.string(), compress_to_fp16); +} +#endif +} // namespace ov diff --git a/src/core/include/openvino/op/result.hpp b/src/core/include/openvino/op/result.hpp index 00e805d1f2aeb5..9cad2d9444a267 100644 --- a/src/core/include/openvino/op/result.hpp +++ b/src/core/include/openvino/op/result.hpp @@ -13,6 +13,51 @@ namespace v0 { /// \brief Result operation. /// /// \ingroup ov_ops_cpp_api +/// +/// The Result output tensor is special, it shares tensor with Result's input but requires to have dedicated properties +/// like: +/// - tensor names. +/// +/// Setting/adding Result's output names modify this specific tensor names. +/// Result's specific tensor names are added to input descriptor and transferred to new descriptor if Result's input +/// has been replaced. +/// +/// Examples 1: No specific names on Result's output +/// +/// set output names: +/// [N1] +/// ↓ +/// |----------------| [names: N1] |-----------------| +/// | Node |--------------------------->| Result | -> Model output names: N1 +/// |----------------| |-----------------| +/// +/// +/// Examples 2: Result's has got specific names +/// +/// set output names: set output names: +/// [N1] [R1, R2] +/// ↓ ↓ +/// |----------------| [names: N1, R1, R2] |-----------------| +/// | Node |--------------------------->| Result | -> Model output names: R1, R2 +/// |----------------| |-----------------| +/// +/// +/// Examples 3: Result from example 2 connected to new node +/// +/// set output names: set output names: +/// [N2] [R1, R2] +/// ↓ ↓ +/// |----------------| [names: N2, R1, R2] |-----------------| +/// | Node |--------------------------->| Result | -> Model output names: R1, R2 +/// |----------------| |-----------------| +/// +/// set output names: +/// [N1] +/// ↓ +/// |----------------| [names: N1] +/// | Node |-----------------> +/// |----------------| +/// class OPENVINO_API Result : public Op { public: OPENVINO_OP("Result", "opset1"); diff --git a/src/core/include/openvino/pass/serialize.hpp b/src/core/include/openvino/pass/serialize.hpp index fc3e743d4005dc..d0eaadde346bf6 100644 --- a/src/core/include/openvino/pass/serialize.hpp +++ b/src/core/include/openvino/pass/serialize.hpp @@ -11,6 +11,10 @@ #include "openvino/opsets/opset.hpp" #include "openvino/pass/pass.hpp" +#ifdef OPENVINO_CPP_VER_17 +# include +#endif + namespace ov { namespace pass { @@ -35,6 +39,13 @@ class OPENVINO_API Serialize : public ov::pass::ModelPass { Serialize(const std::string& xmlPath, const std::string& binPath, Version version = Version::UNSPECIFIED); +#ifdef OPENVINO_CPP_VER_17 + Serialize(const std::filesystem::path& xmlPath, + const std::filesystem::path& binPath, + Version version = Version::UNSPECIFIED) + : Serialize(xmlPath.string(), 
binPath.string(), version) {} +#endif + private: std::ostream* m_xmlFile; std::ostream* m_binFile; diff --git a/src/core/src/descriptor/input.cpp b/src/core/src/descriptor/input.cpp index 544abd10945806..28288002780739 100644 --- a/src/core/src/descriptor/input.cpp +++ b/src/core/src/descriptor/input.cpp @@ -71,14 +71,6 @@ ov::descriptor::Tensor& ov::descriptor::Input::get_tensor() { return m_output->get_tensor(); } -std::shared_ptr ov::descriptor::Input::get_tensor_ptr() const { - return m_output->get_tensor_ptr(); -} - -std::shared_ptr ov::descriptor::Input::get_tensor_ptr() { - return m_output->get_tensor_ptr(); -} - const ov::Shape& ov::descriptor::Input::get_shape() const { return m_output->get_shape(); } diff --git a/src/core/src/descriptor/shared_tensor.cpp b/src/core/src/descriptor/shared_tensor.cpp new file mode 100644 index 00000000000000..314aa524bcacec --- /dev/null +++ b/src/core/src/descriptor/shared_tensor.cpp @@ -0,0 +1,125 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/descriptor/output.hpp" +#include "openvino/core/descriptor_tensor.hpp" +#include "openvino/util/common_util.hpp" + +namespace ov { +namespace descriptor { +/** + * @brief Dedicated tensor descriptor implementation to share input descriptor. + * + * Shared tensor share input tensor but have specific properties: + * - tensor names - if set these are used as descriptor names and appended to input tensor because is same tensor + */ +class SharedTensor : public ITensorDescriptor { +public: + SharedTensor(std::shared_ptr tensor) + : m_shared_tensor{std::move(tensor)}, + m_output_names{}, + m_name_it{} { + OPENVINO_ASSERT(m_shared_tensor, "Cannot set NULL tensor descriptor"); + } + + // --- ITensorDescriptor API + virtual const element::Type& get_element_type() const override { + return m_shared_tensor->get_element_type(); + } + + virtual const PartialShape& get_partial_shape() const override { + return m_shared_tensor->get_partial_shape(); + } + + virtual const Shape& get_shape() const override { + return m_shared_tensor->get_shape(); + } + + virtual void set_type_shape(const element::Type& et, const PartialShape& shape) override { + m_shared_tensor->set_type_shape(et, shape); + } + + void set_names(const std::unordered_set& names) override { + rm_tensor_output_names(); + m_output_names = names; + m_name_it = std::min_element(m_output_names.begin(), m_output_names.end()); + m_shared_tensor->add_names(m_output_names); + } + + void add_names(const std::unordered_set& names) override { + m_output_names.insert(names.begin(), names.end()); + m_name_it = std::min_element(m_output_names.begin(), m_output_names.end()); + m_shared_tensor->add_names(names); + } + + const std::unordered_set& get_names() const override { + return m_output_names.empty() ? m_shared_tensor->get_names() : m_output_names; + } + + const std::unordered_set& get_all_names() const override { + return m_shared_tensor->get_names(); + } + + const std::string& get_any_name() const override { + return m_output_names.empty() ? 
m_shared_tensor->get_any_name() : *m_name_it; + } + + const RTMap& rt_map() const override { + return m_shared_tensor->rt_map(); + } + + RTMap& rt_map() override { + return m_shared_tensor->rt_map(); + } + + size_t pointer_hash() const noexcept override { + return m_shared_tensor->pointer_hash(); + } + + // --- SharedTensor specific interface + void set_tensor(std::shared_ptr tensor) { + if (tensor != m_shared_tensor) { + OPENVINO_ASSERT(tensor, "Cannot set NULL tensor descriptor"); + rm_tensor_output_names(); + auto prev_rt_map = rt_map(); + + m_shared_tensor = std::move(tensor); + m_shared_tensor->add_names(m_output_names); + rt_map().insert(std::make_move_iterator(prev_rt_map.begin()), std::make_move_iterator(prev_rt_map.end())); + } + } + +private: + void rm_tensor_output_names() { + auto names = m_shared_tensor->get_names(); + for (const auto& output_name : m_output_names) { + names.erase(output_name); + } + + m_shared_tensor->set_names(names); + } + + std::shared_ptr m_shared_tensor; + std::unordered_set m_output_names; + std::unordered_set::const_iterator m_name_it; +}; + +/** + * @brief Set output tensor descriptor with shared tensor from new input. + * + * @param output Output descriptor to be updated. + * @param input Input descriptor to set as shared tensor. + */ +void set_shared_tensor(Output& output, const Input& input) { + auto& output_descriptor = TensorExtension::get_descriptor_ptr(output.get_tensor()); + const auto& input_descriptor = TensorExtension::get_descriptor_ptr(input.get_output().get_tensor()); + if (auto* result_ptr = dynamic_cast(output_descriptor.get())) { + result_ptr->set_tensor(input_descriptor); + } else { + output_descriptor = std::make_shared(input_descriptor); + } +} + +} // namespace descriptor +} // namespace ov diff --git a/src/core/src/descriptor/tensor.cpp b/src/core/src/descriptor/tensor.cpp index ae3f7c6e77cd4f..6e85b25b2b9f8a 100644 --- a/src/core/src/descriptor/tensor.cpp +++ b/src/core/src/descriptor/tensor.cpp @@ -8,26 +8,153 @@ #include "openvino/core/descriptor_tensor.hpp" #include "openvino/core/except.hpp" #include "openvino/core/node.hpp" +#include "openvino/core/type/element_iterator.hpp" #include "openvino/op/util/symbolic_info.hpp" +#include "openvino/util/common_util.hpp" -ov::descriptor::Tensor::Tensor(const element::Type& element_type, - const PartialShape& pshape, - const std::unordered_set& names) - : m_element_type(element_type), - m_partial_shape(pshape) { - set_names(names); +namespace ov { +namespace descriptor { + +/** @brief Helper class to store Tensor shape information.*/ +class ShapeInfo { +public: + ShapeInfo() = default; + ShapeInfo(const PartialShape& shape) : m_partial_shape{shape} {} + + void set_partial_shape(PartialShape shape) { + AtomicGuard lock(m_shape_changing); + m_partial_shape = std::move(shape); + m_shape_changed = true; + } + + const PartialShape& get_partial_shape() const { + return m_partial_shape; + } + + const Shape& get_shape() const { + AtomicGuard lock(m_shape_changing); + if (m_shape_changed) { + m_shape = m_partial_shape.to_shape(); + m_shape_changed = false; + } + return m_shape; + } + +private: + PartialShape m_partial_shape{}; + mutable Shape m_shape{}; + mutable std::atomic m_shape_changing{false}; + mutable bool m_shape_changed{true}; +}; + +// --- Tensor descriptor interface +ITensorDescriptor::~ITensorDescriptor() = default; + +/** @brief Basic tensor descriptor. 
*/ +class BasicTensor : public ITensorDescriptor { +public: + BasicTensor() = default; + + BasicTensor(const element::Type& et, const PartialShape& shape, const std::unordered_set& names) + : m_element_type{et}, + m_shape_info{shape}, + m_names{names}, + m_name_it{find_new_any_name(m_names)}, + m_rt_map{}, + m_legacy_name{} {} + + virtual const element::Type& get_element_type() const override { + return m_element_type; + } + + virtual const PartialShape& get_partial_shape() const override { + return m_shape_info.get_partial_shape(); + } + + virtual const Shape& get_shape() const override { + return m_shape_info.get_shape(); + } + + virtual void set_type_shape(const element::Type& et, const PartialShape& shape) override { + m_element_type = et; + m_shape_info.set_partial_shape(shape); + } + + void set_names(const std::unordered_set& names) override { + m_names = names; + m_name_it = find_new_any_name(m_names); + }; + + void add_names(const std::unordered_set& names) override { + m_names.insert(names.begin(), names.end()); + m_name_it = find_new_any_name(m_names); + } + + const std::unordered_set& get_names() const override { + return m_names; + } + + const std::unordered_set& get_all_names() const override { + return get_names(); + } + + const std::string& get_any_name() const override { + OPENVINO_ASSERT(!get_names().empty(), "Attempt to get a name for a Tensor without names"); + return *m_name_it; + } + + const RTMap& rt_map() const override { + return m_rt_map; + } + + RTMap& rt_map() override { + return m_rt_map; + }; + + size_t pointer_hash() const noexcept override { + return std::hash()(this); + } + +private: + element::Type m_element_type; + ShapeInfo m_shape_info; + std::unordered_set m_names; + std::unordered_set::const_iterator m_name_it; + RTMap m_rt_map; + std::string m_legacy_name; + + static decltype(m_name_it) find_new_any_name(const decltype(m_names)& names) { + return std::min_element(names.begin(), names.end()); + } +}; + +// --- TensorExtension +const ITensorDescriptor& TensorExtension::get_descriptor(const Tensor& tensor) { + return *tensor.m_impl; } -ov::descriptor::Tensor::Tensor(const element::Type& element_type, - const PartialShape& pshape, - ov::Node* node, - size_t node_output_number) - : m_element_type(element_type), - m_partial_shape(pshape) { - m_name_it = m_names.cend(); +std::shared_ptr& TensorExtension::get_descriptor_ptr(Tensor& tensor) { + return tensor.m_impl; } -void ov::descriptor::Tensor::invalidate_values() { +bool TensorExtension::Equal::operator()(const std::shared_ptr& lhs, const std::shared_ptr& rhs) const { + return TensorExtension::get_descriptor(*lhs).pointer_hash() == TensorExtension::get_descriptor(*rhs).pointer_hash(); +} + +size_t TensorExtension::Hasher::operator()(const std::shared_ptr& tensor) const { + return get_descriptor(*tensor).pointer_hash(); +} + +// --- Tensor +Tensor::Tensor(const element::Type& element_type, + const PartialShape& pshape, + const std::unordered_set& names) + : m_impl(std::make_shared(element_type, pshape, names)) {} + +Tensor::Tensor(const element::Type& element_type, const PartialShape& pshape, ov::Node* node, size_t) + : m_impl(std::make_shared(element_type, pshape, std::unordered_set{})) {} + +void Tensor::invalidate_values() { if (ov::skip_invalidation(*this)) return; m_upper_value = {}; @@ -35,110 +162,110 @@ void ov::descriptor::Tensor::invalidate_values() { m_value_symbol.clear(); } -void ov::descriptor::Tensor::set_lower_value(const ov::Tensor& value) { +void Tensor::set_lower_value(const ov::Tensor& 
value) { OPENVINO_ASSERT(static_cast(value)); - OPENVINO_ASSERT(m_partial_shape.same_scheme(value.get_shape())); - OPENVINO_ASSERT(m_element_type == value.get_element_type()); + OPENVINO_ASSERT(get_partial_shape().same_scheme(value.get_shape())); + OPENVINO_ASSERT(get_element_type() == value.get_element_type()); m_lower_value = value; } -void ov::descriptor::Tensor::set_upper_value(const ov::Tensor& value) { +void Tensor::set_upper_value(const ov::Tensor& value) { OPENVINO_ASSERT(static_cast(value)); - OPENVINO_ASSERT(m_partial_shape.same_scheme(value.get_shape())); - OPENVINO_ASSERT(m_element_type == value.get_element_type()); + OPENVINO_ASSERT(get_partial_shape().same_scheme(value.get_shape())); + OPENVINO_ASSERT(get_element_type() == value.get_element_type()); m_upper_value = value; } -void ov::descriptor::Tensor::set_value_symbol(const TensorSymbol& value_symbol) { +void Tensor::set_value_symbol(const TensorSymbol& value_symbol) { const auto& symbols_size = value_symbol.size(); if (symbols_size == 0) { m_value_symbol.clear(); } else { - OPENVINO_ASSERT(m_partial_shape.is_static()); - OPENVINO_ASSERT(shape_size(m_partial_shape.to_shape()) == symbols_size); + OPENVINO_ASSERT(get_partial_shape().is_static()); + OPENVINO_ASSERT(shape_size(get_partial_shape().to_shape()) == symbols_size); m_value_symbol = value_symbol; } } -const ov::Shape& ov::descriptor::Tensor::get_shape() const { - AtomicGuard lock(m_shape_changing); - if (m_shape_changed) { - m_shape = m_partial_shape.to_shape(); - m_shape_changed = false; - } - return m_shape; +const ov::Tensor& Tensor::get_lower_value() const { + return m_lower_value; } -size_t ov::descriptor::Tensor::size() const { - const bool bitwidth_less_than_byte = m_element_type.bitwidth() < 8; - return bitwidth_less_than_byte ? 
(shape_size(get_shape()) * m_element_type.bitwidth() + 7) >> 3 - : (shape_size(get_shape()) * m_element_type.size()); +const ov::Tensor& Tensor::get_upper_value() const { + return m_upper_value; } -const std::unordered_set& ov::descriptor::Tensor::get_names() const { - return m_names; +TensorSymbol Tensor::get_value_symbol() const { + return m_value_symbol; } -const std::string& ov::descriptor::Tensor::get_any_name() const { - if (m_name_it == m_names.cend()) { - OPENVINO_THROW("Attempt to get a name for a Tensor without names"); - } - return *m_name_it; +bool Tensor::has_and_set_bound() const { + return m_upper_value && m_lower_value && m_upper_value.data() == m_lower_value.data(); } -void ov::descriptor::Tensor::set_names(const std::unordered_set& names) { - m_names = names; - m_name_it = m_names.cbegin(); - for (auto it = m_names.cbegin(); it != m_names.cend(); it++) { - if (*it < *m_name_it) - // Update any name - m_name_it = it; - } +const element::Type& Tensor::get_element_type() const { + return m_impl->get_element_type(); } -void ov::descriptor::Tensor::add_names(const std::unordered_set& names) { - for (const auto& name : names) { - auto res = m_names.insert(name); - if (m_name_it == m_names.end() || *res.first < *m_name_it) - // Update any name - m_name_it = res.first; - } +const PartialShape& Tensor::get_partial_shape() const { + return m_impl->get_partial_shape(); +} +const Shape& Tensor::get_shape() const { + return m_impl->get_shape(); } -void ov::descriptor::Tensor::clone_from(const ov::descriptor::Tensor& old) { - { - AtomicGuard lock(m_shape_changing); - m_partial_shape = old.get_partial_shape(); - m_shape_changed = true; - } - set_names(old.get_names()); - m_element_type = old.get_element_type(); - m_lower_value = old.get_lower_value(); - m_upper_value = old.get_upper_value(); - m_value_symbol = old.get_value_symbol(); - m_rt_info = old.get_rt_info(); +size_t Tensor::size() const { + return element::get_memory_size(get_element_type(), shape_size(get_shape())); } -void ov::descriptor::set_tensor_type(ov::descriptor::Tensor& tensor, - const element::Type& element_type, - const PartialShape& pshape) { - tensor.m_element_type = element_type; - AtomicGuard lock(tensor.m_shape_changing); - tensor.m_partial_shape = pshape; - tensor.m_shape_changed = true; +const std::unordered_set& Tensor::get_names() const { + return m_impl->get_names(); } -void ov::descriptor::set_element_type(ov::descriptor::Tensor& tensor, const element::Type& element_type) { - tensor.m_element_type = element_type; +const RTMap& Tensor::get_rt_info() const { + return m_impl->rt_map(); } -std::ostream& ov::descriptor::operator<<(std::ostream& out, const ov::descriptor::Tensor& tensor) { - std::string names; - for (const auto& name : tensor.get_names()) { - if (!names.empty()) - names += ", "; - names += name; - } - out << "Tensor(" << names << ")"; +RTMap& Tensor::get_rt_info() { + return m_impl->rt_map(); +} + +const std::string& Tensor::get_any_name() const { + return m_impl->get_any_name(); +} + +void Tensor::set_names(const std::unordered_set& names) { + m_impl->set_names(names); +} + +void Tensor::add_names(const std::unordered_set& names) { + m_impl->add_names(names); +} + +void Tensor::clone_from(const Tensor& other) { + m_impl->set_type_shape(other.get_element_type(), other.get_partial_shape()); + set_names(other.get_names()); + m_lower_value = other.get_lower_value(); + m_upper_value = other.get_upper_value(); + m_value_symbol = other.get_value_symbol(); + get_rt_info() = other.get_rt_info(); +} + 
+void set_tensor_type(Tensor& tensor, const element::Type& element_type, const PartialShape& pshape) { + TensorExtension::get_descriptor_ptr(tensor)->set_type_shape(element_type, pshape); +} + +void set_element_type(Tensor& tensor, const element::Type& element_type) { + TensorExtension::get_descriptor_ptr(tensor)->set_type_shape(element_type, tensor.get_partial_shape()); +} + +void copy_tensor_names(Tensor& dst, const Tensor& src) { + dst.set_names(TensorExtension::get_descriptor(src).get_all_names()); +} + +std::ostream& operator<<(std::ostream& out, const Tensor& tensor) { + out << "Tensor(" << util::join(tensor.get_names()) << ")"; return out; } +} // namespace descriptor +} // namespace ov diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index ec9197a5a337cb..689e1c80af12a0 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -155,8 +155,8 @@ std::shared_ptr ov::Node::copy_with_new_inputs( for (auto& cdep : control_dependencies) { clone->add_control_dependency(cdep); } - for (size_t i = 0; i < get_output_size(); i++) { - clone->get_output_tensor(i).set_names(get_output_tensor(i).get_names()); + for (size_t i = 0; i < get_output_size(); ++i) { + descriptor::copy_tensor_names(clone->get_output_tensor(i), get_output_tensor(i)); } return clone; } @@ -218,9 +218,8 @@ ov::descriptor::Input& ov::Node::get_input_descriptor(size_t position) { ov::descriptor::Output& ov::Node::get_output_descriptor(size_t position) { while (m_outputs.size() <= position) { - size_t i = m_outputs.size(); - auto tensor_descriptor = make_shared(element::dynamic, PartialShape::dynamic(), this, i); - m_outputs.emplace_back(this, i, tensor_descriptor); + const auto i = m_outputs.size(); + m_outputs.emplace_back(this, i, make_shared(element::dynamic, PartialShape::dynamic())); } return m_outputs[position]; } @@ -468,8 +467,8 @@ ov::descriptor::Tensor& ov::Node::get_output_tensor(size_t i) const { ov::descriptor::Tensor& ov::Node::get_input_tensor(size_t i) const { OPENVINO_ASSERT(i < m_inputs.size(), idx_txt, i, out_of_range_txt); - descriptor::Input input = m_inputs[i]; - return input.get_tensor(); + auto& input = m_inputs[i]; + return input.get_output().get_tensor(); } size_t ov::Node::get_input_size() const { diff --git a/src/core/src/op/result.cpp b/src/core/src/op/result.cpp index 237d6bd7a2084a..97dc95a0e53f17 100644 --- a/src/core/src/op/result.cpp +++ b/src/core/src/op/result.cpp @@ -9,6 +9,7 @@ #include #include "itt.hpp" +#include "openvino/core/descriptor_tensor.hpp" namespace ov { namespace op { @@ -22,10 +23,8 @@ void Result::validate_and_infer_types() { OV_OP_SCOPE(v0_Result_validate_and_infer_types); NODE_VALIDATION_CHECK(this, get_input_size() == 1, "Argument has ", get_input_size(), " outputs (1 expected)."); - // Result doesn't change change in/out tensors - auto& output = get_output_descriptor(0); - auto& input = get_input_descriptor(0); - output.set_tensor_ptr(input.get_tensor_ptr()); + // Result shares input tensor but can have specific properties which are added/removed to input. 
+ descriptor::set_shared_tensor(get_output_descriptor(0), get_input_descriptor(0)); } std::shared_ptr Result::clone_with_new_inputs(const OutputVector& new_args) const { diff --git a/src/core/src/preprocess/pre_post_process.cpp b/src/core/src/preprocess/pre_post_process.cpp index d81d48082cde04..b408755a7d85a8 100644 --- a/src/core/src/preprocess/pre_post_process.cpp +++ b/src/core/src/preprocess/pre_post_process.cpp @@ -56,6 +56,10 @@ struct PrePostProcessor::PrePostProcessorImpl { PrePostProcessorImpl() = default; explicit PrePostProcessorImpl(const std::shared_ptr& f) : m_function(f) { OPENVINO_ASSERT(f, "Model can't be nullptr for PrePostProcessor"); + + // if IR version < 11, set compatibility mode + const auto names_mode = m_function->has_rt_info("version") && m_function->get_rt_info("version") < 11; + for (size_t i = 0; i < m_function->inputs().size(); ++i) { auto info = InputInfo(); info.m_impl->m_resolved_param = m_function->get_parameters()[i]; @@ -64,6 +68,7 @@ struct PrePostProcessor::PrePostProcessorImpl { for (size_t i = 0; i < m_function->outputs().size(); ++i) { auto info = OutputInfo(); info.m_impl->m_output_node = m_function->output(i); + info.m_impl->get_tensor_data()->set_names_compatibility_mode(names_mode); m_outputs.push_back(std::move(info)); } } diff --git a/src/core/src/preprocess/preprocess_impls.cpp b/src/core/src/preprocess/preprocess_impls.cpp index cbe18a78beb575..e0cdee2e76a140 100644 --- a/src/core/src/preprocess/preprocess_impls.cpp +++ b/src/core/src/preprocess/preprocess_impls.cpp @@ -6,6 +6,7 @@ #include "layout_utils.hpp" #include "openvino/core/descriptor_tensor.hpp" +#include "openvino/util/common_util.hpp" namespace ov { namespace preprocess { @@ -325,11 +326,9 @@ void InputInfo::InputInfoImpl::dump(std::ostream& str, //----------- OutputInfoImpl ---------- void OutputInfo::OutputInfoImpl::build(ov::ResultVector& results) { - std::shared_ptr result; auto node = m_output_node; - const auto start_out_node_names = node.get_tensor().get_names(); - node.get_tensor().set_names({}); - result = std::dynamic_pointer_cast(node.get_node_shared_ptr()); + const auto result = ov::as_type_ptr(node.get_node_shared_ptr()); + // Set result layout from 'model' information if (get_model_data()->is_layout_set()) { // Overwrite existing model's layout here (fix 74065) @@ -369,49 +368,46 @@ void OutputInfo::OutputInfoImpl::build(ov::ResultVector& results) { node = std::get<0>(action_result); post_processing_applied = true; } - // Restore tensor names - node.get_tensor().set_names(start_out_node_names); + auto orig_parent = result->get_input_source_output(0).get_node_shared_ptr(); - bool reset_orig_friendly_name = false; - if (!post_processing_applied) { - return; - } - if (orig_parent->get_output_size() == 1) { - node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name()); - reset_orig_friendly_name = true; + if (get_tensor_data()->get_names_compatibility_mode()) { + // Move result tensor names from previous input to new + const auto result_input_names = result->get_input_tensor(0).get_names(); + result->get_input_tensor(0).set_names({}); + node.get_tensor().set_names(result_input_names); + + if (!post_processing_applied) { + return; + } + + if (orig_parent->get_output_size() == 1) { + node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name()); + + // Reset friendly name of input node to avoid names collision + // when there is at a new node inserted by post-processing steps + // If no new nodes are inserted by post-processing, 
then we need to preserve friendly name of input + // as it's required for old API correct work + result->get_input_source_output(0).get_node_shared_ptr()->set_friendly_name(""); + } else if (node.get_node_shared_ptr() != orig_parent) { + // Result node is changed - add "." suffix + node.get_node_shared_ptr()->set_friendly_name( + orig_parent->get_friendly_name() + "." + + std::to_string(result->get_input_source_output(0).get_index())); + } + result->input(0).replace_source_output(node); + result->revalidate_and_infer_types(); } else if (node.get_node_shared_ptr() != orig_parent) { // Result node is changed - add "." suffix - node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name() + "." + - std::to_string(result->get_input_source_output(0).get_index())); - } + const auto suffix = std::string(".") + std::to_string(result->get_input_source_output(0).get_index()); + node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name() + suffix); - // Reset friendly name of input node to avoid names collision - // when there is at a new node inserted by post-processing steps - // If no new nodes are inserted by post-processing, then we need to preserve friendly name of input - // as it's required for old API correct work - if (reset_orig_friendly_name) { - result->get_input_source_output(0).get_node_shared_ptr()->set_friendly_name(""); + result->input(0).replace_source_output(node); + result->revalidate_and_infer_types(); } - // Create result - auto new_result = std::make_shared(node); - new_result->set_friendly_name(result->get_friendly_name()); - - // Preserve runtime info of original result - new_result->get_rt_info() = result->get_rt_info(); - new_result->input(0).get_rt_info() = result->input(0).get_rt_info(); - new_result->output(0).get_rt_info() = result->output(0).get_rt_info(); - // Update layout if (!context.layout().empty()) { - new_result->set_layout(context.layout()); - } - - for (auto& old_result : results) { - if (result == old_result) { - old_result = new_result; - break; - } + result->set_layout(context.layout()); } } @@ -439,7 +435,7 @@ void OutputInfo::OutputInfoImpl::dump(std::ostream& str) const { str << "Output "; if (!start_out_node_names.empty()) { - str << "\"" << *start_out_node_names.begin() << "\""; + str << "\"" << util::join(start_out_node_names) << "\""; } str << ":" << std::endl; str << " Model's data tensor: "; diff --git a/src/core/src/preprocess/preprocess_impls.hpp b/src/core/src/preprocess/preprocess_impls.hpp index 87d6b5456badc3..ee74c534c361fb 100644 --- a/src/core/src/preprocess/preprocess_impls.hpp +++ b/src/core/src/preprocess/preprocess_impls.hpp @@ -122,12 +122,21 @@ class TensorInfoImplBase { return m_layout; } + void set_names_compatibility_mode(const bool compatiblity_mode) { + m_names_compatiblity_mode = compatiblity_mode; + } + + const bool get_names_compatibility_mode() const { + return m_names_compatiblity_mode; + } + protected: element::Type m_type = element::dynamic; bool m_type_set = false; Layout m_layout = Layout(); bool m_layout_set = false; + bool m_names_compatiblity_mode = false; }; class OutputTensorInfo::OutputTensorInfoImpl : public TensorInfoImplBase {}; diff --git a/src/core/tests/frontend/frontend_manager.cpp b/src/core/tests/frontend/frontend_manager.cpp index 1e42de563ddbc6..31e643e7209bdb 100644 --- a/src/core/tests/frontend/frontend_manager.cpp +++ b/src/core/tests/frontend/frontend_manager.cpp @@ -479,3 +479,29 @@ TEST(FrontEndManagerTest, Exception_Safety_Input_Model_set_tensor_value) { 
TEST(FrontEndManagerTest, Exception_Safety_Input_Model_set_tensor_partial_value) { CHECK_EXCEPTION_INPUT_MODEL(input_model->set_tensor_partial_value({}, {}, {})) } + +#ifdef OPENVINO_CPP_VER_17 + +TEST(FrontEndManagerTest, testFEMDestroy_InputModelHolderUsingPath) { + InputModel::Ptr input_model; + { + std::shared_ptr model; + FrontEndManager fem; + fem.register_front_end("mock1", mock_fe_path()); + auto fe = fem.load_by_framework("mock1"); + input_model = fe->load(std::filesystem::path("test")); + model = fe->convert(input_model); + EXPECT_EQ(model->get_friendly_name(), "mock1_model"); + } + ASSERT_TRUE(input_model); +} + +TEST(FrontEndManagerTest, Exception_Safety_FrontEnd_Supported_By_Path) { + EXPECT_ANY_THROW({ + FrontEndManager fem; + fem.register_front_end("mock1", mock_fe_path()); + auto fe = fem.load_by_framework("mock1"); + fe->supported(std::filesystem::path("throw_now")); + }); +} +#endif diff --git a/src/core/tests/pass/serialization/deterministicity.cpp b/src/core/tests/pass/serialization/deterministicity.cpp index 8441da501eb9bf..a93f092889d2a1 100644 --- a/src/core/tests/pass/serialization/deterministicity.cpp +++ b/src/core/tests/pass/serialization/deterministicity.cpp @@ -296,6 +296,47 @@ TEST_P(SerializationDeterministicityInputOutputTest, FromIrModel) { EXPECT_TRUE(files_equal(xml_2, xml_1)); } +#ifdef OPENVINO_CPP_VER_17 +TEST_P(SerializationDeterministicityInputOutputTest, FromOvModelBybPath) { + auto irVersion = GetParam(); + + std::shared_ptr modelRef; + { + auto parameter0 = std::make_shared(ov::element::f32, ov::Shape{1, 3, 22, 22}); + parameter0->set_friendly_name("input0"); + auto result0 = std::make_shared(parameter0); + result0->set_friendly_name("output0"); + auto parameter1 = std::make_shared(ov::element::f32, ov::Shape{1, 3, 22, 22}); + parameter1->set_friendly_name("input1"); + auto result1 = std::make_shared(parameter1); + result1->set_friendly_name("output1"); + modelRef = + std::make_shared(ov::NodeVector{result0, result1}, ov::ParameterVector{parameter0, parameter1}); + } + + auto& expected1 = modelRef; + const auto out_xml_path = std::filesystem::path(m_out_xml_path_1); + const auto out_bin_path = std::filesystem::path(m_out_bin_path_1); + ov::pass::Serialize(out_xml_path, out_bin_path, irVersion).run_on_model(modelRef); + auto expected2 = ov::test::readModel(m_out_xml_path_1, m_out_bin_path_1); + + ov::pass::Serialize(m_out_xml_path_2, m_out_bin_path_2, irVersion).run_on_model(expected2); + + EXPECT_EQ(input0Name, expected1->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected1->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, expected1->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected1->output(1).get_node()->get_friendly_name()); + EXPECT_EQ(input0Name, expected2->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected2->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, expected2->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected2->output(1).get_node()->get_friendly_name()); + + std::ifstream xml_1(m_out_xml_path_1, std::ios::in | std::ios::binary); + std::ifstream xml_2(m_out_xml_path_2, std::ios::in | std::ios::binary); + EXPECT_TRUE(files_equal(xml_1, xml_2)); +} +#endif + INSTANTIATE_TEST_SUITE_P(DeterministicityInputOutput, SerializationDeterministicityInputOutputTest, ::testing::Values(ov::pass::Serialize::Version::IR_V10, ov::pass::Serialize::Version::IR_V11)); diff --git 
a/src/core/tests/pass/serialization/serialize.cpp b/src/core/tests/pass/serialization/serialize.cpp index e45d5d1d1434ff..5cb1965feebdd7 100644 --- a/src/core/tests/pass/serialization/serialize.cpp +++ b/src/core/tests/pass/serialization/serialize.cpp @@ -74,6 +74,23 @@ TEST_P(SerializationTest, SaveModel) { }); } +#ifdef OPENVINO_CPP_VER_17 +TEST_P(SerializationTest, CompareFunctionsByPath) { + const auto out_xml_path = std::filesystem::path(m_out_xml_path); + const auto out_bin_path = std::filesystem::path(m_out_bin_path); + CompareSerialized([&out_xml_path, &out_bin_path](const auto& m) { + ov::pass::Serialize(out_xml_path, out_bin_path).run_on_model(m); + }); +} + +TEST_P(SerializationTest, SaveModelByPath) { + const auto out_xml_path = std::filesystem::path(m_out_xml_path); + CompareSerialized([&out_xml_path](const auto& m) { + ov::save_model(m, out_xml_path, false); + }); +} +#endif + INSTANTIATE_TEST_SUITE_P( IRSerialization, SerializationTest, diff --git a/src/core/tests/preprocess.cpp b/src/core/tests/preprocess.cpp index 0cec67c3031288..99f2789b217b6d 100644 --- a/src/core/tests/preprocess.cpp +++ b/src/core/tests/preprocess.cpp @@ -57,6 +57,12 @@ static std::shared_ptr create_n_inputs(element::Type type, const PartialS return std::make_shared(res, params); } +namespace { +void set_model_as_v10(ov::Model& model) { + model.get_rt_info()["version"] = static_cast(10); +} +} // namespace + TEST(pre_post_process, simple_mean_scale) { auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); auto p = PrePostProcessor(f); @@ -1531,7 +1537,7 @@ TEST(pre_post_process, postprocess_convert_element_type_explicit) { auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); auto name = f->output().get_node_shared_ptr()->get_friendly_name(); auto name_last_op = f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); - auto old_names = f->output().get_tensor().get_names(); + auto old_names = std::unordered_set{"tensor_output1"}; auto p = PrePostProcessor(f); p.output().postprocess().convert_element_type(element::u8); @@ -1539,7 +1545,6 @@ TEST(pre_post_process, postprocess_convert_element_type_explicit) { EXPECT_EQ(f->get_results().size(), 1); EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); EXPECT_EQ(f->output().get_tensor().get_names(), old_names); - EXPECT_EQ(old_names.count("tensor_output1"), 1); auto ops = f->get_ordered_ops(); auto res_count = std::count_if(ops.begin(), ops.end(), [](const std::shared_ptr& n) { return std::dynamic_pointer_cast(n) != nullptr; @@ -1548,9 +1553,37 @@ TEST(pre_post_process, postprocess_convert_element_type_explicit) { auto names_count = std::count_if(ops.begin(), ops.end(), [](std::shared_ptr n) { return n->output(0).get_tensor().get_names().count("tensor_output1") > 0; }); - EXPECT_EQ(names_count, 2); // last node + result referencing to it + EXPECT_EQ(names_count, 2); // result + node connected to it has same name referencing to it EXPECT_EQ(name, f->output().get_node_shared_ptr()->get_friendly_name()); - EXPECT_EQ(name_last_op, + EXPECT_EQ(name_last_op + ".0", + f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name()); +} + +TEST(pre_post_process, trivial_model_convert_element_type_explicit) { + const auto f = create_trivial(element::f32, Shape{1, 3, 2, 2}); + const auto name = f->output().get_node_shared_ptr()->get_friendly_name(); + const auto name_last_op = + 
f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); + const auto old_names = std::unordered_set{"tensor_output1"}; + const auto n = f->output().get_tensor().get_names(); + auto p = PrePostProcessor(f); + + p.output().postprocess().convert_element_type(element::u8); + p.build(); + EXPECT_EQ(f->get_results().size(), 1); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); + EXPECT_THAT(f->output().get_tensor().get_names(), old_names); + const auto ops = f->get_ordered_ops(); + const auto res_count = std::count_if(ops.begin(), ops.end(), [](const std::shared_ptr& n) { + return std::dynamic_pointer_cast(n) != nullptr; + }); + EXPECT_EQ(res_count, 1); + const auto names_count = std::count_if(ops.begin(), ops.end(), [](std::shared_ptr n) { + return n->output(0).get_tensor().get_names().count("tensor_output1") > 0; + }); + EXPECT_EQ(names_count, 2); // result + node connected to it has same name referencing to it + EXPECT_EQ(name, f->output().get_node_shared_ptr()->get_friendly_name()); + EXPECT_EQ(name_last_op + ".0", f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name()); } @@ -1776,25 +1809,43 @@ TEST(pre_post_process, postprocess_convert_layout_invalid_dims_dyn_shape) { TEST(pre_post_process, postprocess_keep_friendly_names_compatibility) { auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); - auto result_fr_name = f->get_results()[0]->get_friendly_name(); - auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); - auto node_name = node_before_result_old->get_friendly_name(); + const auto result_fr_name = f->get_results()[0]->get_friendly_name(); + const auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + const auto node_name = node_before_result_old->get_friendly_name(); + set_model_as_v10(*f); auto p = PrePostProcessor(f); p.output().postprocess().convert_element_type(element::u8); f = p.build(); EXPECT_EQ(f->get_results()[0]->get_friendly_name(), result_fr_name); - auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + const auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); // Compatibility check: verify that old name is assigned to new 'output' node EXPECT_EQ(node_before_result_new->get_friendly_name(), node_name); // Compatibility check: Verify that old name is not set for old 'output' node anymore EXPECT_NE(node_before_result_old->get_friendly_name(), node_name); } +TEST(pre_post_process, postprocess_keep_friendly_names) { + auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); + auto result_fr_name = f->get_results()[0]->get_friendly_name(); + auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + auto node_name = node_before_result_old->get_friendly_name(); + auto p = PrePostProcessor(f); + p.output().postprocess().convert_element_type(element::u8); + f = p.build(); + EXPECT_EQ(f->get_results()[0]->get_friendly_name(), result_fr_name); + auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + // Compatibility check: verify that old name + index is assigned to new 'output' node + EXPECT_EQ(node_before_result_new->get_friendly_name(), node_name + ".0"); + // Compatibility check: Verify that old name is not changed + EXPECT_EQ(node_before_result_old->get_friendly_name(), 
node_name); +} + TEST(pre_post_process, postprocess_keep_friendly_names_compatibility_implicit) { auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); auto result_fr_name = f->get_results()[0]->get_friendly_name(); auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); auto node_name = node_before_result_old->get_friendly_name(); + set_model_as_v10(*f); auto p = PrePostProcessor(f); p.output().model().set_layout("NCHW"); p.output().tensor().set_layout("NHWC"); @@ -1807,6 +1858,21 @@ TEST(pre_post_process, postprocess_keep_friendly_names_compatibility_implicit) { EXPECT_NE(node_before_result_old->get_friendly_name(), node_name); } +TEST(pre_post_process, postprocess_keep_friendly_names_implicit) { + auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); + const auto result_fr_name = f->get_results()[0]->get_friendly_name(); + const auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + const auto node_name = node_before_result_old->get_friendly_name(); + auto p = PrePostProcessor(f); + p.output().model().set_layout("NCHW"); + p.output().postprocess().convert_layout("NHWC"); + f = p.build(); + EXPECT_EQ(f->get_results()[0]->get_friendly_name(), result_fr_name); + const auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + EXPECT_EQ(node_before_result_new->get_friendly_name(), node_name + ".0"); + EXPECT_EQ(node_before_result_old->get_friendly_name(), node_name); +} + // --- PostProcess - convert color format --- TEST(pre_post_process, postprocess_convert_color_format_BGR_RGB) { auto f = create_simple_function(element::f32, Shape{5, 30, 20, 3}); @@ -2017,7 +2083,11 @@ TEST(pre_post_process, postprocess_one_node_many_outputs) { results.emplace_back(res); } auto model = std::make_shared(ResultVector{results}, ParameterVector{data1}); - EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 1); + // Set tensor name to model output 0 + model->output(0).set_names({"output_split0"}); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); + // Result input has still tensor_split0 names from split op + EXPECT_EQ(model->output(0).get_node()->get_input_tensor(0).get_names().count("tensor_Split0"), 1); EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); @@ -2026,9 +2096,12 @@ TEST(pre_post_process, postprocess_one_node_many_outputs) { p.output(2).tensor().set_element_type(element::f32); model = p.build(); EXPECT_EQ(model->get_results().size(), 3); - EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 1); + // Tensor names on output is lost as origin named tensor is before convert op + // New result has different precision means different tensor. 
+ EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); - EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); + EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 0); EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0"); EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split"); EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2"); diff --git a/src/core/tests/type_prop/result.cpp b/src/core/tests/type_prop/result.cpp index f0c0eecc285004..9776768df052a0 100644 --- a/src/core/tests/type_prop/result.cpp +++ b/src/core/tests/type_prop/result.cpp @@ -7,8 +7,12 @@ #include "common_test_utils/type_prop.hpp" #include "openvino/op/constant.hpp" -using namespace std; -using namespace ov; +namespace ov { +namespace test { + +using ov::op::v0::Parameter; +using std::make_shared; +using testing::UnorderedElementsAre; TEST(type_prop, result) { const auto arg_shape = Shape{1, 2, 3, 4, 5}; @@ -51,3 +55,101 @@ TEST(type_prop, result_layout_invalid) { result->output(0).get_rt_info()[ov::LayoutAttribute::get_type_info_static()] = "NCHW"; // incorrect way ASSERT_THROW(result->get_layout(), ov::Exception); } + +using TypePropResultV0Test = TypePropOpTest; + +TEST_F(TypePropResultV0Test, set_specific_output_name_by_output) { + auto a = std::make_shared(element::f32, PartialShape::dynamic()); + a->get_output_tensor(0).set_names({"input"}); + + auto result = make_op(a); + + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("input")); + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("input")); + + result->output(0).set_names({"out"}); + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out")); + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("out")); + EXPECT_THAT(a->output(0).get_names(), UnorderedElementsAre("input", "out")); + EXPECT_THAT(a->get_output_tensor(0).get_names(), UnorderedElementsAre("input", "out")); +} + +TEST_F(TypePropResultV0Test, set_specific_output_name_by_tensor_desc) { + auto a = std::make_shared(element::f32, PartialShape::dynamic()); + a->get_output_tensor(0).set_names({"input"}); + + auto result = make_op(a); + + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("input")); + + result->get_output_tensor(0).set_names({"out"}); + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out")); + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("out")); + EXPECT_THAT(a->output(0).get_names(), UnorderedElementsAre("input", "out")); + EXPECT_THAT(a->get_output_tensor(0).get_names(), UnorderedElementsAre("input", "out")); +} + +TEST_F(TypePropResultV0Test, change_specific_output_name) { + auto a = std::make_shared(element::f32, PartialShape::dynamic()); + a->get_output_tensor(0).set_names({"input"}); + + auto result = make_op(a); + + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("input")); + + result->get_output_tensor(0).set_names({"out"}); + + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out")); + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("out")); + EXPECT_THAT(a->output(0).get_names(), 
UnorderedElementsAre("input", "out")); + EXPECT_THAT(a->get_output_tensor(0).get_names(), UnorderedElementsAre("input", "out")); + + result->output(0).set_names({"new output"}); + + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("new output")); + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("new output")); + EXPECT_THAT(a->output(0).get_names(), UnorderedElementsAre("input", "new output")); + EXPECT_THAT(a->get_output_tensor(0).get_names(), UnorderedElementsAre("input", "new output")); +} + +TEST_F(TypePropResultV0Test, add_specific_output_name) { + auto a = std::make_shared(element::f32, PartialShape::dynamic()); + a->get_output_tensor(0).set_names({"input"}); + + auto result = make_op(a); + + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("input")); + + result->output(0).set_names({"out"}); + result->get_output_tensor(0).add_names({"extra output name", "o1"}); + result->output(0).add_names({"extra output name", "o2"}); + + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out", "extra output name", "o1", "o2")); + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("out", "extra output name", "o1", "o2")); + EXPECT_THAT(a->output(0).get_names(), UnorderedElementsAre("input", "out", "extra output name", "o1", "o2")); + EXPECT_THAT(a->get_output_tensor(0).get_names(), + UnorderedElementsAre("input", "out", "extra output name", "o1", "o2")); +} + +TEST_F(TypePropResultV0Test, preserve_specific_name_on_input_replace) { + const auto a = std::make_shared(element::f32, PartialShape::dynamic()); + a->get_output_tensor(0).set_names({"input a"}); + + const auto result = make_op(a); + result->output(0).set_names({"out"}); + + EXPECT_THAT(result->input(0).get_tensor().get_names(), UnorderedElementsAre("out", "input a")); + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out")); + + const auto b = std::make_shared(element::f32, PartialShape::dynamic()); + b->get_output_tensor(0).set_names({"input b"}); + + result->input(0).replace_source_output(b); + result->validate_and_infer_types(); + + EXPECT_THAT(result->input(0).get_tensor().get_names(), UnorderedElementsAre("input b", "out")); + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out")); + EXPECT_THAT(a->output(0).get_names(), UnorderedElementsAre("input a")); +} +} // namespace test +} // namespace ov diff --git a/src/frontends/common/include/openvino/frontend/frontend.hpp b/src/frontends/common/include/openvino/frontend/frontend.hpp index 0035382fe20c5f..bc944c17dbc0dd 100644 --- a/src/frontends/common/include/openvino/frontend/frontend.hpp +++ b/src/frontends/common/include/openvino/frontend/frontend.hpp @@ -15,6 +15,10 @@ #include "openvino/frontend/input_model.hpp" #include "openvino/frontend/visibility.hpp" +#ifdef OPENVINO_CPP_VER_17 +# include +#endif + namespace ov { namespace frontend { /// \brief An interface for identifying a frontend for a particular framework. @@ -50,7 +54,12 @@ class FRONTEND_API FrontEnd { /// \return true if model recognized, false - otherwise. template inline bool supported(const Types&... 
vars) const { - return supported_impl({ov::Any(vars)...}); +#ifdef OPENVINO_CPP_VER_17 + if constexpr ((std::is_same_v || ...)) { + return supported_impl({path_as_str_or_forward(vars)...}); + } else +#endif + return supported_impl({ov::Any(vars)...}); } inline bool supported(const ov::AnyVector& vars) const { return supported_impl(vars); @@ -65,7 +74,12 @@ class FRONTEND_API FrontEnd { /// \return Loaded input model. template inline InputModel::Ptr load(const Types&... vars) const { - return load_impl({ov::Any{vars}...}); +#ifdef OPENVINO_CPP_VER_17 + if constexpr ((std::is_same_v || ...)) { + return load_impl({path_as_str_or_forward(vars)...}); + } else +#endif + return load_impl({ov::Any{vars}...}); } inline InputModel::Ptr load(const ov::AnyVector& vars) const { @@ -118,8 +132,16 @@ class FRONTEND_API FrontEnd { /// \brief Registers extension /// \param library_path path to library with ov::Extension + /// \{ void add_extension(const std::string& library_path); +#ifdef OPENVINO_CPP_VER_17 + void add_extension(const std::filesystem::path& library_path) { + add_extension(library_path.string()); + } +#endif + /// \} + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT /// \brief Registers extension @@ -162,6 +184,17 @@ class FRONTEND_API FrontEnd { private: static std::shared_ptr create_copy(const std::shared_ptr& ov_model, const std::shared_ptr& shared_object); + +#ifdef OPENVINO_CPP_VER_17 + template + static constexpr auto path_as_str_or_forward(T&& p) { + if constexpr (std::is_same_v>) { + return p.string(); + } else { + return std::forward(p); + } + } +#endif }; template <> diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 2d1dfba956ea72..d7e250f9916302 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -950,10 +950,12 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector(pugixml::get_uint64_attr(dn, "size")), - static_cast(pugixml::get_uint64_attr(dn, "offset"))); + static_cast(pugixml::get_uint64_attr(dn, "offset")), + ov::element::Type(pugixml::get_str_attr(dn, "element_type"))); } } diff --git a/src/frontends/tests/frontend/shared/src/conversion.cpp b/src/frontends/tests/frontend/shared/src/conversion.cpp index 34e4f2fd62719a..058d5534965436 100644 --- a/src/frontends/tests/frontend/shared/src/conversion.cpp +++ b/src/frontends/tests/frontend/shared/src/conversion.cpp @@ -95,3 +95,17 @@ TEST_P(FrontEndConversionExtensionTest, TestConversionExtensionViaSO) { OV_ASSERT_NO_THROW(model = frontend->convert(input_model)); ASSERT_NE(model, nullptr); } + +#ifdef OPENVINO_CPP_VER_17 +TEST_P(FrontEndConversionExtensionTest, TestConversionExtensionViaSOByPath) { + auto frontend = m_param.m_frontend; + const std::filesystem::path lib_path = get_lib_path("test_builtin_extensions"); + frontend->add_extension(lib_path); + std::shared_ptr input_model; + OV_ASSERT_NO_THROW(input_model = frontend->load(m_param.m_modelName)); + ASSERT_NE(input_model, nullptr); + std::shared_ptr model; + OV_ASSERT_NO_THROW(model = frontend->convert(input_model)); + ASSERT_NE(model, nullptr); +} +#endif diff --git a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp index bbe81ee1c9602d..b80bf32958e4ac 100644 --- a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp +++ b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp @@ -15,6 +15,7 @@ #include #include "openvino/core/descriptor/tensor.hpp" +#include 
"openvino/core/descriptor_tensor.hpp" #include "openvino/runtime/common.hpp" #include "openvino/runtime/iinfer_request.hpp" #include "openvino/runtime/profiling_info.hpp" @@ -162,7 +163,11 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { private: std::shared_ptr m_compiled_model; // Mutable to return reference to ov::Tensor - mutable std::unordered_map, ov::SoPtr> m_tensors; + mutable std::unordered_map, + ov::SoPtr, + descriptor::TensorExtension::Hasher, + descriptor::TensorExtension::Equal> + m_tensors; // Cache ports mutable std::unordered_map m_cached_ports; mutable std::mutex m_cache_mutex; diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 5674c75dd546d7..8baea3ed408656 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -801,6 +801,8 @@ struct EncryptionCallbacks { * when loading from the cache. This property is set in core.compile_model only. * - First value of the struct is encryption function. * - Second value of the struct is decryption function. + * @note GPU Plugin: encrypts whole blob, not only model structure. Only used when ov::cache_mode property is set to + * "OPTIMIZE_SIZE". * @ingroup ov_runtime_cpp_prop_api */ static constexpr Property cache_encryption_callbacks{ diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 673f6fd569a11e..f332c7c999a548 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -38,6 +38,18 @@ ov::ICore::~ICore() = default; +namespace ov { +namespace util { +template +constexpr std::array< + typename std::conditional::value, typename std::common_type::type, T>::type, + sizeof...(Args)> +make_array(Args&&... 
args) { + return {std::forward(args)...}; +} +} // namespace util +} // namespace ov + namespace { #ifdef PROXY_PLUGIN_ENABLED @@ -205,6 +217,18 @@ void clean_batch_properties(const std::string& deviceName, ov::AnyMap& config, c } } } + +static const auto core_properties_names = + ov::util::make_array(ov::cache_dir.name(), ov::enable_mmap.name(), ov::force_tbb_terminate.name()); + +static const auto auto_batch_properties_names = + ov::util::make_array(ov::auto_batch_timeout.name(), ov::hint::allow_auto_batching.name()); + +void remove_core_properties(ov::AnyMap& properties) { + for (const auto& name : core_properties_names) { + properties.erase(name); + } +} } // namespace bool ov::is_config_applicable(const std::string& user_device_name, const std::string& subprop_device_name) { @@ -239,22 +263,21 @@ bool ov::is_config_applicable(const std::string& user_device_name, const std::st return false; } -ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, - const AnyMap& config, - const bool keep_core_property) { +namespace { +ov::Parsed parse_device_config(const std::string& device_name, + const ov::CoreConfig& core_config, + const ov::AnyMap& properties, + const bool keep_auto_batch_property) { // check to the validity of device name - auto bracket_pos = deviceName.find(")"); + auto bracket_pos = device_name.find(")"); while (bracket_pos != std::string::npos) { - if (bracket_pos < deviceName.length() - 1 && - (deviceName[bracket_pos + 1] != ',' || bracket_pos + 1 == deviceName.length() - 1)) { - OPENVINO_THROW("Device with \"", deviceName, "\" name is illegal in the OpenVINO Runtime"); + if (bracket_pos < device_name.length() - 1 && + (device_name[bracket_pos + 1] != ',' || bracket_pos + 1 == device_name.length() - 1)) { + OPENVINO_THROW("Device with \"", device_name, "\" name is illegal in the OpenVINO Runtime"); } - bracket_pos = deviceName.find(")", bracket_pos + 1); + bracket_pos = device_name.find(")", bracket_pos + 1); } - auto updated_config = config; - auto updated_device_name = deviceName; - /** Note: auto-batching is already applied by this time, so the call: * core.compile_model("GPU", ov::device::properties("BATCH", ov::auto_batch_timeout(400))); * is transformed and we have here: @@ -268,17 +291,19 @@ ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, * So, if one day, we want to add more options in form of ov::allow_, we need to apply it before * 'flatten_sub_properties' call to have proper behavior */ + ov::Parsed parsed{device_name, flatten_sub_properties(device_name, properties), core_config}; + auto& updated_device_name = parsed._deviceName; + auto& updated_config = parsed._config; - updated_config = flatten_sub_properties(deviceName, updated_config); std::string parsed_device_priority; // try to find ':' to extract name of virtual device - auto pos = deviceName.find_first_of(':'); + auto pos = device_name.find_first_of(':'); if (pos != std::string::npos) { - updated_device_name = deviceName.substr(0, pos); - parsed_device_priority = deviceName.substr(pos + 1); + updated_device_name = device_name.substr(0, pos); + parsed_device_priority = device_name.substr(pos + 1); } else { - ov::DeviceIDParser parser(deviceName); + ov::DeviceIDParser parser(device_name); updated_device_name = parser.get_device_name(); parsed_device_priority = parser.get_device_id(); } @@ -295,20 +320,44 @@ ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, OPENVINO_THROW("Device priority / ID mismatch: ", parsed_device_priority, " (from ", - 
deviceName, + device_name, ") vs ", it->second.as(), " (from config)"); } }; + parsed._core_config.set(updated_config); // keep batch property only when called from query_supported_property - if (!keep_core_property) { - clean_batch_properties(updated_device_name, updated_config, ov::hint::allow_auto_batching); - clean_batch_properties(updated_device_name, updated_config, ov::auto_batch_timeout); + if (!keep_auto_batch_property) { + for (const auto& name : auto_batch_properties_names) { + clean_batch_properties(updated_device_name, updated_config, name); + } } + return parsed; +} +} // namespace + +ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, + const AnyMap& config, + const bool keep_auto_batch_property) { + return parseDeviceNameIntoConfig(deviceName, CoreConfig{}, config, keep_auto_batch_property); +} - return {std::move(updated_device_name), std::move(updated_config)}; +ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, + const CoreConfig& coreConfig, + const AnyMap& config, + const bool keep_auto_batch_property) { + auto parsed = parse_device_config(deviceName, coreConfig, config, keep_auto_batch_property); + + // remove core properties for HW devices + if (!is_virtual_device(parsed._deviceName)) { + for (const auto& name : {ov::enable_mmap.name(), ov::force_tbb_terminate.name()}) { + // note: ov::cache_dir kept as plugin may require it + parsed._config.erase(name); + } + } + return parsed; } ov::CoreImpl::CoreImpl() { @@ -663,8 +712,7 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { { OPENVINO_SUPPRESS_DEPRECATED_START if (device_supports_cache_dir(plugin)) { - ov::AnyMap empty_map; - auto cacheConfig = coreConfig.get_cache_config_for_device(plugin, empty_map); + auto cacheConfig = coreConfig.get_cache_config_for_device(plugin); if (cacheConfig._cacheManager) { desc.defaultConfig[ov::cache_dir.name()] = cacheConfig._cacheDir; } @@ -737,13 +785,14 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr< // if auto-batching is applicable, the below function will patch the device name and config accordingly: auto model = apply_auto_batching(model_, deviceName, config_with_batch); - auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(device_name)); + auto parsed = parseDeviceNameIntoConfig(deviceName, coreConfig, config_with_batch, is_proxy_device(deviceName)); auto plugin = get_plugin(parsed._deviceName); ov::SoPtr res; - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; // Skip caching for proxy plugin. 
HW plugin will load network from the cache if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { - CacheContent cacheContent{cacheManager}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()}; cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); res = load_model_from_cache(cacheContent, plugin, parsed._config, ov::SoPtr{}, [&]() { @@ -770,13 +819,14 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr< // if auto-batching is applicable, the below function will patch the device name and config accordingly: auto model = apply_auto_batching(model_, deviceName, config_with_batch); - auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(deviceName)); + auto parsed = parseDeviceNameIntoConfig(deviceName, coreConfig, config_with_batch, is_proxy_device(deviceName)); auto plugin = get_plugin(parsed._deviceName); ov::SoPtr res; - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; // Skip caching for proxy plugin. HW plugin will load network from the cache if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { - CacheContent cacheContent{cacheManager}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()}; cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); res = load_model_from_cache(cacheContent, plugin, parsed._config, context, [&]() { @@ -792,21 +842,22 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod const std::string& device_name, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Path"); - auto parsed = parseDeviceNameIntoConfig(device_name, config); + auto parsed = parseDeviceNameIntoConfig(device_name, coreConfig, config); // in case of compile_model(file_name), we need to clear-up core-level properties auto plugin = get_plugin(parsed._deviceName); ov::SoPtr compiled_model; - - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { // Skip caching for proxy plugin. 
HW plugin will load network from the cache - CacheContent cacheContent{cacheManager, model_path}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap(), model_path}; cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); compiled_model = load_model_from_cache(cacheContent, plugin, parsed._config, ov::SoPtr{}, [&]() { - auto model = read_model(model_path, std::string{}); + auto model = + ov::util::read_model(model_path, std::string{}, extensions, parsed._core_config.get_enable_mmap()); return compile_model_and_cache(plugin, model, parsed._config, {}, cacheContent); }); } else { @@ -820,15 +871,14 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod const std::string& device_name, const ov::AnyMap& config) const { OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::compile_model::from_memory"); - auto parsed = parseDeviceNameIntoConfig(device_name, config); - // in case of compile_model(file_name), we need to clear-up core-level properties + auto parsed = parseDeviceNameIntoConfig(device_name, coreConfig, config); auto plugin = get_plugin(parsed._deviceName); ov::SoPtr compiled_model; - - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; // Skip caching for proxy plugin. HW plugin will load network from the cache if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { - CacheContent cacheContent{cacheManager}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()}; cacheContent.blobId = ov::ModelCache::compute_hash(model_str, weights, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); @@ -948,7 +998,7 @@ ov::AnyMap ov::CoreImpl::get_supported_property(const std::string& full_device_n // ov::device::priority cannot be shared, because it's specific for current virtual // plugin. 
So, we need to remove ov::device::priorities from the list, because it's // supposed to be set for current virtual plugin and cannot be propagated down - ov::AnyMap return_properties = user_properties; + auto return_properties = user_properties; auto device_priorities_it = return_properties.find(ov::device::priorities.name()); if (device_priorities_it != return_properties.end()) { return_properties.erase(device_priorities_it); @@ -957,30 +1007,24 @@ ov::AnyMap ov::CoreImpl::get_supported_property(const std::string& full_device_n return return_properties; } - static const std::vector core_level_properties = { - ov::cache_dir.name(), - ov::force_tbb_terminate.name(), - // auto-batch properties are also treated as core-level - ov::auto_batch_timeout.name(), - ov::hint::allow_auto_batching.name(), - }; - - const auto flattened = ov::parseDeviceNameIntoConfig(full_device_name, user_properties, true); - const std::string& device_name = flattened._deviceName; + const auto flattened = parse_device_config(full_device_name, {}, user_properties, keep_core_property); const auto& flattened_config = flattened._config; + const auto& device_name = flattened._deviceName; // virtual plugins should bypass core-level properties to HW plugins // so, we need to report them as supported std::vector supported_config_keys; + auto key_inserter = std::back_inserter(supported_config_keys); if (keep_core_property) { - supported_config_keys = core_level_properties; + key_inserter = std::copy(core_properties_names.begin(), core_properties_names.end(), key_inserter); + key_inserter = std::copy(auto_batch_properties_names.begin(), auto_batch_properties_names.end(), key_inserter); } // try to search against OV API 2.0' mutable supported_properties try { for (auto&& property : ICore::get_property(device_name, ov::supported_properties, {})) { if (property.is_mutable()) { - supported_config_keys.emplace_back(std::move(property)); + *key_inserter = std::move(property); } } } catch (ov::Exception&) { @@ -990,7 +1034,7 @@ ov::AnyMap ov::CoreImpl::get_supported_property(const std::string& full_device_n try { for (auto&& property : ICore::get_property(device_name, ov::internal::supported_properties, {})) { if (property.is_mutable()) { - supported_config_keys.emplace_back(std::move(property)); + *key_inserter = std::move(property); } } } catch (ov::Exception&) { @@ -1160,8 +1204,7 @@ ov::Any ov::CoreImpl::get_property(const std::string& device_name, if (parsed._deviceName.empty()) { return get_property_for_core(name); } else if (name == ov::cache_dir.name()) { - ov::AnyMap empty_map; - return coreConfig.get_cache_config_for_device(get_plugin(parsed._deviceName), empty_map)._cacheDir; + return coreConfig.get_cache_config_for_device(get_plugin(parsed._deviceName))._cacheDir; } return get_plugin(parsed._deviceName).get_property(name, parsed._config); } @@ -1299,9 +1342,7 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st { OPENVINO_SUPPRESS_DEPRECATED_START if (device_supports_cache_dir(plugin.second)) { - ov::AnyMap empty_map = {}; - configCopy[ov::cache_dir.name()] = - coreConfig.get_cache_config_for_device(plugin.second, empty_map)._cacheDir; + configCopy[ov::cache_dir.name()] = coreConfig.get_cache_config_for_device(plugin.second)._cacheDir; } else if (configCopy.count(ov::cache_dir.name()) > 0) { // Remove "CACHE_DIR" from config if it is not supported by plugin configCopy.erase(ov::cache_dir.name()); @@ -1411,8 +1452,8 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( try { 
cacheContent.cacheManager->read_cache_entry( cacheContent.blobId, - coreConfig.get_enable_mmap() && ov::util::contains(plugin.get_property(ov::internal::supported_properties), - ov::internal::caching_with_mmap), + cacheContent.mmap_enabled && ov::util::contains(plugin.get_property(ov::internal::supported_properties), + ov::internal::caching_with_mmap), [&](std::istream& networkStream, std::shared_ptr model_buffer) { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, @@ -1516,7 +1557,16 @@ ov::AnyMap ov::CoreImpl::create_compile_config(const ov::Plugin& plugin, const o return compile_config; } -void ov::CoreImpl::CoreConfig::set_and_update(ov::AnyMap& config) { +ov::CoreConfig::CoreConfig(const CoreConfig& other) { + { + std::lock_guard lock(other._cacheConfigMutex); + _cacheConfig = other._cacheConfig; + _cacheConfigPerDevice = other._cacheConfigPerDevice; + } + _flag_enable_mmap = other._flag_enable_mmap; +} + +void ov::CoreConfig::set(const ov::AnyMap& config) { auto it = config.find(ov::cache_dir.name()); if (it != config.end()) { std::lock_guard lock(_cacheConfigMutex); @@ -1526,43 +1576,44 @@ void ov::CoreImpl::CoreConfig::set_and_update(ov::AnyMap& config) { for (auto& deviceCfg : _cacheConfigPerDevice) { deviceCfg.second = CoreConfig::CacheConfig::create(it->second.as()); } - config.erase(it); } it = config.find(ov::force_tbb_terminate.name()); if (it != config.end()) { auto flag = it->second.as(); ov::threading::executor_manager()->set_property({{it->first, flag}}); - config.erase(it); } it = config.find(ov::enable_mmap.name()); if (it != config.end()) { auto flag = it->second.as(); _flag_enable_mmap = flag; - config.erase(it); } } -void ov::CoreImpl::CoreConfig::set_cache_dir_for_device(const std::string& dir, const std::string& name) { +void ov::CoreConfig::set_and_update(ov::AnyMap& config) { + set(config); + remove_core_properties(config); +} + +void ov::CoreConfig::set_cache_dir_for_device(const std::string& dir, const std::string& name) { std::lock_guard lock(_cacheConfigMutex); _cacheConfigPerDevice[name] = CoreConfig::CacheConfig::create(dir); } -std::string ov::CoreImpl::CoreConfig::get_cache_dir() const { +std::string ov::CoreConfig::get_cache_dir() const { std::lock_guard lock(_cacheConfigMutex); return _cacheConfig._cacheDir; } -bool ov::CoreImpl::CoreConfig::get_enable_mmap() const { +bool ov::CoreConfig::get_enable_mmap() const { return _flag_enable_mmap; } // Creating thread-safe copy of config including shared_ptr to ICacheManager // Passing empty or not-existing name will return global cache config -ov::CoreImpl::CoreConfig::CacheConfig ov::CoreImpl::CoreConfig::get_cache_config_for_device( - const ov::Plugin& plugin, - ov::AnyMap& parsedConfig) const { +ov::CoreConfig::CacheConfig ov::CoreConfig::get_cache_config_for_device(const ov::Plugin& plugin, + ov::AnyMap& parsedConfig) const { // cache_dir is enabled locally in compile_model only if (parsedConfig.count(ov::cache_dir.name())) { const auto& cache_dir_val = parsedConfig.at(ov::cache_dir.name()).as(); @@ -1575,16 +1626,16 @@ ov::CoreImpl::CoreConfig::CacheConfig ov::CoreImpl::CoreConfig::get_cache_config } return tempConfig; } else { // cache_dir is set to Core globally or for the specific device - std::lock_guard lock(_cacheConfigMutex); - if (_cacheConfigPerDevice.count(plugin.get_name()) > 0) { - return _cacheConfigPerDevice.at(plugin.get_name()); - } else { - return _cacheConfig; - } + return get_cache_config_for_device(plugin); } } -ov::CoreImpl::CoreConfig::CacheConfig 
ov::CoreImpl::CoreConfig::CacheConfig::create(const std::string& dir) { +ov::CoreConfig::CacheConfig ov::CoreConfig::get_cache_config_for_device(const ov::Plugin& plugin) const { + std::lock_guard lock(_cacheConfigMutex); + return _cacheConfigPerDevice.count(plugin.get_name()) ? _cacheConfigPerDevice.at(plugin.get_name()) : _cacheConfig; +} + +ov::CoreConfig::CacheConfig ov::CoreConfig::CacheConfig::create(const std::string& dir) { std::shared_ptr cache_manager = nullptr; if (!dir.empty()) { diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 7cf12f3ba3280c..7bbab14e4d8c14 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -22,14 +22,91 @@ using CreatePluginEngineFunc = void(std::shared_ptr<::ov::IPlugin>&); const std::string DEFAULT_DEVICE_NAME = "DEFAULT_DEVICE"; +class CoreConfig final { +public: + CoreConfig() = default; + CoreConfig(const CoreConfig& other); + CoreConfig& operator=(const CoreConfig&) = delete; + + struct CacheConfig { + std::string _cacheDir; + std::shared_ptr _cacheManager; + + static CacheConfig create(const std::string& dir); + }; + + void set(const ov::AnyMap& config); + + /** + * @brief Removes core-level properties from config and triggers new state for core config + * @param config - config to be updated + */ + void set_and_update(ov::AnyMap& config); + + OPENVINO_DEPRECATED("Don't use this method, it will be removed soon") + void set_cache_dir_for_device(const std::string& dir, const std::string& name); + + std::string get_cache_dir() const; + + bool get_enable_mmap() const; + + CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, ov::AnyMap& parsedConfig) const; + + // Creating thread-safe copy of global config including shared_ptr to ICacheManager + CacheConfig get_cache_config_for_device(const ov::Plugin& plugin) const; + +private: + mutable std::mutex _cacheConfigMutex; + CacheConfig _cacheConfig; + std::map _cacheConfigPerDevice; + bool _flag_enable_mmap = true; +}; + struct Parsed { std::string _deviceName; AnyMap _config; + CoreConfig _core_config; }; +/** + * @brief Provides Parsed device name and configuration. + * + * Uses default core configuration updated with user properties from config. + * The core properties are removed from user configuration for HW devices only. + * @note The `CACHE_DIR` is not removed from compiled configuration. + * + * @param deviceName Device name to be parsed + * @param config User configuration to be parsed. + * @param keep_auto_batch_property If set keep auto batch properties in compile properties. + * @return Parsed: + * - device name + * - compile properties + * - core configuration + */ +Parsed parseDeviceNameIntoConfig(const std::string& deviceName, + const AnyMap& config = {}, + const bool keep_auto_batch_property = false); + +/** + * @brief Provides Parsed device name and configuration. + * + * Uses user core configuration which is updated with user properties from config. + * The core properties are removed from user configuration for HW devices only. + * @note The `CACHE_DIR` is not removed from compiled configuration. + * + * @param deviceName Device name to be parsed + * @param coreConfig Core configuration used as base for parsed output. + * @param config User configuration to be parsed. + * @param keep_auto_batch_property If set keep auto batch properties in compile properties. 
+ * @return Parsed: + * - device name + * - compile properties + * - core configuration + */ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, + const CoreConfig& coreConfig, const AnyMap& config = {}, - const bool keep_core_property = false); + const bool keep_auto_batch_property = false); /** * @brief Checks whether config is applicable for device with 'device_name' @@ -61,47 +138,17 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this _cacheManager; - - static CacheConfig create(const std::string& dir); - }; - - /** - * @brief Removes core-level properties from config and triggers new state for core config - * @param config - config to be updated - */ - void set_and_update(ov::AnyMap& config); - - OPENVINO_DEPRECATED("Don't use this method, it will be removed soon") - void set_cache_dir_for_device(const std::string& dir, const std::string& name); - - std::string get_cache_dir() const; - - bool get_enable_mmap() const; - - // Creating thread-safe copy of config including shared_ptr to ICacheManager - // Passing empty or not-existing name will return global cache config - CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, ov::AnyMap& parsedConfig) const; - - private: - mutable std::mutex _cacheConfigMutex; - CacheConfig _cacheConfig; - std::map _cacheConfigPerDevice; - bool _flag_enable_mmap = true; - }; - struct CacheContent { explicit CacheContent(const std::shared_ptr& cache_manager, + bool mmap_enabled = false, const std::string model_path = {}) : cacheManager(cache_manager), - modelPath(model_path) {} + modelPath(model_path), + mmap_enabled{mmap_enabled} {} std::shared_ptr cacheManager; std::string blobId = {}; std::string modelPath = {}; + bool mmap_enabled = false; }; // Core settings (cache config, etc) @@ -291,7 +338,9 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this create_context(const std::string& device_name, const AnyMap& args) const override; - ov::AnyMap get_supported_property(const std::string& device_name, const ov::AnyMap& config, const bool keep_core_property = true) const override; + ov::AnyMap get_supported_property(const std::string& device_name, + const ov::AnyMap& config, + const bool keep_core_property = true) const override; ov::SoPtr get_default_context(const std::string& device_name) const override; diff --git a/src/inference/src/dev/icompiled_model.cpp b/src/inference/src/dev/icompiled_model.cpp index f452dd3a330a17..3f4a8d397ab4d9 100644 --- a/src/inference/src/dev/icompiled_model.cpp +++ b/src/inference/src/dev/icompiled_model.cpp @@ -51,8 +51,11 @@ ov::ICompiledModel::ICompiledModel(const std::shared_ptr& model } } } - - std::unordered_map, std::shared_ptr> tensor_map; + std::unordered_map, + std::shared_ptr, + descriptor::TensorExtension::Hasher, + descriptor::TensorExtension::Equal> + tensor_map; for (const auto& param : model->get_parameters()) { const auto& param_name = param->get_friendly_name(); auto new_param = ov::as_type_ptr(param->copy_with_new_inputs({})); diff --git a/src/inference/src/model_reader.cpp b/src/inference/src/model_reader.cpp index aaf620ea0f803a..7babef019b5802 100644 --- a/src/inference/src/model_reader.cpp +++ b/src/inference/src/model_reader.cpp @@ -86,7 +86,7 @@ void update_v10_model(std::shared_ptr& model, bool frontendMode = fal "Model operation names have collisions with tensor names.", " Please use MO to generate new IR version, it should allow to avoid the issue"); leaf_names.emplace(res_name, nullptr); - 
result->output(0).get_tensor().add_names({std::move(res_name)}); + result->input(0).get_tensor().add_names({std::move(res_name)}); } for (const auto& param : model->get_parameters()) { const auto& param_name = param->get_friendly_name(); diff --git a/src/plugins/auto/tests/functional/behavior/caching_test.cpp b/src/plugins/auto/tests/functional/behavior/caching_test.cpp index 1ef107cd59991f..196d2519250a5d 100644 --- a/src/plugins/auto/tests/functional/behavior/caching_test.cpp +++ b/src/plugins/auto/tests/functional/behavior/caching_test.cpp @@ -190,4 +190,4 @@ TEST_F(AutoFuncTests, compiled_with_cache_enabled_batch_enabled) { ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 5); core.set_property(ov::cache_dir("")); #endif -} \ No newline at end of file +} diff --git a/src/plugins/auto_batch/src/plugin.hpp b/src/plugins/auto_batch/src/plugin.hpp index 37a777cc970b6a..563ba4487ee3ec 100644 --- a/src/plugins/auto_batch/src/plugin.hpp +++ b/src/plugins/auto_batch/src/plugin.hpp @@ -68,4 +68,4 @@ class Plugin : public ov::IPlugin { mutable ov::AnyMap m_plugin_config; }; } // namespace autobatch_plugin -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 39a2d20c092835..05a0e0a2cf6a0e 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -1329,6 +1329,58 @@ std::set> jit_logical_and_emitter::get_supported_prec return {{element::f32, element::f32}}; } +/// LOGICAL_OR /// +jit_logical_or_emitter::jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); +} + +jit_logical_or_emitter::jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); +} + +size_t jit_logical_or_emitter::get_inputs_count() const { return 2; } + +size_t jit_logical_or_emitter::get_aux_vecs_count() const { return 1; } + +size_t jit_logical_or_emitter::get_aux_gprs_count() const { return 1; } + +void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_logical_or_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src1 = TReg(in_vec_idxs[0]); + const TReg src2 = TReg(in_vec_idxs[1]); + const TReg dst = TReg(out_vec_idxs[0]); + const TReg aux = TReg(aux_vec_idxs[0]); + + h->orr(dst.b16, src1.b16, src2.b16); + h->ld1r(aux.s, table_val2("one")); + h->and_(dst.b16, dst.b16, aux.b16); +} + +void jit_logical_or_emitter::register_table_entries() { + push_arg_entry_of("one", 0x3f800000, true); +} + +std::set> jit_logical_or_emitter::get_supported_precisions(const std::shared_ptr& node) { + 
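The ASIMD sequence used by the new logical-OR emitter above (bitwise `orr`, then `and_` with the bit pattern of 1.0f loaded from the table) can be read as the scalar reference below. This is a sketch, and it assumes the inputs are already canonical boolean masks (exactly 0.0f or 1.0f), which is how the surrounding f32 eltwise emitters represent logical values.

```
// Scalar reference for the jit_logical_or_emitter ASIMD sequence above.
#include <cstdint>
#include <cstring>
#include <cstdio>

static float logical_or_f32(float a, float b) {
    uint32_t ua, ub;
    std::memcpy(&ua, &a, sizeof(ua));
    std::memcpy(&ub, &b, sizeof(ub));
    const uint32_t one_bits = 0x3f800000u;      // bit pattern of 1.0f ("one" table entry)
    const uint32_t ur = (ua | ub) & one_bits;   // orr followed by and_ in the emitter
    float r;
    std::memcpy(&r, &ur, sizeof(r));
    return r;                                   // 1.0f if either input is 1.0f, else 0.0f
}

int main() {
    std::printf("%g %g %g\n",
                logical_or_f32(0.0f, 0.0f),     // 0
                logical_or_f32(1.0f, 0.0f),     // 1
                logical_or_f32(1.0f, 1.0f));    // 1
}
```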
return {{element::f32, element::f32}}; +} + /// LOGICAL_NOT /// jit_logical_not_emitter::jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index 2173a1487f1057..be4e51cd0b759d 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -628,6 +628,34 @@ class jit_logical_and_emitter : public jit_emitter { void register_table_entries() override; }; +class jit_logical_or_emitter : public jit_emitter { +public: + jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& n); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + static std::set> get_supported_precisions( + const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + + void register_table_entries() override; +}; + class jit_logical_not_emitter : public jit_emitter { public: jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 0374888e3d7fcb..912fe23fcd1fcf 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -38,6 +38,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseIsNaN, Algorithm::EltwiseLessEqual, Algorithm::EltwiseLogicalAnd, + Algorithm::EltwiseLogicalOr, Algorithm::EltwiseLogicalNot, Algorithm::EltwiseLogicalXor, Algorithm::EltwiseMaximum, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index cfe36f78cc40f9..b3fe7018d23677 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -655,6 +655,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter), OV_CASE(Algorithm::EltwiseLessEqual, ov::intel_cpu::aarch64::jit_less_equal_emitter), OV_CASE(Algorithm::EltwiseLogicalAnd, ov::intel_cpu::aarch64::jit_logical_and_emitter), + OV_CASE(Algorithm::EltwiseLogicalOr, ov::intel_cpu::aarch64::jit_logical_or_emitter), OV_CASE(Algorithm::EltwiseLogicalNot, ov::intel_cpu::aarch64::jit_logical_not_emitter), OV_CASE(Algorithm::EltwiseLogicalXor, ov::intel_cpu::aarch64::jit_logical_xor_emitter), OV_CASE(Algorithm::EltwiseIsNaN, ov::intel_cpu::aarch64::jit_is_nan_emitter), @@ -845,6 +846,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter), OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), 
OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter), OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp index 2e8bde43abeed4..8e1be37c91b1ef 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp @@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer { BinaryOutputBuffer(std::ostream& stream) : OutputBuffer(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {} - void write(void const * data, std::streamsize size) { + virtual ~BinaryOutputBuffer() = default; + + virtual void write(void const* data, std::streamsize size) { auto const written_size = stream.rdbuf()->sputn(reinterpret_cast(data), size); OPENVINO_ASSERT(written_size == size, - "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size)); + "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + + std::to_string(written_size)); } + virtual void flush() {} + void setKernelImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernelImplParams() const { return _impl_params; } void set_stream(void* strm) { _strm = strm; } @@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer { BinaryInputBuffer(std::istream& stream, engine& engine) : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr) {} - void read(void* const data, std::streamsize size) { + virtual ~BinaryInputBuffer() = default; + + virtual void read(void* const data, std::streamsize size) { auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast(data), size); OPENVINO_ASSERT(read_size == size, "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size)); @@ -51,14 +58,73 @@ class BinaryInputBuffer : public InputBuffer { void setKernelImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernelImplParams() const { return _impl_params; } - std::streampos tellg() { return _stream.tellg(); } - void seekg(std::streampos pos) { _stream.seekg(pos); } - private: std::istream& _stream; void* _impl_params; }; +class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer { +public: + EncryptedBinaryOutputBuffer(std::ostream& stream, std::function encrypt) + : BinaryOutputBuffer(stream), + encrypt(encrypt) { + OPENVINO_ASSERT(encrypt); + } + + ~EncryptedBinaryOutputBuffer() override = default; + + void write(void const* data, std::streamsize size) override { + plaintext_str.append(reinterpret_cast(data), size); + } + + void flush() override { + auto encrypted_str = encrypt(plaintext_str); + size_t bytes = encrypted_str.size(); + BinaryOutputBuffer::write(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes)); + BinaryOutputBuffer::write(make_data(encrypted_str.c_str(), encrypted_str.size()).data, encrypted_str.size()); + } + +private: + std::string + plaintext_str; // Not using stringstream here because passing to encrypt() would produce an additional copy. 
+ std::function encrypt; +}; + +class EncryptedBinaryInputBuffer : public BinaryInputBuffer { +public: + EncryptedBinaryInputBuffer(std::istream& stream, + engine& engine, + std::function decrypt) + : BinaryInputBuffer(stream, engine), + decrypt(decrypt) { + OPENVINO_ASSERT(decrypt); + + size_t bytes; + BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes)); + + // Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional + // copy. + std::string str(bytes, 0); + BinaryInputBuffer::read( + make_data(const_cast(reinterpret_cast(str.c_str())), str.size()).data, + str.size()); + plaintext_stream.str(decrypt(str)); + } + + ~EncryptedBinaryInputBuffer() override = default; + + void read(void* const data, std::streamsize size) override { + auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast(data), size); + OPENVINO_ASSERT( + read_size == size, + "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size)); + } + +private: + std::stringstream plaintext_stream; + std::function decrypt; +}; + template class Serializer::value>::type> { public: diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp index 461f063ec26bc5..8a9a35b1e92fe9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp @@ -4,15 +4,170 @@ #pragma once #include +#include #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/memory.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/util/op_types.hpp" +#include "openvino/pass/manager.hpp" #include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" #include "primitive.hpp" +#include "transformations/convert_precision.hpp" namespace cldnn { +struct weights_mem { + std::shared_ptr>> shared_buf = nullptr; + std::shared_ptr transformed_constant = nullptr; + + const uint8_t* get_loaded_data() { + if (transformed_constant) { + return reinterpret_cast(transformed_constant->get_data_ptr()); + } + OPENVINO_ASSERT(shared_buf); + return shared_buf->get_ptr(); + } +}; + +struct weightless_cache_manager { + void set_constant_info(size_t bin_offset, + size_t original_size, + ov::element::Type original_dtype, + ov::element::Type curr_dtype, + ov::Shape shape) { + this->bin_offset = bin_offset; + this->original_size = original_size; + this->original_dtype = original_dtype; + this->curr_dtype = curr_dtype; + this->shape = shape; + do_weightless_caching = true; + + if (original_dtype != curr_dtype) { + do_precision_conversion = true; + } + } + + void invalidate() { + do_weightless_caching = false; + } + + void set_new_dtype(ov::element::Type curr_dtype) { + this->curr_dtype = curr_dtype; + do_precision_conversion = original_dtype != curr_dtype; + } + + bool save(BinaryOutputBuffer& ob, size_t data_size) const { + if (!do_weightless_caching) { + ob << false; + return false; + } + + ob << true; + ob << bin_offset; + ob << do_precision_conversion; + if (do_precision_conversion) { + ob << original_size; + ob << make_data(&original_dtype, sizeof(ov::element::Type)); + ob << make_data(&curr_dtype, sizeof(ov::element::Type)); + + size_t num_dims = shape.size(); + ob << make_data(&num_dims, sizeof(size_t)); + ob << make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type)); + } + return true; + } + + std::shared_ptr load(BinaryInputBuffer& ib, + 
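The encrypted buffers above share one framing convention: the writer accumulates plaintext, encrypts it once in `flush()`, and stores a byte count followed by the ciphertext; the reader restores the plaintext up front and serves `read()` calls from it. The round trip below is a sketch of that framing only; the reversible XOR transform is a placeholder for the user-supplied encrypt/decrypt callbacks.

```
// Sketch of the size-prefixed encrypt-on-flush / decrypt-on-construct framing.
#include <cassert>
#include <cstdint>
#include <functional>
#include <sstream>
#include <string>

using Codec = std::function<std::string(const std::string&)>;

static std::string xor_codec(const std::string& in) {       // symmetric placeholder
    std::string out = in;
    for (char& c : out) c ^= 0x5a;
    return out;
}

static void write_encrypted(std::ostream& os, const std::string& plaintext, const Codec& encrypt) {
    const std::string ciphertext = encrypt(plaintext);
    const uint64_t size = ciphertext.size();
    os.write(reinterpret_cast<const char*>(&size), sizeof(size));   // length prefix
    os.write(ciphertext.data(), static_cast<std::streamsize>(ciphertext.size()));
}

static std::string read_encrypted(std::istream& is, const Codec& decrypt) {
    uint64_t size = 0;
    is.read(reinterpret_cast<char*>(&size), sizeof(size));
    std::string ciphertext(size, '\0');
    is.read(&ciphertext[0], static_cast<std::streamsize>(size));
    return decrypt(ciphertext);
}

int main() {
    std::stringstream blob;
    write_encrypted(blob, "compiled model payload", xor_codec);
    assert(read_encrypted(blob, xor_codec) == "compiled model payload");
}
```

Buffering the whole plaintext before encrypting is also why encryption is only allowed for the size-optimized cache mode later in this patch.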
std::shared_ptr mapped_weights, + size_t data_size) { + ib >> do_weightless_caching; + if (!do_weightless_caching) { + return nullptr; + } + + OPENVINO_ASSERT(mapped_weights != nullptr, "mmap object is null"); + + ib >> bin_offset; + ib >> do_precision_conversion; + if (do_precision_conversion) { + ib >> original_size; + ib >> make_data(&original_dtype, sizeof(ov::element::Type)); + ib >> make_data(&curr_dtype, sizeof(ov::element::Type)); + + size_t num_dims = 0; + ib >> make_data(&num_dims, sizeof(size_t)); + shape.resize(num_dims); + ib >> make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type)); + } else { + original_size = data_size; + } + + auto mem_obj = std::make_shared(); + mem_obj->shared_buf = std::make_shared>>( + mapped_weights->data() + bin_offset, + original_size, + mapped_weights); + + if (should_run_transformations()) { + run_transformations(mem_obj); + } + return mem_obj; + } + +private: + bool do_weightless_caching = false; + bool do_precision_conversion = false; + + size_t bin_offset = SIZE_MAX; + size_t original_size = SIZE_MAX; + ov::element::Type original_dtype = ov::element::Type_t::undefined; + ov::element::Type curr_dtype = ov::element::Type_t::undefined; + ov::Shape shape; + + bool should_run_transformations() { + return do_precision_conversion; + } + + void run_transformations(std::shared_ptr mem_obj) { + auto orig_constant = std::make_shared(original_dtype, + shape, + mem_obj->shared_buf->get_ptr(), + mem_obj->shared_buf); + + ov::ParameterVector inputParams; + ov::ResultVector results; + results.push_back(std::make_shared(orig_constant->output(0))); + auto model = std::make_shared(results, inputParams, "aux"); + + ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations"); + + if (do_precision_conversion) { + precisions_map fp_convert_precision_map = { + {original_dtype, curr_dtype}}; + type_to_fuse_map empty_fuse_map = {}; + const bool keep_precision_sensitive_in_fp32 = false; + const bool convert_input_output_precision = false; + const bool store_original_precision_as_rt_attribute = true; + manager.register_pass(fp_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + } + + manager.run_passes(model); + const auto& ops = model->get_ops(); + auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr& node) { + return ov::op::util::is_constant(node); + }); + OPENVINO_ASSERT(it != ops.end()); + mem_obj->transformed_constant = std::dynamic_pointer_cast(*it); + OPENVINO_ASSERT(mem_obj->transformed_constant->get_element_type() == curr_dtype); + } +}; + /// @brief Provides input data to topology. /// @details This primitive allows to pass data which is known at topology creation. /// For example, weights and biases for scoring networks. @@ -20,21 +175,32 @@ namespace cldnn { struct data : public primitive_base { CLDNN_DECLARE_PRIMITIVE(data) - data() : primitive_base("", {}) {} + data() : primitive_base("", {}) { + cache_info = std::make_shared(); + } /// @brief Constructs data primitive. /// @param id This primitive id. /// @param mem @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. 
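The weightless cache manager above writes a flag-gated metadata record: a leading bool says whether weightless caching applies, the bin offset always follows when it does, and the size/shape fields are only serialized when a dtype conversion has to be replayed at import time. The record below mirrors that flow in simplified form (the dtype fields are omitted, and the layout is illustrative, not the on-disk format).

```
// Sketch of the flag-gated metadata record used by the weightless cache.
#include <cstdint>
#include <sstream>
#include <vector>

struct WeightlessRecord {
    bool     enabled = false;
    bool     needs_conversion = false;
    uint64_t bin_offset = 0;
    uint64_t original_size = 0;
    std::vector<uint64_t> shape;

    void save(std::ostream& os) const {
        os.write(reinterpret_cast<const char*>(&enabled), sizeof(enabled));
        if (!enabled) return;                       // plain blob, nothing else to store
        os.write(reinterpret_cast<const char*>(&bin_offset), sizeof(bin_offset));
        os.write(reinterpret_cast<const char*>(&needs_conversion), sizeof(needs_conversion));
        if (needs_conversion) {
            os.write(reinterpret_cast<const char*>(&original_size), sizeof(original_size));
            const uint64_t dims = shape.size();
            os.write(reinterpret_cast<const char*>(&dims), sizeof(dims));
            os.write(reinterpret_cast<const char*>(shape.data()), dims * sizeof(uint64_t));
        }
    }

    static WeightlessRecord load(std::istream& is) {
        WeightlessRecord r;
        is.read(reinterpret_cast<char*>(&r.enabled), sizeof(r.enabled));
        if (!r.enabled) return r;
        is.read(reinterpret_cast<char*>(&r.bin_offset), sizeof(r.bin_offset));
        is.read(reinterpret_cast<char*>(&r.needs_conversion), sizeof(r.needs_conversion));
        if (r.needs_conversion) {
            is.read(reinterpret_cast<char*>(&r.original_size), sizeof(r.original_size));
            uint64_t dims = 0;
            is.read(reinterpret_cast<char*>(&dims), sizeof(dims));
            r.shape.resize(dims);
            is.read(reinterpret_cast<char*>(r.shape.data()), dims * sizeof(uint64_t));
        }
        return r;
    }
};

int main() {
    WeightlessRecord out;
    out.enabled = true;
    out.needs_conversion = true;
    out.bin_offset = 128;
    out.original_size = 4096;
    out.shape = {1, 3, 224, 224};

    std::stringstream blob;
    out.save(blob);
    const WeightlessRecord in = WeightlessRecord::load(blob);
    return (in.enabled && in.bin_offset == 128 && in.shape == out.shape) ? 0 : 1;
}
```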
- data(const primitive_id& id, memory::ptr mem) - : primitive_base(id, {}), mem(std::move(mem)) {} + data(const primitive_id& id, memory::ptr mem) : primitive_base(id, {}), mem(std::move(mem)) { + cache_info = std::make_shared(); + } + + data(const primitive_id& id, memory::ptr mem, std::shared_ptr cache_info) + : primitive_base(id, {}), + mem(std::move(mem)), + cache_info(cache_info) { + if (!cache_info) { + this->cache_info = std::make_shared(); + } + } /// @brief @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. memory::ptr mem; - size_t original_size = SIZE_MAX; - size_t bin_offset = SIZE_MAX; + std::shared_ptr cache_info; size_t hash() const override { size_t seed = primitive::hash(); @@ -53,13 +219,8 @@ struct data : public primitive_base { size_t data_size = mem->size(); ob << make_data(&data_size, sizeof(size_t)); - bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size; - - if (is_cache_without_weights) { - ob << true; - ob << bin_offset; - } else { - ob << false; + bool do_weightless_caching = cache_info->save(ob, data_size); + if (!do_weightless_caching) { if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { ob << make_data(mem->buffer_ptr(), data_size); } else { @@ -88,26 +249,12 @@ struct data : public primitive_base { mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false); - bool is_cache_without_weights; - ib >> is_cache_without_weights; - if (is_cache_without_weights && mapped_weights == nullptr) { - OPENVINO_THROW("mmap object is null"); - } - - std::shared_ptr>> shared_buf; - if (is_cache_without_weights) { - ib >> bin_offset; - original_size = data_size; - - shared_buf = std::make_shared>>( - mapped_weights->data() + bin_offset, - data_size, - mapped_weights); - } + auto mem_obj = cache_info->load(ib, mapped_weights, data_size); + bool is_weightless_caching_enabled = mem_obj != nullptr; if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - if (is_cache_without_weights) { - std::memcpy(reinterpret_cast(mem->buffer_ptr()), shared_buf->get_ptr(), data_size); + if (is_weightless_caching_enabled) { + std::memcpy(reinterpret_cast(mem->buffer_ptr()), mem_obj->get_loaded_data(), data_size); } else { ib >> make_data(mem->buffer_ptr(), data_size); } @@ -116,8 +263,8 @@ struct data : public primitive_base { auto& strm = ib.get_engine().get_service_stream(); if (data_size < DATA_BLOCK_SIZE || output_layout.format.is_image_2d()) { std::vector _buf(data_size); - if (is_cache_without_weights) { - std::memcpy(reinterpret_cast(_buf.data()), shared_buf->get_ptr(), data_size); + if (is_weightless_caching_enabled) { + std::memcpy(reinterpret_cast(_buf.data()), mem_obj->get_loaded_data(), data_size); } else { ib >> make_data(_buf.data(), data_size); } @@ -135,9 +282,9 @@ struct data : public primitive_base { size_t copy_size = (data_size > (dst_offset + DATA_BLOCK_SIZE)) ? 
DATA_BLOCK_SIZE : (data_size - dst_offset); if (buf_flag) { - if (is_cache_without_weights) { + if (is_weightless_caching_enabled) { std::memcpy(reinterpret_cast(_buf1.data()), - shared_buf->get_ptr() + dst_offset, + mem_obj->get_loaded_data() + dst_offset, copy_size); } else { ib >> make_data(_buf1.data(), copy_size); @@ -148,9 +295,9 @@ struct data : public primitive_base { } ev1 = mem->copy_from(strm, _buf1.data(), src_offset, dst_offset, copy_size, is_blocking); } else { - if (is_cache_without_weights) { + if (is_weightless_caching_enabled) { std::memcpy(reinterpret_cast(_buf2.data()), - shared_buf->get_ptr() + dst_offset, + mem_obj->get_loaded_data() + dst_offset, copy_size); } else { ib >> make_data(_buf2.data(), copy_size); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 85173e9eb33e7c..a4129800733875 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -74,11 +74,14 @@ void propagate_constants::run(program& p) { // replace all constant nodes which are relevant for inference (either used by non-const user or marked as output) // with recomputed cldnn::data for (auto& cout : to_replace) { - auto& id_to_replace = cout.first; - auto mem_impl = cout.second; - - auto const_data = - std::make_shared("_cldnn_const_prop_" + id_to_replace, mem_impl /* <<< REMOVE ME WHEN POSSIBLE */); + auto& id_to_replace = std::get<0>(cout); + auto mem_impl = std::get<1>(cout); + auto cache_info = std::get<2>(cout); + auto in_layout = std::get<3>(cout); + + auto const_data = std::make_shared("_cldnn_const_prop_" + id_to_replace, + mem_impl, /* <<< REMOVE ME WHEN POSSIBLE */ + cache_info); auto& new_node = p.get_or_create(const_data); auto& curr_node = p.get_node(id_to_replace); @@ -92,6 +95,25 @@ void propagate_constants::run(program& p) { } } + auto is_reorder_with_only_dtype_change = [&](program_node& dst) { + if (!in_layout) { + return false; + } + auto& dst_layout = dst.get_output_layout(); + if (in_layout->data_type == dst_layout.data_type) { + return false; + } + + auto aux_layout = dst_layout; + aux_layout.data_type = in_layout->data_type; + return aux_layout == *in_layout.get(); + }; + if (is_reorder_with_only_dtype_change(new_node)) { + new_node.as().get_primitive()->cache_info->set_new_dtype(new_node.get_output_layout().data_type); + } else { + new_node.as().get_primitive()->cache_info->invalidate(); + } + curr_node.dependencies.clear(); // remove all constant users (as they will be either removed or replaced by cldnn::data which does not have any // dependencies) @@ -113,9 +135,10 @@ bool propagate_constants::has_non_const_user(program_node& node) const { return false; } -std::list> propagate_constants::calculate(engine& engine, - const ExecutionConfig& config, - std::shared_ptr task_executor) { +std::list, std::shared_ptr>> +propagate_constants::calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor) { if (!has_non_trivial_constants) return {}; @@ -123,15 +146,37 @@ std::list> propagate_constants::calculate(e cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); - for (auto& cin : const_inputs) + std::map, std::shared_ptr>> + weightless_cache_map; + for (auto& cin : 
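The deserialization path above streams large constants in `DATA_BLOCK_SIZE` chunks and alternates two staging buffers so the next chunk can be filled while the previous device copy is still in flight. The sketch below reproduces only the chunking and buffer-alternation logic; the chunk size is illustrative and a plain `memcpy` stands in for the asynchronous `memory::copy_from` on the GPU stream.

```
// Sketch of the double-buffered chunked copy used when loading cached constants.
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <vector>

constexpr size_t DATA_BLOCK_SIZE = 2 * 1024 * 1024;   // illustrative chunk size

void chunked_copy(void* dst, const void* src, size_t data_size) {
    std::vector<char> buf1(DATA_BLOCK_SIZE), buf2(DATA_BLOCK_SIZE);
    bool use_first = true;                                          // mirrors buf_flag
    for (size_t offset = 0; offset < data_size; offset += DATA_BLOCK_SIZE) {
        const size_t copy_size = std::min(DATA_BLOCK_SIZE, data_size - offset);
        std::vector<char>& staging = use_first ? buf1 : buf2;       // alternate buffers
        std::memcpy(staging.data(), static_cast<const char*>(src) + offset, copy_size);
        // In the real code this is an asynchronous memory::copy_from on the service stream.
        std::memcpy(static_cast<char*>(dst) + offset, staging.data(), copy_size);
        use_first = !use_first;
    }
}

int main() {
    std::vector<char> src(5 * 1024 * 1024, 'x'), dst(src.size());
    chunked_copy(dst.data(), src.data(), src.size());
    return dst == src ? 0 : 1;
}
```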
const_inputs) { net->set_input_data(cin->id(), cin->get_attached_memory_ptr()); + auto users = cin->get_users(); + if (users.size() == 1 && users.front()->is_type()) { + auto rprim = users.front()->as().get_primitive(); + auto id = rprim->id; + auto cache_ptr = cin->as().get_primitive()->cache_info; + auto layout_ptr = std::make_shared(cin->get_output_layout()); + weightless_cache_map.emplace(id, std::make_pair(cache_ptr, layout_ptr)); + } + } + net->execute({}); net->reset_execution(true); // wait for computations to complete auto outputs = net->get_outputs(); - std::list> ret; - for (auto& out : outputs) ret.push_back({out->id(), out->output_memory_ptr()}); + std::list, std::shared_ptr>> + ret; + for (auto& out : outputs) { + std::shared_ptr cache_ptr = nullptr; + std::shared_ptr layout_ptr = nullptr; + auto it = weightless_cache_map.find(out->id()); + if (it != weightless_cache_map.end()) { + cache_ptr = it->second.first; + layout_ptr = it->second.second; + } + ret.push_back({out->id(), out->output_memory_ptr(), cache_ptr, layout_ptr}); + } return ret; } diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 490076a37f788e..0b7c3d85c37e27 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -211,9 +211,10 @@ class propagate_constants : public base_pass { private: void run(program& p) override; - std::list> calculate(engine& engine, - const ExecutionConfig& config, - std::shared_ptr task_executor); + std::list, std::shared_ptr>> + calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor); bool has_non_const_user(program_node& node) const; void handle_constant(program& prog, program_node& node); void add_constant(program& prog, program_node& node); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 18e7a88fc42f3e..810353fe626c19 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - cldnn::BinaryOutputBuffer ob(model); + const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks); + + // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. + const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; + std::unique_ptr ob_ptr = + encryption_enabled + ? 
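The `is_reorder_with_only_dtype_change` lambda above decides whether a constant-folded reorder may keep its weightless-cache info: it copies the destination layout, forces the source data type back in, and requires everything else to match. A reduced sketch of that comparison, with a hypothetical `Layout` type instead of `cldnn::layout`:

```
// Sketch only: "did the reorder change nothing but the data type?"
#include <array>

enum class DType { f16, f32 };

struct Layout {                                   // hypothetical minimal layout
    DType dtype;
    std::array<int, 4> dims;
    bool operator==(const Layout& other) const {
        return dtype == other.dtype && dims == other.dims;
    }
};

bool is_only_dtype_change(const Layout& src, const Layout& dst) {
    if (src.dtype == dst.dtype)
        return false;                             // nothing was converted
    Layout aux = dst;
    aux.dtype = src.dtype;                        // undo the dtype change only
    return aux == src;                            // everything else must be identical
}

int main() {
    const Layout fp32{DType::f32, {1, 3, 224, 224}};
    const Layout fp16{DType::f16, {1, 3, 224, 224}};
    const Layout fp16_other{DType::f16, {1, 3, 112, 112}};
    return (is_only_dtype_change(fp32, fp16) && !is_only_dtype_change(fp32, fp16_other)) ? 0 : 1;
}
```

If anything besides the dtype differs, the pass invalidates the cache info and the constant falls back to being stored in the blob.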
cldnn::make_unique(model, encryption_callbacks.encrypt) + : cldnn::make_unique(model); + auto& ob = *ob_ptr; + ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode)); // Inputs @@ -222,6 +231,7 @@ void CompiledModel::export_model(std::ostream& model) const { } get_graph(0)->export_model(ob); + ob.flush(); } std::shared_ptr CompiledModel::get_runtime_model() const { diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index f2fa9bcdeeab1b..f6c15bc2e8943a 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -339,12 +339,21 @@ std::shared_ptr Plugin::import_model(std::istream& model, config.set_user_property(_orig_config); config.apply_user_properties(context_impl->get_engine().get_device_info()); - cldnn::BinaryInputBuffer ib(model, context_impl->get_engine()); + ov::CacheMode cache_mode = config.get_property(ov::cache_mode); + ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); + const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; - ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED; - ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode)); + std::unique_ptr ib_ptr = + encryption_enabled ? cldnn::make_unique(model, + context_impl->get_engine(), + encryption_callbacks.decrypt) + : cldnn::make_unique(model, context_impl->get_engine()); + auto& ib = *ib_ptr; - if (cache_mode != config.get_property(ov::cache_mode)) { + ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED; + ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode)); + + if (loaded_cache_mode != cache_mode) { return nullptr; } @@ -608,6 +617,8 @@ std::vector Plugin::get_supported_properties() const { ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW}, ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW}, + ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::kv_cache_precision.name(), PropertyMutability::RW}, }; return supported_properties; diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 368e25abe2ddac..a9bb813d0ce587 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -14,6 +14,7 @@ #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/primitives/data.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/primitives/mutable_data.hpp" @@ -311,11 +312,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrm_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); + auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); if (weightless_cache_attr != rt_info.end()) { - data_prim->bin_offset = weightless_cache_attr->second.as().bin_offset; - data_prim->original_size = - weightless_cache_attr->second.as().original_size; + auto& attr = weightless_cache_attr->second.as(); + data_prim->cache_info->set_constant_info(attr.bin_offset, + attr.original_size, + attr.original_dtype, + 
op.get_output_element_type(0), + op.get_output_shape(0)); } } } diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp index 839b2640ca180c..1f911d4a0f2070 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp @@ -8,48 +8,44 @@ #include "common_test_utils/common_utils.hpp" #include "common_test_utils/file_utils.hpp" #include "common_test_utils/ov_tensor_utils.hpp" -#include "common_test_utils/subgraph_builders/2_input_subtract.hpp" -#include "common_test_utils/subgraph_builders/concat_with_params.hpp" -#include "common_test_utils/subgraph_builders/conv_bias.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu_no_reshapes.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu_non_zero.hpp" -#include "common_test_utils/subgraph_builders/convert_transpose.hpp" -#include "common_test_utils/subgraph_builders/detection_output.hpp" -#include "common_test_utils/subgraph_builders/kso_func.hpp" -#include "common_test_utils/subgraph_builders/matmul_bias.hpp" -#include "common_test_utils/subgraph_builders/multi_single_conv.hpp" -#include "common_test_utils/subgraph_builders/multiple_input_outpput_double_concat.hpp" -#include "common_test_utils/subgraph_builders/nested_branch_conv_concat.hpp" -#include "common_test_utils/subgraph_builders/nested_split_conv_concat.hpp" #include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp" #include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp" -#include "common_test_utils/subgraph_builders/single_conv.hpp" -#include "common_test_utils/subgraph_builders/single_split.hpp" -#include "common_test_utils/subgraph_builders/split_concat.hpp" -#include "common_test_utils/subgraph_builders/split_conv_concat.hpp" -#include "common_test_utils/subgraph_builders/split_multi_conv_concat.hpp" #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp" #include "common_test_utils/test_common.hpp" #include "openvino/pass/serialize.hpp" +#include "openvino/util/codec_xor.hpp" namespace { -class CheckWeightlessCacheAccuracy : public ::testing::Test, - public ::testing::WithParamInterface { +typedef std::tuple testParams; + +class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface { public: - static std::string get_test_case_name(::testing::TestParamInfo obj) { - bool use_compile_model_api = obj.param; + static std::string get_test_case_name(::testing::TestParamInfo obj) { + bool use_compile_model_api_; + bool do_encryption_; + ov::element::Type inference_mode_; + ov::element::Type model_dtype_; + std::tie(use_compile_model_api_, do_encryption_, inference_mode_, model_dtype_) = obj.param; std::ostringstream result; - result << "use_compile_model_api=" << use_compile_model_api; + const char separator = '_'; + result << "use_compile_model_api=" << use_compile_model_api_ << separator; + result << "_do_encryption=" << do_encryption_; + result << "inference_mode=" << inference_mode_ << separator; + result << "model_dtype=" << model_dtype_; return result.str(); } + protected: std::shared_ptr model; std::string xml_path; std::string bin_path; std::string cache_path; - bool use_compile_model_api; // for loading from cache + std::string cache_dir; + bool use_compile_model_api; // for loading from cache + bool do_encryption; + 
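The `WeightlessCacheAttribute` handling above reduces to: look the attribute up in the node's `rt_info` by its static type info and, if present, read the offset, size and original dtype. The helper below is a hedged sketch of that lookup; the header path and attribute members should be verified against the OpenVINO version in use.

```
// Sketch only: mirrors the rt_info lookup in ProgramBuilder::add_primitive above.
// Check the header location against your OpenVINO version.
#include "openvino/core/node.hpp"
#include "openvino/core/rt_info/weightless_caching_attributes.hpp"

#include <optional>

struct ConstantBlobInfo {                 // hypothetical aggregate for the extracted fields
    size_t bin_offset;
    size_t original_size;
    ov::element::Type original_dtype;
};

std::optional<ConstantBlobInfo> get_weightless_info(const ov::Node& op) {
    const auto& rt_info = op.get_rt_info();
    auto it = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
    if (it == rt_info.end())
        return std::nullopt;              // constant was not annotated by the frontend
    const auto& attr = it->second.as<ov::WeightlessCacheAttribute>();
    return ConstantBlobInfo{attr.bin_offset, attr.original_size, attr.original_dtype};
}
```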
ov::element::Type inference_mode; + ov::element::Type model_dtype; void SetUp() override; void TearDown() override; @@ -61,36 +57,54 @@ void CheckWeightlessCacheAccuracy::SetUp() { xml_path = filePrefix + ".xml"; bin_path = filePrefix + ".bin"; cache_path = filePrefix + ".blob"; - use_compile_model_api = GetParam(); + cache_dir = filePrefix + "_cache_dir"; + + std::tie(use_compile_model_api, do_encryption, inference_mode, model_dtype) = GetParam(); } void CheckWeightlessCacheAccuracy::TearDown() { std::remove(xml_path.c_str()); std::remove(bin_path.c_str()); std::remove(cache_path.c_str()); + + ov::test::utils::removeFilesWithExt(cache_dir, "blob"); + ov::test::utils::removeFilesWithExt(cache_dir, "cl_cache"); + ov::test::utils::removeDir(cache_dir); } void CheckWeightlessCacheAccuracy::run() { - ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) }; - ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) }; + ov::AnyMap config = {ov::cache_dir(cache_dir), + ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::hint::inference_precision(inference_mode)}; + ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::weights_path(bin_path), + ov::hint::inference_precision(inference_mode)}; + + if (do_encryption) { + ov::EncryptionCallbacks encryption_callbacks; + encryption_callbacks.encrypt = ov::util::codec_xor; + encryption_callbacks.decrypt = ov::util::codec_xor; + config.insert(ov::cache_encryption_callbacks(encryption_callbacks)); + config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks)); + } auto core = ov::test::utils::PluginCache::get().core(); ov::pass::Serialize(xml_path, bin_path).run_on_model(model); ov::CompiledModel compiled_model; - OV_ASSERT_NO_THROW(compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config)); + compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config); - auto ofstr = std::ofstream(cache_path, std::ofstream::binary); - OV_ASSERT_NO_THROW(compiled_model.export_model(ofstr)); - ofstr.close(); + if (!use_compile_model_api) { + auto ofstr = std::ofstream(cache_path, std::ofstream::binary); + compiled_model.export_model(ofstr); + ofstr.close(); + } auto ifstr = std::ifstream(cache_path, std::ifstream::binary); ov::CompiledModel imported_model; if (use_compile_model_api) { - OV_ASSERT_NO_THROW(imported_model = - core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config)); + imported_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config); } else { - OV_ASSERT_NO_THROW(imported_model = - core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path)); + imported_model = core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path); } ifstr.close(); @@ -99,39 +113,58 @@ void CheckWeightlessCacheAccuracy::run() { for (size_t param_idx = 0; param_idx < model->get_parameters().size(); ++param_idx) { auto input = model->get_parameters().at(param_idx); - auto tensor = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input->get_shape()); + auto tensor = ov::test::utils::create_and_fill_tensor_real_distribution(input->get_element_type(), + input->get_shape(), + -100, + 100, + param_idx); orig_req.set_tensor(input, tensor); new_req.set_tensor(input, tensor); } - OV_ASSERT_NO_THROW(orig_req.infer()); - OV_ASSERT_NO_THROW(new_req.infer()); + orig_req.infer(); + new_req.infer(); auto result_vector = 
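For reference, the end-to-end flow the test above exercises can be reproduced with the public API roughly as follows. This is a usage sketch, not part of the patch: the file paths and the XOR lambda are placeholders (the test itself uses `ov::util::codec_xor`), and encryption only takes effect together with `CacheMode::OPTIMIZE_SIZE`, as the export code earlier in this patch enforces.

```
// Usage sketch: weightless GPU cache blob with encryption callbacks.
#include <openvino/openvino.hpp>

#include <fstream>
#include <string>

int main() {
    ov::Core core;

    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = [](const std::string& s) {        // symmetric placeholder codec
        std::string out = s;
        for (char& c : out) c ^= 0x5a;
        return out;
    };
    callbacks.decrypt = callbacks.encrypt;

    const ov::AnyMap compile_cfg{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                 ov::cache_encryption_callbacks(callbacks)};
    auto compiled = core.compile_model("model.xml", "GPU", compile_cfg);

    // Export a weights-less, encrypted blob.
    std::ofstream out_blob("model.blob", std::ios::binary);
    compiled.export_model(out_blob);
    out_blob.close();

    // Import it again; the original weights file must be supplied explicitly.
    const ov::AnyMap import_cfg{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                ov::weights_path("model.bin"),
                                ov::cache_encryption_callbacks(callbacks)};
    std::ifstream in_blob("model.blob", std::ios::binary);
    auto imported = core.import_model(in_blob, "GPU", import_cfg);
    return 0;
}
```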
model->get_results(); for (auto& res : result_vector) { auto orig_out = orig_req.get_tensor(res); auto new_out = new_req.get_tensor(res); - ov::test::utils::compare(orig_out, new_out); + ov::test::utils::compare(orig_out, new_out, inference_mode); } } TEST_P(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) { - model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, model_dtype)); + OV_ASSERT_NO_THROW(run()); } TEST_P(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) { - model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, model_dtype)); + OV_ASSERT_NO_THROW(run()); } TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) { - model = ov::test::utils::make_ti_with_lstm_cell(ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_ti_with_lstm_cell(model_dtype)); + OV_ASSERT_NO_THROW(run()); } -INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy, - ::testing::Bool(), +const std::vector inference_modes = { + ov::element::f32, + ov::element::f16, +}; + +const std::vector model_dtypes = { + ov::element::f32, + ov::element::f16, + ov::element::bf16, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, + CheckWeightlessCacheAccuracy, + ::testing::Combine(::testing::Bool(), + ::testing::Bool(), + ::testing::ValuesIn(inference_modes), + ::testing::ValuesIn(model_dtypes)), CheckWeightlessCacheAccuracy::get_test_case_name); } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp index 7abdbcb8c2fc52..7b4f27b5af05b4 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp @@ -23,11 +23,11 @@ using namespace ov; namespace shape_infer_tests { struct eltwise_test_params { - layout input1_layout; - layout input2_layout; + cldnn::layout input1_layout; + cldnn::layout input2_layout; eltwise_mode mode; AutoBroadcastSpec auto_broadcast_spec; - layout expected_layout; + cldnn::layout expected_layout; std::vector stride; }; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/multiclass_nms_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/multiclass_nms_gpu_test.cpp index b39c13c209f434..0b23c42e97c1e1 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/multiclass_nms_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/multiclass_nms_gpu_test.cpp @@ -787,20 +787,6 @@ std::vector> getParamsForBlockedLayout(bool is_cac getValues({0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, - 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, - 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 
1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, - 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, - 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, - 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, - 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, }), getValues({0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}), std::vector{1, 1}, diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp index a6bcc0258afc93..3c95eb19dd7993 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp @@ -37,11 +37,15 @@ struct random_uniform_gpu_test : public ::testing::TestWithParam(params.output_shape.size())}, data_type, format}); + {{1, 1, 1, static_cast(params.output_shape.size())}, ov::element::Type_t::i32, format}); auto min_val = engine.allocate_memory(layout(data_type, format::bfyx, {1, 1, 1, 1})); auto max_val = engine.allocate_memory(layout(data_type, format::bfyx, {1, 1, 1, 1})); - set_values(shape, params.output_shape); + std::vector out_shapes; + for (auto x : params.output_shape) + out_shapes.push_back(static_cast(x)); + + set_values(shape, out_shapes); set_values(min_val, {params.min_val}); set_values(max_val, {params.max_val}); diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp index db9666b9485546..5abe4b39fd44f2 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp @@ -386,7 +386,8 @@ DQMatMulGQ2i::DQMatMulGQ2i(Context::Ref ctx) { auto qcoeff = opp::wrap_type(); auto qcvtw = opp::wrap_type({qweight}); auto qmuls = opp::wrap_type({qcvtw, qcoeff}); - auto qreshp = opp::wrap_type({qmuls, opp::any_input()}); + auto qcvtm = opp::optional({qmuls->output(0)}); + auto qreshp = opp::wrap_type({qcvtm, opp::any_input()}); auto qcvtr = opp::optional({qreshp->output(0)}); auto qmmi = opp::any_input(); auto qmm = opp::wrap_type({qmmi, qcvtr}); @@ -398,6 +399,10 @@ DQMatMulGQ2i::DQMatMulGQ2i(Context::Ref ctx) { auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr(); auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr(); auto matched_node_qmuls = node_to_output.at(qmuls).get_node_shared_ptr(); + std::shared_ptr matched_node_qcvtm = nullptr; + if (node_to_output.count(qcvtm)) { + matched_node_qcvtm = node_to_output.at(qcvtm).get_node_shared_ptr(); + } auto matched_node_matmul = node_to_output.at(qmm).get_node_shared_ptr(); auto matched_node_qreshp = node_to_output.at(qreshp).get_node_shared_ptr(); auto matched_out_mmi = node_to_output.at(qmmi); @@ -426,6 +431,9 @@ DQMatMulGQ2i::DQMatMulGQ2i(Context::Ref ctx) { auto new_transpose_order_c = std::make_shared(ov::element::i32, ov::Shape{3}, new_transpose_order); auto new_transpose = 
std::make_shared(matched_node_qmuls, new_transpose_order_c); + if (matched_node_qcvtm) { + new_transpose = std::make_shared(matched_node_qcvtm, new_transpose_order_c); + } matched_node_qreshp->input(0).replace_source_output(new_transpose); matched_node_qreshp->validate_and_infer_types(); matched_matmul->validate_and_infer_types(); @@ -660,10 +668,11 @@ DQMatMulGQ2iP::DQMatMulGQ2iP(Context::Ref ctx) { auto qcoeff = opp::wrap_type(); auto qcvtw = opp::wrap_type({qweight}); auto qmuls = opp::wrap_type({qcvtw, qcoeff}); - auto qreshp = opp::wrap_type({qmuls, opp::any_input()}); - auto qcvtm = opp::optional({qreshp->output(0)}); + auto qcvtm = opp::optional({qmuls->output(0)}); + auto qreshp = opp::wrap_type({qcvtm, opp::any_input()}); + auto qcvtr = opp::optional({qreshp->output(0)}); auto qmmi = opp::any_input(); - auto qmm = opp::wrap_type({qmmi, qcvtm}); + auto qmm = opp::wrap_type({qmmi, qcvtr}); // Note: Use [=] to make sure the above objects stay alive in the callback auto callback = [=](ov::pass::pattern::Matcher& m) { @@ -672,6 +681,10 @@ DQMatMulGQ2iP::DQMatMulGQ2iP(Context::Ref ctx) { auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr(); auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr(); auto matched_node_qmuls = node_to_output.at(qmuls).get_node_shared_ptr(); + std::shared_ptr matched_node_qcvtm = nullptr; + if (node_to_output.count(qcvtm)) { + matched_node_qcvtm = node_to_output.at(qcvtm).get_node_shared_ptr(); + } auto matched_node_matmul = node_to_output.at(qmm).get_node_shared_ptr(); auto matched_node_qreshp = node_to_output.at(qreshp).get_node_shared_ptr(); auto matched_out_mmi = node_to_output.at(qmmi); @@ -703,6 +716,9 @@ DQMatMulGQ2iP::DQMatMulGQ2iP(Context::Ref ctx) { auto new_transpose_order_c = std::make_shared(ov::element::i32, ov::Shape{3}, new_transpose_order); auto new_transpose = std::make_shared(matched_node_qmuls, new_transpose_order_c); + if (matched_node_qcvtm) { + new_transpose = std::make_shared(matched_node_qcvtm, new_transpose_order_c); + } matched_node_qreshp->input(0).replace_source_output(new_transpose); matched_node_qreshp->validate_and_infer_types(); matched_matmul->validate_and_infer_types(); @@ -798,7 +814,8 @@ DQParMMGQ::DQParMMGQ(Context::Ref ctx) { auto qreshp = opp::wrap_type({qmuls, opp::any_input()}); auto qmmi = opp::wrap_type({opp::any_input(), opp::any_input()}); auto qcvtr = opp::optional({qreshp->output(0)}); - auto qmm = opp::wrap_type({qmmi, qcvtr}); + auto qcvtm = opp::optional({qmmi->output(0)}); + auto qmm = opp::wrap_type({qcvtm, qcvtr}); // Note: Use [=] to make sure the above objects stay alive in the callback auto callback = [=](ov::pass::pattern::Matcher& m) { diff --git a/src/plugins/intel_npu/tools/protopipe/README.md b/src/plugins/intel_npu/tools/protopipe/README.md index 00849ad8bddc9a..a6e3e00f94db57 100644 --- a/src/plugins/intel_npu/tools/protopipe/README.md +++ b/src/plugins/intel_npu/tools/protopipe/README.md @@ -60,6 +60,7 @@ log_level: INFO - `ol` - **Optional**. Output layer layout. - `iml` - **Optional**. Input model layout. - `oml` - **Optional**. Output model layout. +- `reshape` - **Optional**. Set shape for input layers. For example, "input1: [1,3,224,224], input2: [1,4]" or "[1,3,224,224]" in case of one input layer. Examples: ``` @@ -515,67 +516,72 @@ Iteration : ## How to build ### Prerequisites -1. Clone `npu-plugin` repository -2. Build OpenCV G-API with OpenVINO/ONNXRT support -#### Build OpenCV G-API with OpenVINO/ONNXRT support -1. Clone OpenCV repo: +1. 
Build OpenCV G-API with OpenVINO/ONNXRT support +- Clone and build [OpenVINO](https://github.com/openvinotoolkit/openvino) from sources + ``` + mkdir "build" && cd "build" + cmake ../ -DCMAKE_BUILD_TYPE=Release ^ + -DENABLE_PLUGINS_XML=ON ^ + -DCMAKE_INSTALL_PREFIX=install ^ + -DENABLE_DEBUG_CAPS=ON ^ + -DENABLE_NPU_DEBUG_CAPS=ON .. + + cmake --build . --config Release --target install --parallel + ``` +- Init OpenVINO enviroment + ``` + "/setupvars.bat" + ``` +2. Build OpenCV +- Clone OpenCV repo: ``` git clone https://github.com/opencv/opencv - cd opencv && git checkout 78195bc3df + cd opencv && git checkout 3919f33e21 ``` -2. Build OpenCV G-API: - ``` - mkdir -p build && cd build - cmake ../ -DBUILD_LIST=gapi \ - -DCMAKE_BUILD_TYPE=Release \ - -DWITH_OPENVINO=ON \ - -DOpenVINO_DIR= \ - -DWITH_ONNX=ON \ - -DORT_INSTALL_DIR= +- Build OpenCV G-API: + ``` + mkdir "build" && cd "build" + cmake .. -DBUILD_LIST=gapi ^ + -DCMAKE_BUILD_TYPE=Release ^ + -DWITH_OPENVINO=ON cmake --build . --config Release --target opencv_gapi --parallel - ``` -### In-plugin build - -1. Clone and build [OpenVINO](https://github.com/openvinotoolkit/openvino) from sources -2. Build OpenCV G-API with OpenVINO / ONNXRT support -3. Clone `npu-plugin` repository - ``` - git clone https://github.com/openvinotoolkit/npu_plugin - git submodule update --init --recursive - ``` -4. Build `Protopipe` as part of the `npu-plugin` build: - ``` - mkdir build && cd build - cmake ../ -DOpenCV_DIR= -DOpenVINODeveloperPackage_DIR= - cmake --build . --config Release --target protopipe --parallel - ``` - + ``` + If ONNX support is needed build OpenCV G-API with ONNX support: + ``` + mkdir "build" && cd "build" + cmake .. -DBUILD_LIST=gapi ^ + -DCMAKE_BUILD_TYPE=Release ^ + -DWITH_OPENVINO=ON ^ + -DWITH_ONNX=ON ^ + -DORT_INSTALL_DIR= + cmake --build . --config Release --target opencv_gapi --parallel + ``` +### Build Protopipe inside OpenVINO +1. Build protopipe + ``` + cd + mkdir "src/plugins/intel_npu/tools/protopipe/build" && cd "src/plugins/intel_npu/tools/protopipe/build" + cmake ../ -DOpenCV_DIR= -DCMAKE_BUILD_TYPE=Release + cmake --build . --config Release --target protopipe --parallel + ``` ### Standalone build -1. Build `yaml-cpp` - ``` - mkdir -p yaml-cpp_build cd && yaml-cpp_build - cmake ..//thirdparty/yaml-cpp -DCMAKE_INSTALL_PREFIX=install - cmake --build . --config Release --target install --parallel - ``` -2. Build `gflags` - ``` - git clone https://github.com/gflags/gflags - cd gflags - mkdir -p gflags_build cd && gflags_build - cmake ../ -DCMAKE_INSTALL_PREFIX=install - cmake --build . --config Release --target install --parallel - ``` -3. Build `Protopipe` - ``` - mkdir -b protopipe_build && cd protopipe_build - cmake /tools/protopipe/ \ - -DOpenCV_DIR= \ - -Dgflags_DIR= \ - -DOpenVINO_DIR= \ - - cmake --build . --config Release --target protopipe --parallel - ``` +1. Build `gflags` + ``` + git clone https://github.com/gflags/gflags + cd gflags + mkdir "gflags_build" && cd "gflags_build" + cmake ../ -DCMAKE_INSTALL_PREFIX=install + cmake --build . --config Release --target install --parallel + ``` +2. Build `Protopipe` + ``` + mkdir "protopipe_build" && cd "protopipe_build" + cmake /src/plugins/intel_npu/tools/protopipe ^ + -DOpenCV_DIR= ^ + -Dgflags_DIR= + + cmake --build . 
--config Release --target protopipe --parallel + ``` ### Verify the installation **Note**: Make sure `opencv_*` libraries are visible in the environment: - Windows: diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp index c2a1bd6415d595..b9f03a97ba3f69 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp @@ -345,6 +345,10 @@ struct convert { params.output_model_layout = node["oml"].as>(); } + if (node["reshape"]) { + params.reshape = node["reshape"].as>> (); + } + if (node["config"]) { params.config = node["config"].as>(); } diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp index e4568c671438bc..f9c8877b05c53e 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp @@ -91,6 +91,7 @@ struct OpenVINOParams { LayerVariantAttr output_layout; LayerVariantAttr input_model_layout; LayerVariantAttr output_model_layout; + LayerVariantAttr> reshape; std::map config; size_t nireq = 1u; }; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp index 57527cef0cc4aa..33e01e36404570 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp @@ -128,6 +128,15 @@ static void cfgOutputPostproc(ov::preprocess::PrePostProcessor& ppp, const std:: } } +static void cfgReshape(const std::shared_ptr& model, + const AttrMap> reshape_map) { + std::map partial_shapes; + for (const auto& [layer_name, shape] : reshape_map) { + partial_shapes.emplace(layer_name, shape); + } + model->reshape(partial_shapes); +} + static std::vector extractLayerNames(const std::vector>& nodes) { std::vector names; std::transform(nodes.begin(), nodes.end(), std::back_inserter(names), [](const auto& node) { @@ -148,6 +157,9 @@ InOutLayers OpenVINOLayersReader::Impl::readFromModel(const std::string& model_p const auto iml_map = unpackLayerAttr(params.input_model_layout, input_names, "input model layout"); cfgInputPreproc(ppp, model, ip_map, il_map, iml_map); + const auto reshape_map = unpackLayerAttr(params.reshape, input_names, "reshape"); + cfgReshape(model, reshape_map); + const auto& output_names = extractLayerNames(model->outputs()); const auto op_map = unpackLayerAttr(params.output_precision, output_names, "output precision"); const auto ol_map = unpackLayerAttr(params.output_layout, output_names, "output layout"); diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp index 5b1743651b6ef1..5e57ebf8e75e88 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp @@ -57,6 +57,12 @@ static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const OpenVIN } else if (std::holds_alternative>(params.output_model_layout)) { network->cfgOutputModelLayout(std::get>(params.output_model_layout)); } + + if (std::holds_alternative>>(params.reshape)) { + network->cfgReshape(std::get>>(params.reshape)); + } else if (std::holds_alternative>(params.reshape)) { + 
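The new `reshape` option documented and parsed above ultimately calls `ov::Model::reshape` with a per-input map of partial shapes. A minimal sketch of that call outside Protopipe; the model path and input names are placeholders.

```
// Sketch only: what Protopipe's cfgReshape does for each named input layer.
#include <openvino/openvino.hpp>

#include <map>
#include <string>

int main() {
    ov::Core core;
    std::shared_ptr<ov::Model> model = core.read_model("model.xml");

    const std::map<std::string, ov::PartialShape> new_shapes{
        {"input1", ov::PartialShape{1, 3, 224, 224}},
        {"input2", ov::PartialShape{1, 4}},
    };
    model->reshape(new_shapes);   // new shapes propagate through the whole graph
    return 0;
}
```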
network->cfgReshape(std::get>(params.reshape)); + } } return cv::gapi::networks(*network); } diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp index f66df99c7b1c43..20d72f0fad5a60 100644 --- a/src/plugins/template/src/plugin.cpp +++ b/src/plugins/template/src/plugin.cpp @@ -257,15 +257,18 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const return ro_properties; }; const auto& default_rw_properties = []() { - std::vector rw_properties{ov::device::id, - ov::enable_profiling, - ov::hint::performance_mode, - ov::hint::num_requests, - ov::hint::inference_precision, - ov::hint::execution_mode, - ov::num_streams, - ov::template_plugin::disable_transformations, - ov::log::level}; + std::vector rw_properties{ + ov::device::id, + ov::enable_profiling, + ov::hint::performance_mode, + ov::hint::num_requests, + ov::hint::inference_precision, + ov::hint::execution_mode, + ov::num_streams, + ov::template_plugin::disable_transformations, + ov::log::level, + ov::hint::model_priority, + }; return rw_properties; }; if (ov::supported_properties == name) { @@ -280,7 +283,9 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const } else if (ov::internal::supported_properties == name) { return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::inference_num_threads.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::threads_per_stream.name(), ov::PropertyMutability::RW}}; } else if (ov::available_devices == name) { // TODO: fill list of available devices return decltype(ov::available_devices)::value_type{{""}}; diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp index 76b110e9a5e655..26ba7f59245c2f 100644 --- a/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp @@ -121,6 +121,19 @@ TEST_P(InferRequestPropertiesTest, ReusableCPUStreamsExecutor) { } } } + +TEST_P(InferRequestPropertiesTest, ConfigHasUnsupportedPluginProperty) { + configuration.insert({ov::enable_mmap(false)}); + if (target_device.find(ov::test::utils::DEVICE_AUTO) == std::string::npos && + target_device.find(ov::test::utils::DEVICE_MULTI) == std::string::npos && + target_device.find(ov::test::utils::DEVICE_HETERO) == std::string::npos && + target_device.find(ov::test::utils::DEVICE_BATCH) == std::string::npos) { + OV_ASSERT_NO_THROW(core->set_property(target_device, configuration)); + } + // Compile model to target plugins + execNet = core->compile_model(function, target_device, configuration); + OV_ASSERT_NO_THROW(execNet.create_infer_request()); +} } // namespace behavior } // namespace test } // namespace ov diff --git a/tests/model_hub_tests/transformation_tests/test_stateful_to_stateless_transformation.py b/tests/model_hub_tests/transformation_tests/test_stateful_to_stateless_transformation.py index c3aaa082322fdf..461b5f157814ef 100644 --- a/tests/model_hub_tests/transformation_tests/test_stateful_to_stateless_transformation.py +++ 
b/tests/model_hub_tests/transformation_tests/test_stateful_to_stateless_transformation.py @@ -8,21 +8,46 @@ import models_hub_common.utils as utils import pytest import os +import re def get_read_value_ops(model: ov.Model): return [op for op in model.get_ops() if op.get_type_name() == 'ReadValue'] -def check_desc_tensors(tensors1, tensors2): - # order of tensors may not match, comparing by the total amount and names - assert len(tensors1) == len(tensors2) - assert set(tuple(t.names) for t in tensors1) == set(tuple(t.names) for t in tensors2) - for t1 in tensors1: - t2_candidates = [t for t in tensors2 if t1.names & t.names] - assert len(t2_candidates) == 1 - t2 = t2_candidates[0] - assert t1.names == t2.names - assert t1.get_partial_shape() == t2.get_partial_shape() - assert t1.get_element_type() == t2.get_element_type() +def check_desc_tensors(expected_tensors, tensors): + # checks if tensors descriptors are same as expected + pattern_input = re.compile(R"input_restored.((past_key_values|present)\.(\d+)\.(key|value))") + + assert len(expected_tensors) == len(tensors) + for expected in expected_tensors: + # The `patch_stateful` in optimum use any name instead found key/value names OV will use names restore path + # Restore expected names to find tensor for compare (can be removed when HG optimum updated) + expected_names = {m[1] if m else name for m, name in ((pattern_input.match(name), name) for name in expected.names)} + # tensor names check is relaxed the expected is sub-set of final names + t_candidates = [t for t in tensors if expected_names.issubset(t.names)] + assert len(t_candidates) == 1 + tensor = t_candidates[0] + assert expected.get_element_type() == tensor.get_element_type() + assert expected.get_partial_shape() == tensor.get_partial_shape() + + +def check_result_desc_tensors(expected_tensors, tensors): + # checks if Result tensors descriptors are same as expected + pattern_restore_output = re.compile(R"output_restored.((past_key_values|present)\.(\d+)\.(key|value))") + pattern_output = re.compile(R"(present\.(\d+)\.(key|value))") + + assert len(expected_tensors) == len(tensors) + for expected in expected_tensors: + # The `patch_stateful` in optimum use any name instead found key/value names OV will use names restore path + # Restore expected names to find tensor for compare (can be removed when HG optimum updated) + expected_names = {name for name in expected.names if not pattern_restore_output.match(name)} + expected_o_names = {name for name in expected.names if pattern_output.match(name)} + expected_names = expected_o_names if expected_o_names else expected_names + t_candidates = [t for t in tensors if expected_names.issubset(t.names)] + assert len(t_candidates) == 1 + tensor = t_candidates[0] + assert expected.get_element_type() == tensor.get_element_type() + assert expected.get_partial_shape() == tensor.get_partial_shape() + @retry(3, exceptions=(OSError,), delay=1) def run_stateful_to_stateless_in_runtime(tmp_path, model_id, model_link): @@ -42,7 +67,7 @@ def run_stateful_to_stateless_in_runtime(tmp_path, model_id, model_link): print(model.model) print(stateless_model.model) check_desc_tensors(model.model.inputs, stateless_model.model.inputs) - check_desc_tensors(model.model.outputs, stateless_model.model.outputs) + check_result_desc_tensors(model.model.outputs, stateless_model.model.outputs) core = ov.Core() core.compile_model(model.model, 'CPU') @@ -57,4 +82,4 @@ def test_stateful_to_stateless_precommit(tmp_path, model_name, model_link, mark, pytest.skip(reason) elif 
mark == 'xfail': pytest.xfail(reason) - run_stateful_to_stateless_in_runtime(tmp_path, model_name, model_link) \ No newline at end of file + run_stateful_to_stateless_in_runtime(tmp_path, model_name, model_link) diff --git a/tools/benchmark_tool/openvino/__init__.py b/tools/benchmark_tool/openvino/__init__.py index e4d1a247520332..69c678909b1c9e 100644 --- a/tools/benchmark_tool/openvino/__init__.py +++ b/tools/benchmark_tool/openvino/__init__.py @@ -27,11 +27,11 @@ from openvino import properties as properties # Import most important classes and functions from openvino.runtime -from openvino.runtime import Model -from openvino.runtime import Core -from openvino.runtime import CompiledModel -from openvino.runtime import InferRequest -from openvino.runtime import AsyncInferQueue +from openvino._ov_api import Model +from openvino._ov_api import Core +from openvino._ov_api import CompiledModel +from openvino._ov_api import InferRequest +from openvino._ov_api import AsyncInferQueue from openvino.runtime import Symbol from openvino.runtime import Dimension @@ -43,12 +43,13 @@ from openvino.runtime import Tensor from openvino.runtime import OVAny -from openvino.runtime import compile_model +# Helper functions for openvino module +from openvino.runtime.utils.data_helpers import tensor_from_file +from openvino._ov_api import compile_model from openvino.runtime import get_batch from openvino.runtime import set_batch from openvino.runtime import serialize from openvino.runtime import shutdown -from openvino.runtime import tensor_from_file from openvino.runtime import save_model from openvino.runtime import layout_helpers diff --git a/tools/ovc/openvino/__init__.py b/tools/ovc/openvino/__init__.py index e4d1a247520332..69c678909b1c9e 100644 --- a/tools/ovc/openvino/__init__.py +++ b/tools/ovc/openvino/__init__.py @@ -27,11 +27,11 @@ from openvino import properties as properties # Import most important classes and functions from openvino.runtime -from openvino.runtime import Model -from openvino.runtime import Core -from openvino.runtime import CompiledModel -from openvino.runtime import InferRequest -from openvino.runtime import AsyncInferQueue +from openvino._ov_api import Model +from openvino._ov_api import Core +from openvino._ov_api import CompiledModel +from openvino._ov_api import InferRequest +from openvino._ov_api import AsyncInferQueue from openvino.runtime import Symbol from openvino.runtime import Dimension @@ -43,12 +43,13 @@ from openvino.runtime import Tensor from openvino.runtime import OVAny -from openvino.runtime import compile_model +# Helper functions for openvino module +from openvino.runtime.utils.data_helpers import tensor_from_file +from openvino._ov_api import compile_model from openvino.runtime import get_batch from openvino.runtime import set_batch from openvino.runtime import serialize from openvino.runtime import shutdown -from openvino.runtime import tensor_from_file from openvino.runtime import save_model from openvino.runtime import layout_helpers