[DOCS] Add weightless caching docs #28253

Open · wants to merge 2 commits into base: master
35 changes: 30 additions & 5 deletions docs/articles_en/assets/snippets/ov_caching.cpp
@@ -61,12 +61,36 @@ bool cachingSupported = std::find(caps.begin(), caps.end(), ov::device::capability::EXPORT_IMPORT) != caps.end();
}

void part4() {
    std::string modelPath = "/tmp/myModel.xml";
    std::string device = "GPU";
    ov::Core core;       // Step 1: create ov::Core object
    bool hasGPU = false; // Step 1a: Check if GPU is available
    auto devices = core.get_available_devices();
    for (auto&& supported : devices) {
        hasGPU |= supported.find(device) != std::string::npos;
    }
    if (!hasGPU) {
        return;
    }
    core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching
    //! [ov:caching:part4]
    // Note: the model path needs to point to the *.xml file, not the *.bin, when using the IR model format.
    auto compiled = core.compile_model(modelPath,
                                       device,
                                       ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE));
    //! [ov:caching:part4]
    if (!compiled) {
        throw std::runtime_error("error");
    }
}

void part5() {
    std::string modelPath = "/tmp/myModel.xml";
    std::string device = "CPU";
    ov::Core core;                                          // Step 1: create ov::Core object
    core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching
    auto model = core.read_model(modelPath);                // Step 2: Read model
-   //! [ov:caching:part4]
+   //! [ov:caching:part5]
    ov::AnyMap config;
    ov::EncryptionCallbacks encryption_callbacks;
    static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F};
@@ -84,13 +84,13 @@ encryption_callbacks.encrypt = codec_xor;
    encryption_callbacks.decrypt = codec_xor;
    config.insert(ov::cache_encryption_callbacks(encryption_callbacks)); // Step 4: Set device configuration
    auto compiled = core.compile_model(model, device, config);           // Step 5: Compile model
-   //! [ov:caching:part4]
+   //! [ov:caching:part5]
    if (!compiled) {
        throw std::runtime_error("error");
    }
}

- void part5() {
+ void part6() {
    std::string modelPath = "/tmp/myModel.xml";
    std::string device = "GPU";
    ov::Core core; // Step 1: create ov::Core object
@@ -103,7 +127,7 @@ void part5() {
        return;
    }
    core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching
-   //! [ov:caching:part5]
+   //! [ov:caching:part6]
    static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F};
    auto codec_xor = [&](const std::string& source_str) {
        auto key_size = sizeof(codec_key);
@@ -119,7 +143,7 @@ auto compiled = core.compile_model(modelPath,
                                       device,
                                       ov::cache_encryption_callbacks(ov::EncryptionCallbacks{codec_xor, codec_xor}),
                                       ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE)); // Step 5: Compile model
-   //! [ov:caching:part5]
+   //! [ov:caching:part6]
    if (!compiled) {
        throw std::runtime_error("error");
    }
@@ -133,6 +157,7 @@ int main() {
        part3();
        part4();
        part5();
        part6();
    } catch (...) {
    }
    return 0;
16 changes: 13 additions & 3 deletions docs/articles_en/assets/snippets/ov_caching.py
@@ -44,6 +44,16 @@
# ! [ov:caching:part3]

# ! [ov:caching:part4]
core = ov.Core()
if "GPU" in core.available_devices:
    core.set_property({props.cache_dir: path_to_cache_dir})
    config_cache = {}
    config_cache["CACHE_MODE"] = "OPTIMIZE_SIZE"
    # Note: the model path needs to point to the *.xml file, not the *.bin, when using the IR model format.
    compiled_model = core.compile_model(model=model_path, device_name='GPU', config=config_cache)
# ! [ov:caching:part4]

# ! [ov:caching:part5]
import base64

def encrypt_base64(src):
@@ -58,9 +68,9 @@ def decrypt_base64(src):
config_cache["CACHE_ENCRYPTION_CALLBACKS"] = [encrypt_base64, decrypt_base64]
model = core.read_model(model=model_path)
compiled_model = core.compile_model(model=model, device_name=device_name, config=config_cache)
- # ! [ov:caching:part4]
+ # ! [ov:caching:part5]

# ! [ov:caching:part6]
import base64

def encrypt_base64(src):
@@ -76,4 +86,4 @@ def decrypt_base64(src):
config_cache["CACHE_ENCRYPTION_CALLBACKS"] = [encrypt_base64, decrypt_base64]
config_cache["CACHE_MODE"] = "OPTIMIZE_SIZE"
compiled_model = core.compile_model(model=model_path, device_name='GPU', config=config_cache)
- # ! [ov:caching:part5]
+ # ! [ov:caching:part6]
@@ -136,10 +136,10 @@ To check in advance if a particular device supports model caching, your application
         :language: cpp
         :fragment: [ov:caching:part3]

Set "cache_encryption_callbacks" config option to enable cache encryption
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Set ``CacheMode`` property to ``OPTIMIZE_SIZE`` to enable weightless caching
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

- If model caching is enabled in the CPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Currently, this property can be set only in ``compile_model``.
+ Weightless caching creates a cache file that does not contain the model weights; instead, the weights are loaded from the original model file when the cached model is imported, which helps reduce the size of the cache file.

.. tab-set::

@@ -157,7 +157,18 @@ If model caching is enabled in the GPU Plugin, the model topology can be encrypted
         :language: cpp
         :fragment: [ov:caching:part4]

- If model caching is enabled in the GPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Full encryption only works when the ``CacheMode`` property is set to ``OPTIMIZE_SIZE``.

.. important::

   Currently, this property is supported only by the GPU Plugin and the IR model format.

.. important::

   Some weights that undergo transformations during model compilation may not be eligible for weightless caching. In such cases, the cache file will contain these weights, while the weightless mechanism is still used for the rest. The feature supports some of the common transformations and replicates them after the model is loaded from the cache.
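
For illustration, a minimal sketch of weightless caching in Python (``model.xml`` and the cache path are placeholders; an available GPU device is assumed):

.. code-block:: py

   import openvino as ov
   import openvino.properties as props

   core = ov.Core()
   if "GPU" in core.available_devices:
       core.set_property({props.cache_dir: "/path/to/cache/dir"})  # enable caching
       config_cache = {"CACHE_MODE": "OPTIMIZE_SIZE"}              # weightless cache
       # The first call compiles the model and writes a cache blob without weights.
       compiled_model = core.compile_model(model="model.xml", device_name="GPU", config=config_cache)
       # An identical subsequent call imports the cached blob and reloads the
       # weights from the original model.xml.
       compiled_model = core.compile_model(model="model.xml", device_name="GPU", config=config_cache)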

Set "cache_encryption_callbacks" config option to enable cache encryption
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

If model caching is enabled in the CPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Currently, this property can be set only in ``compile_model``.

.. tab-set::

@@ -175,6 +186,24 @@
         :language: cpp
         :fragment: [ov:caching:part5]
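
As a usage sketch, mirroring the base64 callbacks from the snippet above (paths are placeholders; note that the callbacks also have to be provided on runs that import the cache, since they are used to decrypt it):

.. code-block:: py

   import base64
   import openvino as ov
   import openvino.properties as props

   def encrypt_base64(src):
       return base64.b64encode(src.encode()).decode()

   def decrypt_base64(src):
       return base64.b64decode(src.encode()).decode()

   core = ov.Core()
   core.set_property({props.cache_dir: "/path/to/cache/dir"})
   config_cache = {"CACHE_ENCRYPTION_CALLBACKS": [encrypt_base64, decrypt_base64]}
   model = core.read_model(model="model.xml")
   compiled_model = core.compile_model(model=model, device_name="CPU", config=config_cache)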

If model caching is enabled in the GPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Full encryption only works when the ``CacheMode`` property is set to ``OPTIMIZE_SIZE``.

.. tab-set::

   .. tab-item:: Python
      :sync: py

      .. doxygensnippet:: docs/articles_en/assets/snippets/ov_caching.py
         :language: py
         :fragment: [ov:caching:part6]

   .. tab-item:: C++
      :sync: cpp

      .. doxygensnippet:: docs/articles_en/assets/snippets/ov_caching.cpp
         :language: cpp
         :fragment: [ov:caching:part6]
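
Combining the two features, a minimal sketch of fully encrypted model caching on GPU (same placeholder paths and illustrative base64 callbacks as above):

.. code-block:: py

   import base64
   import openvino as ov
   import openvino.properties as props

   def encrypt_base64(src):
       return base64.b64encode(src.encode()).decode()

   def decrypt_base64(src):
       return base64.b64decode(src.encode()).decode()

   core = ov.Core()
   if "GPU" in core.available_devices:
       core.set_property({props.cache_dir: "/path/to/cache/dir"})
       config_cache = {
           "CACHE_ENCRYPTION_CALLBACKS": [encrypt_base64, decrypt_base64],
           "CACHE_MODE": "OPTIMIZE_SIZE",  # full encryption requires OPTIMIZE_SIZE
       }
       compiled_model = core.compile_model(model="model.xml", device_name="GPU", config=config_cache)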

.. important::

   Currently, this property is supported only by the CPU and GPU plugins. For other HW plugins, setting this property will not encrypt/decrypt the model topology in cache and will not affect performance.