diff --git a/docs/articles_en/assets/snippets/ov_caching.cpp b/docs/articles_en/assets/snippets/ov_caching.cpp
index f3113438e20642..6b36090e546768 100644
--- a/docs/articles_en/assets/snippets/ov_caching.cpp
+++ b/docs/articles_en/assets/snippets/ov_caching.cpp
@@ -61,12 +61,36 @@ bool cachingSupported = std::find(caps.begin(), caps.end(), ov::device::capabili
 }
 
 void part4() {
+    std::string modelPath = "/tmp/myModel.xml";
+    std::string device = "GPU";
+    ov::Core core;                                          // Step 1: create ov::Core object
+    bool hasGPU = false;                                    // Step 1a: check if a GPU device is available
+    auto devices = core.get_available_devices();
+    for (auto&& supported : devices) {
+        hasGPU |= supported.find(device) != std::string::npos;
+    }
+    if (!hasGPU) {
+        return;
+    }
+    core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching
+//! [ov:caching:part4]
+// Note: the model path must point to the *.xml file, not the *.bin, when using the IR model format.
+auto compiled = core.compile_model(modelPath,
+                                   device,
+                                   ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE));
+//! [ov:caching:part4]
+    if (!compiled) {
+        throw std::runtime_error("error");
+    }
+}
+
+void part5() {
     std::string modelPath = "/tmp/myModel.xml";
     std::string device = "CPU";
     ov::Core core;                                          // Step 1: create ov::Core object
     core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching
     auto model = core.read_model(modelPath);                // Step 2: Read Model
-//! [ov:caching:part4]
+//! [ov:caching:part5]
 ov::AnyMap config;
 ov::EncryptionCallbacks encryption_callbacks;
 static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F};
@@ -84,13 +108,13 @@ encryption_callbacks.encrypt = codec_xor;
 encryption_callbacks.decrypt = codec_xor;
 config.insert(ov::cache_encryption_callbacks(encryption_callbacks));  // Step 4: Set device configuration
 auto compiled = core.compile_model(model, device, config);            // Step 5: LoadNetwork
-//! [ov:caching:part4]
+//! [ov:caching:part5]
     if (!compiled) {
         throw std::runtime_error("error");
     }
 }
 
-void part5() {
+void part6() {
     std::string modelPath = "/tmp/myModel.xml";
     std::string device = "GPU";
     ov::Core core;                                          // Step 1: create ov::Core object
@@ -103,7 +127,7 @@ void part5() {
         return;
     }
     core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching
-//! [ov:caching:part5]
+//! [ov:caching:part6]
 static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F};
 auto codec_xor = [&](const std::string& source_str) {
     auto key_size = sizeof(codec_key);
@@ -119,7 +143,7 @@ auto compiled = core.compile_model(modelPath,
     device,
     ov::cache_encryption_callbacks(ov::EncryptionCallbacks{codec_xor, codec_xor}),
     ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE));  // Step 5: Compile model
-//! [ov:caching:part5]
+//! [ov:caching:part6]
     if (!compiled) {
         throw std::runtime_error("error");
     }
@@ -133,6 +157,7 @@ int main() {
         part3();
         part4();
         part5();
+        part6();
     } catch (...) {
     }
     return 0;
diff --git a/docs/articles_en/assets/snippets/ov_caching.py b/docs/articles_en/assets/snippets/ov_caching.py
index b4534ebcd2d9c3..02262b18927040 100644
--- a/docs/articles_en/assets/snippets/ov_caching.py
+++ b/docs/articles_en/assets/snippets/ov_caching.py
@@ -44,6 +44,16 @@
 # ! [ov:caching:part3]
 
 # ! [ov:caching:part4]
+core = ov.Core()
+if "GPU" in core.available_devices:
+    core.set_property({props.cache_dir: path_to_cache_dir})
+    config_cache = {}
+    config_cache["CACHE_MODE"] = "OPTIMIZE_SIZE"
+    # Note: the model path must point to the *.xml file, not the *.bin, when using the IR model format.
+    compiled_model = core.compile_model(model=model_path, device_name='GPU', config=config_cache)
+# ! [ov:caching:part4]
+
+# ! [ov:caching:part5]
 import base64
 
 def encrypt_base64(src):
@@ -58,9 +68,9 @@ def decrypt_base64(src):
 config_cache["CACHE_ENCRYPTION_CALLBACKS"] = [encrypt_base64, decrypt_base64]
 model = core.read_model(model=model_path)
 compiled_model = core.compile_model(model=model, device_name=device_name, config=config_cache)
-# ! [ov:caching:part4]
-
 # ! [ov:caching:part5]
+
+# ! [ov:caching:part6]
 import base64
 
 def encrypt_base64(src):
@@ -76,4 +86,4 @@ def decrypt_base64(src):
 config_cache["CACHE_ENCRYPTION_CALLBACKS"] = [encrypt_base64, decrypt_base64]
 config_cache["CACHE_MODE"] = "OPTIMIZE_SIZE"
 compiled_model = core.compile_model(model=model_path, device_name='GPU', config=config_cache)
-# ! [ov:caching:part5]
+# ! [ov:caching:part6]
diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst
index b3253f775bdb02..a58a0c8dd8c27b 100644
--- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst
+++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst
@@ -136,10 +136,10 @@ To check in advance if a particular device supports model caching, your applicat
          :language: cpp
          :fragment: [ov:caching:part3]
 
-Set "cache_encryption_callbacks" config option to enable cache encryption
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Set ``CacheMode`` property to ``OPTIMIZE_SIZE`` to enable weightless caching
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
-If model caching is enabled in the CPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Currently, this property can be set only in ``compile_model``.
+Weightless caching creates a cache file that does not contain the model weights; instead, the weights are loaded from the original model file when the cached model is imported. This can reduce the size of the cache file considerably.
 
 .. tab-set::
 
@@ -157,7 +157,18 @@ If model caching is enabled in the CPU Plugin, the model topology can be encrypt
          :language: cpp
          :fragment: [ov:caching:part4]
 
-If model caching is enabled in the GPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Full encryption only works when the ``CacheMode`` property is set to ``OPTIMIZE_SIZE``.
+.. important::
+
+   Currently, this property is supported only by the GPU Plugin and the IR model format.
+
+.. important::
+
+   Some weights that undergo transformations during model compilation may not be eligible for weightless caching. In such cases, the cache file contains these weights directly, while the weightless mechanism is still used for the rest. The feature supports some of the common transformations and re-applies them when the model is loaded from the cache.
+
+Set "cache_encryption_callbacks" config option to enable cache encryption
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+If model caching is enabled in the CPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Currently, this property can be set only in ``compile_model``.
 
 .. tab-set::
 
@@ -175,6 +186,24 @@ If model caching is enabled in the GPU Plugin, the model topology can be encrypt
          :language: cpp
          :fragment: [ov:caching:part5]
 
+If model caching is enabled in the GPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Full encryption only works when the ``CacheMode`` property is set to ``OPTIMIZE_SIZE``.
+
+.. tab-set::
+
+   .. tab-item:: Python
+      :sync: py
+
+      .. doxygensnippet:: docs/articles_en/assets/snippets/ov_caching.py
+         :language: py
+         :fragment: [ov:caching:part6]
+
+   .. tab-item:: C++
+      :sync: cpp
+
+      .. doxygensnippet:: docs/articles_en/assets/snippets/ov_caching.cpp
+         :language: cpp
+         :fragment: [ov:caching:part6]
+
 .. important::
 
    Currently, this property is supported only by the CPU and GPU plugins. For other HW plugins, setting this property will not encrypt/decrypt the model topology in cache and will not affect performance.
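
The two mechanisms documented above compose: a single `compile_model` call can request the weightless cache mode and the encryption callbacks together, as the new `part6` snippets show. For reference, below is a minimal, illustrative sketch (not part of this diff) of exercising that flow end to end and confirming the cache is reused on a second compile. The model files (`model.xml`/`model.bin`), the `./model_cache` directory, and the use of the `loaded_from_cache` property to verify the cache hit are assumptions for illustration.

```python
# Illustrative sketch only: combines weightless caching (CACHE_MODE = OPTIMIZE_SIZE)
# with cache encryption callbacks, then recompiles to check the cached blob is reused.
# Assumptions: an IR model "model.xml" (+ "model.bin") exists, "./model_cache" is
# writable, and a GPU device is available. Base64 is an encoding, not real
# encryption; it stands in for a proper cipher, as in the snippets above.
import base64

import openvino as ov
import openvino.properties as props

def encrypt_base64(src):
    return base64.b64encode(bytes(src, "utf-8")).decode("utf-8")

def decrypt_base64(src):
    return base64.b64decode(bytes(src, "utf-8")).decode("utf-8")

core = ov.Core()
core.set_property({props.cache_dir: "./model_cache"})

config_cache = {
    "CACHE_MODE": "OPTIMIZE_SIZE",  # weights stay in model.xml/model.bin
    "CACHE_ENCRYPTION_CALLBACKS": [encrypt_base64, decrypt_base64],
}

if "GPU" in core.available_devices:
    # First compilation populates the cache directory.
    core.compile_model(model="model.xml", device_name="GPU", config=config_cache)
    # Second compilation should import the smaller, encrypted cached blob.
    compiled_model = core.compile_model(model="model.xml", device_name="GPU", config=config_cache)
    print(compiled_model.get_property(props.loaded_from_cache))  # True on a cache hit
```

Comparing the sizes of the files under `./model_cache` with and without `CACHE_MODE` set to `OPTIMIZE_SIZE` makes the effect of weightless caching directly visible.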