diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 8a2985a284769a..aa067da4f360fd 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -8,6 +8,7 @@ #include #include "itt.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/ops.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" @@ -1405,6 +1406,13 @@ bool fuse_type_to_constant(const std::shared_ptr& node, new_const->validate_and_infer_types(); new_const->set_friendly_name(constant->get_friendly_name()); ov::copy_runtime_info(constant, new_const); + + const auto& rt_info = node->get_rt_info(); + auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); + if (weightless_caching_attr != rt_info.end()) { + new_const->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] = + weightless_caching_attr->second; + } return true; } return false; diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 318f15ab1a64dc..c2b7133506aebe 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -13,6 +13,7 @@ #include "common_test_utils/ov_test_utils.hpp" #include "openvino/core/model.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset10.hpp" #include "openvino/opsets/opset15.hpp" @@ -2702,3 +2703,38 @@ TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_orig_types FunctionsComparator::Result result = func_comparator(model_ref, model); ASSERT_TRUE(result.valid) << result.message; } + +TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_weightless_cache_info_as_rt_attribute) { + pass::Manager manager; + + auto some_value = opset10::Constant::create(element::f32, Shape{1}, {2}); + auto& node_rt_info = some_value->get_rt_info(); + ov::WeightlessCacheAttribute attr(element::f32.size(), 0, element::f32); + node_rt_info[ov::WeightlessCacheAttribute::get_type_info_static()] = attr; + + ov::ParameterVector inputParams; + ov::ResultVector results; + results.push_back(std::make_shared(some_value->output(0))); + auto model = std::make_shared(results, inputParams); + + type_to_fuse_map empty_type_to_fuse_map = {}; + bool keep_precision_sensitive_in_fp32 = false; + bool convert_input_output_precision = false; + bool store_original_precision_as_rt_attribute = true; + manager.register_pass(precisions_map{{element::f32, element::f16}}, + empty_type_to_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + manager.run_passes(model); + + const auto& ops = model->get_ops(); + auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr& node) { + return ov::op::util::is_constant(node); + }); + + ASSERT_TRUE(it != ops.end()); + const auto& new_rt_info = (*it)->get_rt_info(); + auto weightless_caching_attr_it = new_rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); + ASSERT_TRUE(weightless_caching_attr_it != new_rt_info.end()); +} diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp index fedcb030fb52cf..e3cf2609b26c8d 100644 --- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp +++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp @@ -5,6 +5,7 @@ #pragma once #include "openvino/core/core_visibility.hpp" +#include "openvino/core/node.hpp" #include "openvino/core/runtime_attribute.hpp" namespace ov { @@ -25,14 +26,16 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute { WeightlessCacheAttribute() = delete; - WeightlessCacheAttribute(size_t original_size, size_t bin_offset) + WeightlessCacheAttribute(size_t original_size, size_t bin_offset, ov::element::Type original_dtype) : original_size(original_size), - bin_offset(bin_offset) {} + bin_offset(bin_offset), + original_dtype(original_dtype) {} bool is_copyable() const override; size_t original_size; size_t bin_offset; + ov::element::Type original_dtype; }; } // namespace ov diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 2d1dfba956ea72..d7e250f9916302 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -950,10 +950,12 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector(pugixml::get_uint64_attr(dn, "size")), - static_cast(pugixml::get_uint64_attr(dn, "offset"))); + static_cast(pugixml::get_uint64_attr(dn, "offset")), + ov::element::Type(pugixml::get_str_attr(dn, "element_type"))); } } diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp index 461f063ec26bc5..8a9a35b1e92fe9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp @@ -4,15 +4,170 @@ #pragma once #include +#include #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/memory.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/util/op_types.hpp" +#include "openvino/pass/manager.hpp" #include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" #include "primitive.hpp" +#include "transformations/convert_precision.hpp" namespace cldnn { +struct weights_mem { + std::shared_ptr>> shared_buf = nullptr; + std::shared_ptr transformed_constant = nullptr; + + const uint8_t* get_loaded_data() { + if (transformed_constant) { + return reinterpret_cast(transformed_constant->get_data_ptr()); + } + OPENVINO_ASSERT(shared_buf); + return shared_buf->get_ptr(); + } +}; + +struct weightless_cache_manager { + void set_constant_info(size_t bin_offset, + size_t original_size, + ov::element::Type original_dtype, + ov::element::Type curr_dtype, + ov::Shape shape) { + this->bin_offset = bin_offset; + this->original_size = original_size; + this->original_dtype = original_dtype; + this->curr_dtype = curr_dtype; + this->shape = shape; + do_weightless_caching = true; + + if (original_dtype != curr_dtype) { + do_precision_conversion = true; + } + } + + void invalidate() { + do_weightless_caching = false; + } + + void set_new_dtype(ov::element::Type curr_dtype) { + this->curr_dtype = curr_dtype; + do_precision_conversion = original_dtype != curr_dtype; + } + + bool save(BinaryOutputBuffer& ob, size_t data_size) const { + if (!do_weightless_caching) { + ob << false; + return false; + } + + ob << true; + ob << bin_offset; + ob << do_precision_conversion; + if (do_precision_conversion) { + ob << original_size; + ob << make_data(&original_dtype, sizeof(ov::element::Type)); + ob << make_data(&curr_dtype, sizeof(ov::element::Type)); + + size_t num_dims = shape.size(); + ob << make_data(&num_dims, sizeof(size_t)); + ob << make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type)); + } + return true; + } + + std::shared_ptr load(BinaryInputBuffer& ib, + std::shared_ptr mapped_weights, + size_t data_size) { + ib >> do_weightless_caching; + if (!do_weightless_caching) { + return nullptr; + } + + OPENVINO_ASSERT(mapped_weights != nullptr, "mmap object is null"); + + ib >> bin_offset; + ib >> do_precision_conversion; + if (do_precision_conversion) { + ib >> original_size; + ib >> make_data(&original_dtype, sizeof(ov::element::Type)); + ib >> make_data(&curr_dtype, sizeof(ov::element::Type)); + + size_t num_dims = 0; + ib >> make_data(&num_dims, sizeof(size_t)); + shape.resize(num_dims); + ib >> make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type)); + } else { + original_size = data_size; + } + + auto mem_obj = std::make_shared(); + mem_obj->shared_buf = std::make_shared>>( + mapped_weights->data() + bin_offset, + original_size, + mapped_weights); + + if (should_run_transformations()) { + run_transformations(mem_obj); + } + return mem_obj; + } + +private: + bool do_weightless_caching = false; + bool do_precision_conversion = false; + + size_t bin_offset = SIZE_MAX; + size_t original_size = SIZE_MAX; + ov::element::Type original_dtype = ov::element::Type_t::undefined; + ov::element::Type curr_dtype = ov::element::Type_t::undefined; + ov::Shape shape; + + bool should_run_transformations() { + return do_precision_conversion; + } + + void run_transformations(std::shared_ptr mem_obj) { + auto orig_constant = std::make_shared(original_dtype, + shape, + mem_obj->shared_buf->get_ptr(), + mem_obj->shared_buf); + + ov::ParameterVector inputParams; + ov::ResultVector results; + results.push_back(std::make_shared(orig_constant->output(0))); + auto model = std::make_shared(results, inputParams, "aux"); + + ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations"); + + if (do_precision_conversion) { + precisions_map fp_convert_precision_map = { + {original_dtype, curr_dtype}}; + type_to_fuse_map empty_fuse_map = {}; + const bool keep_precision_sensitive_in_fp32 = false; + const bool convert_input_output_precision = false; + const bool store_original_precision_as_rt_attribute = true; + manager.register_pass(fp_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + } + + manager.run_passes(model); + const auto& ops = model->get_ops(); + auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr& node) { + return ov::op::util::is_constant(node); + }); + OPENVINO_ASSERT(it != ops.end()); + mem_obj->transformed_constant = std::dynamic_pointer_cast(*it); + OPENVINO_ASSERT(mem_obj->transformed_constant->get_element_type() == curr_dtype); + } +}; + /// @brief Provides input data to topology. /// @details This primitive allows to pass data which is known at topology creation. /// For example, weights and biases for scoring networks. @@ -20,21 +175,32 @@ namespace cldnn { struct data : public primitive_base { CLDNN_DECLARE_PRIMITIVE(data) - data() : primitive_base("", {}) {} + data() : primitive_base("", {}) { + cache_info = std::make_shared(); + } /// @brief Constructs data primitive. /// @param id This primitive id. /// @param mem @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. - data(const primitive_id& id, memory::ptr mem) - : primitive_base(id, {}), mem(std::move(mem)) {} + data(const primitive_id& id, memory::ptr mem) : primitive_base(id, {}), mem(std::move(mem)) { + cache_info = std::make_shared(); + } + + data(const primitive_id& id, memory::ptr mem, std::shared_ptr cache_info) + : primitive_base(id, {}), + mem(std::move(mem)), + cache_info(cache_info) { + if (!cache_info) { + this->cache_info = std::make_shared(); + } + } /// @brief @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. memory::ptr mem; - size_t original_size = SIZE_MAX; - size_t bin_offset = SIZE_MAX; + std::shared_ptr cache_info; size_t hash() const override { size_t seed = primitive::hash(); @@ -53,13 +219,8 @@ struct data : public primitive_base { size_t data_size = mem->size(); ob << make_data(&data_size, sizeof(size_t)); - bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size; - - if (is_cache_without_weights) { - ob << true; - ob << bin_offset; - } else { - ob << false; + bool do_weightless_caching = cache_info->save(ob, data_size); + if (!do_weightless_caching) { if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { ob << make_data(mem->buffer_ptr(), data_size); } else { @@ -88,26 +249,12 @@ struct data : public primitive_base { mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false); - bool is_cache_without_weights; - ib >> is_cache_without_weights; - if (is_cache_without_weights && mapped_weights == nullptr) { - OPENVINO_THROW("mmap object is null"); - } - - std::shared_ptr>> shared_buf; - if (is_cache_without_weights) { - ib >> bin_offset; - original_size = data_size; - - shared_buf = std::make_shared>>( - mapped_weights->data() + bin_offset, - data_size, - mapped_weights); - } + auto mem_obj = cache_info->load(ib, mapped_weights, data_size); + bool is_weightless_caching_enabled = mem_obj != nullptr; if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - if (is_cache_without_weights) { - std::memcpy(reinterpret_cast(mem->buffer_ptr()), shared_buf->get_ptr(), data_size); + if (is_weightless_caching_enabled) { + std::memcpy(reinterpret_cast(mem->buffer_ptr()), mem_obj->get_loaded_data(), data_size); } else { ib >> make_data(mem->buffer_ptr(), data_size); } @@ -116,8 +263,8 @@ struct data : public primitive_base { auto& strm = ib.get_engine().get_service_stream(); if (data_size < DATA_BLOCK_SIZE || output_layout.format.is_image_2d()) { std::vector _buf(data_size); - if (is_cache_without_weights) { - std::memcpy(reinterpret_cast(_buf.data()), shared_buf->get_ptr(), data_size); + if (is_weightless_caching_enabled) { + std::memcpy(reinterpret_cast(_buf.data()), mem_obj->get_loaded_data(), data_size); } else { ib >> make_data(_buf.data(), data_size); } @@ -135,9 +282,9 @@ struct data : public primitive_base { size_t copy_size = (data_size > (dst_offset + DATA_BLOCK_SIZE)) ? DATA_BLOCK_SIZE : (data_size - dst_offset); if (buf_flag) { - if (is_cache_without_weights) { + if (is_weightless_caching_enabled) { std::memcpy(reinterpret_cast(_buf1.data()), - shared_buf->get_ptr() + dst_offset, + mem_obj->get_loaded_data() + dst_offset, copy_size); } else { ib >> make_data(_buf1.data(), copy_size); @@ -148,9 +295,9 @@ struct data : public primitive_base { } ev1 = mem->copy_from(strm, _buf1.data(), src_offset, dst_offset, copy_size, is_blocking); } else { - if (is_cache_without_weights) { + if (is_weightless_caching_enabled) { std::memcpy(reinterpret_cast(_buf2.data()), - shared_buf->get_ptr() + dst_offset, + mem_obj->get_loaded_data() + dst_offset, copy_size); } else { ib >> make_data(_buf2.data(), copy_size); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 85173e9eb33e7c..a4129800733875 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -74,11 +74,14 @@ void propagate_constants::run(program& p) { // replace all constant nodes which are relevant for inference (either used by non-const user or marked as output) // with recomputed cldnn::data for (auto& cout : to_replace) { - auto& id_to_replace = cout.first; - auto mem_impl = cout.second; - - auto const_data = - std::make_shared("_cldnn_const_prop_" + id_to_replace, mem_impl /* <<< REMOVE ME WHEN POSSIBLE */); + auto& id_to_replace = std::get<0>(cout); + auto mem_impl = std::get<1>(cout); + auto cache_info = std::get<2>(cout); + auto in_layout = std::get<3>(cout); + + auto const_data = std::make_shared("_cldnn_const_prop_" + id_to_replace, + mem_impl, /* <<< REMOVE ME WHEN POSSIBLE */ + cache_info); auto& new_node = p.get_or_create(const_data); auto& curr_node = p.get_node(id_to_replace); @@ -92,6 +95,25 @@ void propagate_constants::run(program& p) { } } + auto is_reorder_with_only_dtype_change = [&](program_node& dst) { + if (!in_layout) { + return false; + } + auto& dst_layout = dst.get_output_layout(); + if (in_layout->data_type == dst_layout.data_type) { + return false; + } + + auto aux_layout = dst_layout; + aux_layout.data_type = in_layout->data_type; + return aux_layout == *in_layout.get(); + }; + if (is_reorder_with_only_dtype_change(new_node)) { + new_node.as().get_primitive()->cache_info->set_new_dtype(new_node.get_output_layout().data_type); + } else { + new_node.as().get_primitive()->cache_info->invalidate(); + } + curr_node.dependencies.clear(); // remove all constant users (as they will be either removed or replaced by cldnn::data which does not have any // dependencies) @@ -113,9 +135,10 @@ bool propagate_constants::has_non_const_user(program_node& node) const { return false; } -std::list> propagate_constants::calculate(engine& engine, - const ExecutionConfig& config, - std::shared_ptr task_executor) { +std::list, std::shared_ptr>> +propagate_constants::calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor) { if (!has_non_trivial_constants) return {}; @@ -123,15 +146,37 @@ std::list> propagate_constants::calculate(e cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); - for (auto& cin : const_inputs) + std::map, std::shared_ptr>> + weightless_cache_map; + for (auto& cin : const_inputs) { net->set_input_data(cin->id(), cin->get_attached_memory_ptr()); + auto users = cin->get_users(); + if (users.size() == 1 && users.front()->is_type()) { + auto rprim = users.front()->as().get_primitive(); + auto id = rprim->id; + auto cache_ptr = cin->as().get_primitive()->cache_info; + auto layout_ptr = std::make_shared(cin->get_output_layout()); + weightless_cache_map.emplace(id, std::make_pair(cache_ptr, layout_ptr)); + } + } + net->execute({}); net->reset_execution(true); // wait for computations to complete auto outputs = net->get_outputs(); - std::list> ret; - for (auto& out : outputs) ret.push_back({out->id(), out->output_memory_ptr()}); + std::list, std::shared_ptr>> + ret; + for (auto& out : outputs) { + std::shared_ptr cache_ptr = nullptr; + std::shared_ptr layout_ptr = nullptr; + auto it = weightless_cache_map.find(out->id()); + if (it != weightless_cache_map.end()) { + cache_ptr = it->second.first; + layout_ptr = it->second.second; + } + ret.push_back({out->id(), out->output_memory_ptr(), cache_ptr, layout_ptr}); + } return ret; } diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 490076a37f788e..0b7c3d85c37e27 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -211,9 +211,10 @@ class propagate_constants : public base_pass { private: void run(program& p) override; - std::list> calculate(engine& engine, - const ExecutionConfig& config, - std::shared_ptr task_executor); + std::list, std::shared_ptr>> + calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor); bool has_non_const_user(program_node& node) const; void handle_constant(program& prog, program_node& node); void add_constant(program& prog, program_node& node); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 368e25abe2ddac..a9bb813d0ce587 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -14,6 +14,7 @@ #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/primitives/data.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/primitives/mutable_data.hpp" @@ -311,11 +312,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrm_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); + auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); if (weightless_cache_attr != rt_info.end()) { - data_prim->bin_offset = weightless_cache_attr->second.as().bin_offset; - data_prim->original_size = - weightless_cache_attr->second.as().original_size; + auto& attr = weightless_cache_attr->second.as(); + data_prim->cache_info->set_constant_info(attr.bin_offset, + attr.original_size, + attr.original_dtype, + op.get_output_element_type(0), + op.get_output_shape(0)); } } } diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp index 839b2640ca180c..17e1ed6d0a9bbe 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp @@ -8,48 +8,40 @@ #include "common_test_utils/common_utils.hpp" #include "common_test_utils/file_utils.hpp" #include "common_test_utils/ov_tensor_utils.hpp" -#include "common_test_utils/subgraph_builders/2_input_subtract.hpp" -#include "common_test_utils/subgraph_builders/concat_with_params.hpp" -#include "common_test_utils/subgraph_builders/conv_bias.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu_no_reshapes.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu_non_zero.hpp" -#include "common_test_utils/subgraph_builders/convert_transpose.hpp" -#include "common_test_utils/subgraph_builders/detection_output.hpp" -#include "common_test_utils/subgraph_builders/kso_func.hpp" -#include "common_test_utils/subgraph_builders/matmul_bias.hpp" -#include "common_test_utils/subgraph_builders/multi_single_conv.hpp" -#include "common_test_utils/subgraph_builders/multiple_input_outpput_double_concat.hpp" -#include "common_test_utils/subgraph_builders/nested_branch_conv_concat.hpp" -#include "common_test_utils/subgraph_builders/nested_split_conv_concat.hpp" #include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp" #include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp" -#include "common_test_utils/subgraph_builders/single_conv.hpp" -#include "common_test_utils/subgraph_builders/single_split.hpp" -#include "common_test_utils/subgraph_builders/split_concat.hpp" -#include "common_test_utils/subgraph_builders/split_conv_concat.hpp" -#include "common_test_utils/subgraph_builders/split_multi_conv_concat.hpp" #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp" #include "common_test_utils/test_common.hpp" #include "openvino/pass/serialize.hpp" namespace { -class CheckWeightlessCacheAccuracy : public ::testing::Test, - public ::testing::WithParamInterface { +typedef std::tuple testParams; + +class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface { public: - static std::string get_test_case_name(::testing::TestParamInfo obj) { - bool use_compile_model_api = obj.param; + static std::string get_test_case_name(::testing::TestParamInfo obj) { + bool use_compile_model_api_; + ov::element::Type inference_mode_; + ov::element::Type model_dtype_; + std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param; std::ostringstream result; - result << "use_compile_model_api=" << use_compile_model_api; + const char separator = '_'; + result << "use_compile_model_api=" << use_compile_model_api_ << separator; + result << "inference_mode=" << inference_mode_ << separator; + result << "model_dtype=" << model_dtype_; return result.str(); } + protected: std::shared_ptr model; std::string xml_path; std::string bin_path; std::string cache_path; - bool use_compile_model_api; // for loading from cache + std::string cache_dir; + bool use_compile_model_api; // for loading from cache + ov::element::Type inference_mode; + ov::element::Type model_dtype; void SetUp() override; void TearDown() override; @@ -61,36 +53,46 @@ void CheckWeightlessCacheAccuracy::SetUp() { xml_path = filePrefix + ".xml"; bin_path = filePrefix + ".bin"; cache_path = filePrefix + ".blob"; - use_compile_model_api = GetParam(); + cache_dir = filePrefix + "_cache_dir"; + + std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam(); } void CheckWeightlessCacheAccuracy::TearDown() { std::remove(xml_path.c_str()); std::remove(bin_path.c_str()); std::remove(cache_path.c_str()); + + ov::test::utils::removeFilesWithExt(cache_dir, "blob"); + ov::test::utils::removeFilesWithExt(cache_dir, "cl_cache"); + ov::test::utils::removeDir(cache_dir); } void CheckWeightlessCacheAccuracy::run() { - ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) }; - ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) }; + ov::AnyMap config = {ov::cache_dir(cache_dir), + ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::hint::inference_precision(inference_mode)}; + ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::weights_path(bin_path), + ov::hint::inference_precision(inference_mode)}; auto core = ov::test::utils::PluginCache::get().core(); ov::pass::Serialize(xml_path, bin_path).run_on_model(model); ov::CompiledModel compiled_model; - OV_ASSERT_NO_THROW(compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config)); + compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config); - auto ofstr = std::ofstream(cache_path, std::ofstream::binary); - OV_ASSERT_NO_THROW(compiled_model.export_model(ofstr)); - ofstr.close(); + if (!use_compile_model_api) { + auto ofstr = std::ofstream(cache_path, std::ofstream::binary); + compiled_model.export_model(ofstr); + ofstr.close(); + } auto ifstr = std::ifstream(cache_path, std::ifstream::binary); ov::CompiledModel imported_model; if (use_compile_model_api) { - OV_ASSERT_NO_THROW(imported_model = - core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config)); + imported_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config); } else { - OV_ASSERT_NO_THROW(imported_model = - core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path)); + imported_model = core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path); } ifstr.close(); @@ -99,39 +101,57 @@ void CheckWeightlessCacheAccuracy::run() { for (size_t param_idx = 0; param_idx < model->get_parameters().size(); ++param_idx) { auto input = model->get_parameters().at(param_idx); - auto tensor = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input->get_shape()); + auto tensor = ov::test::utils::create_and_fill_tensor_real_distribution(input->get_element_type(), + input->get_shape(), + -100, + 100, + param_idx); orig_req.set_tensor(input, tensor); new_req.set_tensor(input, tensor); } - OV_ASSERT_NO_THROW(orig_req.infer()); - OV_ASSERT_NO_THROW(new_req.infer()); + orig_req.infer(); + new_req.infer(); auto result_vector = model->get_results(); for (auto& res : result_vector) { auto orig_out = orig_req.get_tensor(res); auto new_out = new_req.get_tensor(res); - ov::test::utils::compare(orig_out, new_out); + ov::test::utils::compare(orig_out, new_out, inference_mode); } } TEST_P(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) { - model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, model_dtype)); + OV_ASSERT_NO_THROW(run()); } TEST_P(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) { - model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, model_dtype)); + OV_ASSERT_NO_THROW(run()); } TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) { - model = ov::test::utils::make_ti_with_lstm_cell(ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_ti_with_lstm_cell(model_dtype)); + OV_ASSERT_NO_THROW(run()); } -INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy, - ::testing::Bool(), +const std::vector inference_modes = { + ov::element::f32, + ov::element::f16, +}; + +const std::vector model_dtypes = { + ov::element::f32, + ov::element::f16, + ov::element::bf16, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, + CheckWeightlessCacheAccuracy, + ::testing::Combine(::testing::Bool(), + ::testing::ValuesIn(inference_modes), + ::testing::ValuesIn(model_dtypes)), CheckWeightlessCacheAccuracy::get_test_case_name); } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp index 7abdbcb8c2fc52..7b4f27b5af05b4 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp @@ -23,11 +23,11 @@ using namespace ov; namespace shape_infer_tests { struct eltwise_test_params { - layout input1_layout; - layout input2_layout; + cldnn::layout input1_layout; + cldnn::layout input2_layout; eltwise_mode mode; AutoBroadcastSpec auto_broadcast_spec; - layout expected_layout; + cldnn::layout expected_layout; std::vector stride; };