From b4b8eae0012bd99d85ec4aa9c62d9683f9b9d8b0 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 14:42:55 +0100 Subject: [PATCH 01/18] start visitor Signed-off-by: Tony Xiang --- .../include/power_grid_model/auxiliary/dataset.hpp | 3 +++ .../auxiliary/serialization/deserializer.hpp | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp index 07259ebf9..f8c228a2c 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp @@ -40,6 +40,9 @@ struct ComponentInfo { // for non-uniform component, this is -1, we use indptr to describe the elements per scenario Idx elements_per_scenario{}; Idx total_elements{}; + // does the dataset contain only a subset of attributes with meaningful value + bool has_attribute_indications{false}; + std::vector attribute_indications{}; }; struct DatasetInfo { diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index 5e7025837..dcbd1d864 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -152,6 +152,14 @@ struct DefaultNullVisitor : msgpack::null_visitor { } }; +struct NullVisitorCheckMap : DefaultNullVisitor { + bool has_map{false}; + bool start_map(uint32_t /*num_kv_pairs*/) { + has_map = true; + return true; + } +}; + template struct DefaultErrorVisitor : DefaultNullVisitor { static constexpr std::string_view static_err_msg = "Unexpected data type!\n"; From 32b637b305abefe62a7f78c3cbe58f24494f022a 
Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 14:47:14 +0100 Subject: [PATCH 02/18] parse has map in the first run Signed-off-by: Tony Xiang --- .../auxiliary/serialization/deserializer.hpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index dcbd1d864..4020ab074 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -356,6 +356,7 @@ template <> struct ValueVisitor> : DefaultErrorVisitor using MapArrayVisitor = detail::MapArrayVisitor; using StringVisitor = detail::StringVisitor; using BoolVisitor = detail::BoolVisitor; @@ -371,6 +372,7 @@ class Deserializer { std::string_view component; Idx size; size_t offset; + bool has_map; }; using DataByteMeta = std::vector>; using AttributeByteMeta = std::vector>>; @@ -499,6 +501,12 @@ class Deserializer { msgpack::parse(data_, size_, offset_, visitor); } + bool parse_skip_check_map() { + NullVisitorCheckMap visitor{}; + msgpack::parse(data_, size_, offset_, visitor); + return visitor.has_map; + } + WritableDataset pre_parse() { try { return pre_parse_impl(); @@ -636,9 +644,9 @@ class Deserializer { while (n_components-- != 0) { component_key_ = parse_string(); Idx const component_size = parse_map_array().size; - count_per_scenario.push_back({component_key_, component_size, offset_}); - // skip all the real content - parse_skip(); + // skip all the real content but check if it has map + bool const has_map = parse_skip_check_map(); + count_per_scenario.push_back({component_key_, component_size, offset_, has_map}); } component_key_ = {}; return count_per_scenario; From 
fd8ff8c7c4a1c28162c48de5b2c09b02dcb948aa Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 14:56:34 +0100 Subject: [PATCH 03/18] enable attribute indications Signed-off-by: Tony Xiang --- .../include/power_grid_model/auxiliary/dataset.hpp | 7 +++++++ .../auxiliary/serialization/deserializer.hpp | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp index f8c228a2c..55a19219a 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp @@ -339,6 +339,13 @@ template class Dataset { add_component_info_impl(component, elements_per_scenario, total_elements); } + void enable_atrribute_indications(std::string_view component) + requires is_indptr_mutable_v + { + Idx const idx = find_component(component, true); + dataset_info_.component_info[idx].has_attribute_indications = true; + } + void add_buffer(std::string_view component, std::integral auto elements_per_scenario_, std::integral auto total_elements_, Indptr* indptr, Data* data) requires(!is_indptr_mutable_v) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index 4020ab074..2f9bdfe6e 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -695,6 +695,12 @@ class Deserializer { elements_per_scenario * batch_size; // multiply handler.add_component_info(component_key_, elements_per_scenario, total_elements); msg_data_offsets_.push_back(component_byte_meta); + // check if all scenarios 
does not have any map + // if yes, enable attribute indications + if (std::none_of(component_byte_meta.cbegin(), component_byte_meta.cend(), + [](auto const& x) { return x.has_map; })) { + handler.enable_atrribute_indications(component_key_); + } component_key_ = {}; } From d983c773cbe45f85faaf2ce8901574f604862430 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 15:02:23 +0100 Subject: [PATCH 04/18] set attribute indications to dataset Signed-off-by: Tony Xiang --- .../include/power_grid_model/auxiliary/dataset.hpp | 8 ++++++++ .../auxiliary/serialization/deserializer.hpp | 9 +++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp index 55a19219a..f1c53ac4a 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/dataset.hpp @@ -346,6 +346,14 @@ template class Dataset { dataset_info_.component_info[idx].has_attribute_indications = true; } + void set_attribute_indications(std::string_view component, std::span attribute_indications) + requires is_indptr_mutable_v + { + Idx const idx = find_component(component, true); + dataset_info_.component_info[idx].attribute_indications = {attribute_indications.begin(), + attribute_indications.end()}; + } + void add_buffer(std::string_view component, std::integral auto elements_per_scenario_, std::integral auto total_elements_, Indptr* indptr, Data* data) requires(!is_indptr_mutable_v) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index 2f9bdfe6e..712dd2a7a 100644 --- 
a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -579,7 +579,7 @@ class Deserializer { WritableDataset handler{is_batch_, batch_size, dataset, *meta_data_}; count_data(handler, data_counts); - parse_predefined_attributes(handler.dataset(), attributes); + parse_predefined_attributes(handler, attributes); return handler; } @@ -600,8 +600,9 @@ class Deserializer { return attributes; } - void parse_predefined_attributes(MetaDataset const& dataset, AttributeByteMeta const& attributes) { + void parse_predefined_attributes(WritableDataset& handler, AttributeByteMeta const& attributes) { root_key_ = "attributes"; + MetaDataset const& dataset = handler.dataset(); for (auto const& single_component : attributes) { component_key_ = single_component.first; MetaComponent const* const component = &dataset.get_component(component_key_); @@ -611,6 +612,10 @@ class Deserializer { attributes_per_component.push_back(&component->get_attribute(single_component.second[element_number_])); } attributes_[component] = std::move(attributes_per_component); + // set attribute indication if enabled + if (handler.get_component_info(component_key_).has_attribute_indications) { + handler.set_attribute_indications(component_key_, attributes_[component]); + } element_number_ = -1; } component_key_ = {}; From cc6c42256bc40c4142437adfdc45fa5541a00202 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 15:05:12 +0100 Subject: [PATCH 05/18] optimize process Signed-off-by: Tony Xiang --- .../auxiliary/serialization/deserializer.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index
712dd2a7a..f08cde6aa 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -699,11 +699,12 @@ class Deserializer { elements_per_scenario < 0 ? std::reduce(counter.cbegin(), counter.cend()) : // aggregation elements_per_scenario * batch_size; // multiply handler.add_component_info(component_key_, elements_per_scenario, total_elements); - msg_data_offsets_.push_back(component_byte_meta); // check if all scenarios does not have any map - // if yes, enable attribute indications - if (std::none_of(component_byte_meta.cbegin(), component_byte_meta.cend(), - [](auto const& x) { return x.has_map; })) { + bool const has_attribute_indications = std::none_of(component_byte_meta.cbegin(), component_byte_meta.cend(), + [](auto const& x) { return x.has_map; }); + msg_data_offsets_.push_back(std::move(component_byte_meta)); + // enable attribute indications if possible + if (has_attribute_indications) { handler.enable_atrribute_indications(component_key_); } component_key_ = {}; From af442327d5de4b1183e562a574ecb6f5f42c39ca Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 15:19:37 +0100 Subject: [PATCH 06/18] skip whole scenario Signed-off-by: Tony Xiang --- .../auxiliary/serialization/deserializer.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index f08cde6aa..d0871a8b1 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -775,6 +775,15 @@ class Deserializer { auto const reordered_attribute_buffers = 
detail::is_columnar_v ? detail::reordered_attribute_buffers(buffer, attributes) : std::vector>{}; + // for columnar buffer + // if there is no intersection between the attributes and the usered provied buffer + // skip the whole component for all scenarios and elements + if constexpr (std::same_as) { + if (info.has_attribute_indications && reordered_attribute_buffers.empty()) { + component_key_ = ""; + return; + } + } BufferView const buffer_view{ .buffer = &buffer, .idx = 0, .reordered_attribute_buffers = reordered_attribute_buffers}; From 926993bd09e5215563496218be3247efa993d3ed Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 15:46:12 +0100 Subject: [PATCH 07/18] skip whole scenario Signed-off-by: Tony Xiang --- .../auxiliary/serialization/deserializer.hpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index d0871a8b1..17676e91a 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -776,9 +776,10 @@ class Deserializer { ? 
detail::reordered_attribute_buffers(buffer, attributes) : std::vector>{}; // for columnar buffer - // if there is no intersection between the attributes and the usered provied buffer + // if there is no intersection between the pre-defined attributes and the user provied buffer + // and the whole component does not have map // skip the whole component for all scenarios and elements - if constexpr (std::same_as) { + if constexpr (std::same_as) { if (info.has_attribute_indications && reordered_attribute_buffers.empty()) { component_key_ = ""; return; @@ -817,6 +818,16 @@ class Deserializer { return; } + // for columnar buffer + // if there is no intersection between the pre-defined attributes and the usered provied buffer + // and this scenario does not have map + // skip the whole component scenario for all elements + if constexpr (std::same_as) { + if (buffer_view.reordered_attribute_buffers.empty() && !msg_data.has_map) { + return; + } + } + // set offset and skip array header offset_ = msg_data.offset; parse_map_array(); From 9db08f97d47ab2eb2315305faa6387b324e4b086 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 15:46:44 +0100 Subject: [PATCH 08/18] commments Signed-off-by: Tony Xiang --- .../power_grid_model/auxiliary/serialization/deserializer.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index 17676e91a..e6c77af6d 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -778,7 +778,7 @@ class Deserializer { // for columnar buffer // if there is no intersection between the pre-defined attributes and the user provied buffer // and the whole 
component does not have map - // skip the whole component for all scenarios and elements + // skip the whole component for all scenarios and all elements if constexpr (std::same_as) { if (info.has_attribute_indications && reordered_attribute_buffers.empty()) { component_key_ = ""; @@ -821,7 +821,7 @@ class Deserializer { // for columnar buffer // if there is no intersection between the pre-defined attributes and the usered provied buffer // and this scenario does not have map - // skip the whole component scenario for all elements + // skip the whole scenario for this component for all elements if constexpr (std::same_as) { if (buffer_view.reordered_attribute_buffers.empty() && !msg_data.has_map) { return; From 4d8e19edbd898b7fd6d8b3bc1bf0d3ea01587675 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 15:54:48 +0100 Subject: [PATCH 09/18] C-API definition Signed-off-by: Tony Xiang --- .../include/power_grid_model_c/dataset.h | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h b/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h index 78478d887..f1ba835db 100644 --- a/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h +++ b/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h @@ -83,6 +83,46 @@ PGM_API PGM_Idx PGM_dataset_info_elements_per_scenario(PGM_Handle* handle, PGM_D */ PGM_API PGM_Idx PGM_dataset_info_total_elements(PGM_Handle* handle, PGM_DatasetInfo const* info, PGM_Idx component_idx); +/** + * @brief Return if a component has attribute indications. + * + * Attribute indications are used to indicate the presence of meaningful attributes for a certain component in the + * dataset. + * + * @param handle + * @param info A pointer to the info object. + * @param component_idx The index number of the component. + * @return 1 if the component has attribute indications, 0 if it does not.
+ */ +PGM_API PGM_Idx PGM_dataset_info_has_attribute_indications(PGM_Handle* handle, PGM_DatasetInfo const* info, + PGM_Idx component_idx); + +/** + * @brief Return the number of attribute indications for a component. + * + * @param handle + * @param info A pointer to the info object. + * @param component_idx The index number of the component. + * @return The number of attribute indications for the component. + * It is UB if PGM_dataset_info_has_attribute_indications() returns zero. + */ +PGM_API PGM_Idx PGM_dataset_info_n_attribute_indications(PGM_Handle* handle, PGM_DatasetInfo const* info, + PGM_Idx component_idx); + +/** + * @brief Return the name of the i-th attribute indication for a component. + * + * @param handle + * @param info A pointer to the info object. + * @param component_idx The index number of the component. + * @param attribute_idx The index number of attribute indication. + * @return A pointer to the null-terminated string of the attribute indication. + * The pointer has the same lifetime as the input info pointer. + * It is UB if PGM_dataset_info_has_attribute_indications() returns zero, or if attribute_idx is out of bounds. + */ +PGM_API char const* PGM_dataset_info_n_attribute_indications(PGM_Handle* handle, PGM_DatasetInfo const* info, + PGM_Idx component_idx, PGM_Idx attribute_idx); + /** * @brief Create an instance of PGM_ConstDataset.
* @param handle From d12b5f54d831d0bb90bd27f529ad97a8355be9b1 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Sat, 18 Jan 2025 16:00:30 +0100 Subject: [PATCH 10/18] c api implementation Signed-off-by: Tony Xiang --- .../include/power_grid_model_c/dataset.h | 2 +- .../power_grid_model_c/src/dataset.cpp | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h b/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h index f1ba835db..2b53be27a 100644 --- a/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h +++ b/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h @@ -120,7 +120,7 @@ PGM_API PGM_Idx PGM_dataset_info_n_attribute_indications(PGM_Handle* handle, PGM * The pointer has the same lifetime as the input info pointer. * It is UB if PGM_dataset_info_has_attribute_indications() returns zero, or if attribute_idx is out of bounds. 
*/ -PGM_API char const* PGM_dataset_info_n_attribute_indications(PGM_Handle* handle, PGM_DatasetInfo const* info, +PGM_API char const* PGM_dataset_info_name_attribute_indications(PGM_Handle* handle, PGM_DatasetInfo const* info, PGM_Idx component_idx, PGM_Idx attribute_idx); /** diff --git a/power_grid_model_c/power_grid_model_c/src/dataset.cpp b/power_grid_model_c/power_grid_model_c/src/dataset.cpp index 3b19492b7..e395e2688 100644 --- a/power_grid_model_c/power_grid_model_c/src/dataset.cpp +++ b/power_grid_model_c/power_grid_model_c/src/dataset.cpp @@ -43,6 +43,21 @@ PGM_Idx PGM_dataset_info_total_elements(PGM_Handle* /*unused*/, PGM_DatasetInfo return info->component_info[component_idx].total_elements; } +PGM_Idx PGM_dataset_info_has_attribute_indications(PGM_Handle* /* handle */, PGM_DatasetInfo const* info, + PGM_Idx component_idx) { + return static_cast(info->component_info[component_idx].has_attribute_indications); +} + +PGM_Idx PGM_dataset_info_n_attribute_indications(PGM_Handle* /* handle */, PGM_DatasetInfo const* info, + PGM_Idx component_idx) { + return static_cast(info->component_info[component_idx].attribute_indications.size()); +} + +char const* PGM_dataset_info_name_attribute_indications(PGM_Handle* /* handle */, PGM_DatasetInfo const* info, + PGM_Idx component_idx, PGM_Idx attribute_idx) { + return info->component_info[component_idx].attribute_indications[attribute_idx]->name; +} + // const dataset PGM_ConstDataset* PGM_create_dataset_const(PGM_Handle* handle, char const* dataset, PGM_Idx is_batch, From d0887bc6574d0fa20b904c8d5af22eac1a4e4a60 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Wed, 22 Jan 2025 13:59:55 +0100 Subject: [PATCH 11/18] fix bug on offset Signed-off-by: Tony Xiang --- .../power_grid_model/auxiliary/serialization/deserializer.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp 
b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp index e6c77af6d..01a5ee7fa 100644 --- a/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp +++ b/power_grid_model_c/power_grid_model/include/power_grid_model/auxiliary/serialization/deserializer.hpp @@ -649,9 +649,10 @@ class Deserializer { while (n_components-- != 0) { component_key_ = parse_string(); Idx const component_size = parse_map_array().size; + size_t const scenario_offset = offset_; // skip all the real content but check if it has map bool const has_map = parse_skip_check_map(); - count_per_scenario.push_back({component_key_, component_size, offset_, has_map}); + count_per_scenario.push_back({component_key_, component_size, scenario_offset, has_map}); } component_key_ = {}; return count_per_scenario; From 0060c9a68f1bc70c1cb4c5fc000aebfb1c4aa883 Mon Sep 17 00:00:00 2001 From: Tony Xiang Date: Wed, 22 Jan 2025 15:02:12 +0100 Subject: [PATCH 12/18] fix a bug on relevant filter Signed-off-by: Tony Xiang --- src/power_grid_model/_core/power_grid_dataset.py | 16 +++++++++------- tests/unit/test_serialization.py | 7 ++++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/power_grid_model/_core/power_grid_dataset.py b/src/power_grid_model/_core/power_grid_dataset.py index 92285cb8d..ad0724b9e 100644 --- a/src/power_grid_model/_core/power_grid_dataset.py +++ b/src/power_grid_model/_core/power_grid_dataset.py @@ -26,7 +26,7 @@ power_grid_core as pgc, ) from power_grid_model._core.power_grid_meta import ComponentMetaData, DatasetMetaData, power_grid_meta_data -from power_grid_model._utils import get_dataset_type, is_columnar, is_nan_or_equivalent, process_data_filter +from power_grid_model._utils import get_dataset_type, is_columnar, is_nan_or_equivalent, is_sparse, process_data_filter from power_grid_model.data_types import AttributeType, ComponentData, Dataset from power_grid_model.enum 
import ComponentAttributeFilterOptions from power_grid_model.typing import ComponentAttributeMapping, _ComponentAttributeMappingDict @@ -455,22 +455,24 @@ def _get_buffer_properties(self, info: CDatasetInfo) -> Mapping[ComponentType, B if component in self._data_filter } - def _filter_attributes(self, attributes): + def _filter_attributes(self, buffer): + if is_sparse(buffer): + attributes = buffer["data"] + else: + attributes = buffer keys_to_remove = [] for attr, array in attributes.items(): - if is_columnar(array): - continue if is_nan_or_equivalent(array): keys_to_remove.append(attr) for key in keys_to_remove: del attributes[key] def _filter_with_mapping(self): - for component_type, attributes in self._data.items(): + for component_type, component_buffer in self._data.items(): if component_type in self._data_filter: filter_option = self._data_filter[component_type] - if filter_option is ComponentAttributeFilterOptions.relevant: - self._filter_attributes(attributes) + if filter_option is ComponentAttributeFilterOptions.relevant and is_columnar(component_buffer): + self._filter_attributes(component_buffer) def _post_filtering(self): if isinstance(self._data_filter, dict): diff --git a/tests/unit/test_serialization.py b/tests/unit/test_serialization.py index c805c3881..bf7ae7867 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -618,9 +618,9 @@ def assert_serialization_correct(deserialized_dataset: Dataset, serialized_datas def _check_only_relevant_attributes_present(component_values) -> bool: + if isinstance(component_values, np.ndarray): + return True for array in component_values.values(): - if not isinstance(array, np.ndarray): - continue if (array.dtype == np.float64 and np.isnan(array).all()) or ( array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min) ): @@ -633,7 +633,8 @@ def assert_deserialization_filtering_correct(deserialized_dataset: Dataset, data return True if data_filter is 
ComponentAttributeFilterOptions.relevant: for component_values in deserialized_dataset.values(): - if not _check_only_relevant_attributes_present(component_values): + buffer = component_values if not is_sparse(component_values) else component_values["data"] + if not _check_only_relevant_attributes_present(buffer): return False return True From 159eae8d863db205e2c840e87d0dec6a459ae4c1 Mon Sep 17 00:00:00 2001 From: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> Date: Thu, 13 Feb 2025 13:22:22 +0100 Subject: [PATCH 13/18] change name Signed-off-by: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> --- .../power_grid_model_c/include/power_grid_model_c/dataset.h | 4 ++-- power_grid_model_c/power_grid_model_c/src/dataset.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h b/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h index 2b53be27a..84be3827c 100644 --- a/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h +++ b/power_grid_model_c/power_grid_model_c/include/power_grid_model_c/dataset.h @@ -120,8 +120,8 @@ PGM_API PGM_Idx PGM_dataset_info_n_attribute_indications(PGM_Handle* handle, PGM * The pointer has the same lifetime as the input info pointer. * It is UB if PGM_dataset_info_has_attribute_indications() returns zero, or if attribute_idx is out of bounds. */ -PGM_API char const* PGM_dataset_info_name_attribute_indications(PGM_Handle* handle, PGM_DatasetInfo const* info, - PGM_Idx component_idx, PGM_Idx attribute_idx); +PGM_API char const* PGM_dataset_info_attribute_name(PGM_Handle* handle, PGM_DatasetInfo const* info, + PGM_Idx component_idx, PGM_Idx attribute_idx); /** * @brief Create an instance of PGM_ConstDataset. 
diff --git a/power_grid_model_c/power_grid_model_c/src/dataset.cpp b/power_grid_model_c/power_grid_model_c/src/dataset.cpp index e395e2688..0b9e3bee6 100644 --- a/power_grid_model_c/power_grid_model_c/src/dataset.cpp +++ b/power_grid_model_c/power_grid_model_c/src/dataset.cpp @@ -53,8 +53,8 @@ PGM_Idx PGM_dataset_info_n_attribute_indications(PGM_Handle* /* handle */, PGM_D return static_cast(info->component_info[component_idx].attribute_indications.size()); } -char const* PGM_dataset_info_name_attribute_indications(PGM_Handle* /* handle */, PGM_DatasetInfo const* info, - PGM_Idx component_idx, PGM_Idx attribute_idx) { +char const* PGM_dataset_info_attribute_name(PGM_Handle* /* handle */, PGM_DatasetInfo const* info, + PGM_Idx component_idx, PGM_Idx attribute_idx) { return info->component_info[component_idx].attribute_indications[attribute_idx]->name; } From 2cdd4073431f417ee10fd046eff256be781002ba Mon Sep 17 00:00:00 2001 From: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> Date: Thu, 13 Feb 2025 13:28:37 +0100 Subject: [PATCH 14/18] set c binding Signed-off-by: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> --- src/power_grid_model/_core/power_grid_core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/power_grid_model/_core/power_grid_core.py b/src/power_grid_model/_core/power_grid_core.py index 5495e749b..1b4da9b8b 100644 --- a/src/power_grid_model/_core/power_grid_core.py +++ b/src/power_grid_model/_core/power_grid_core.py @@ -430,6 +430,18 @@ def dataset_info_elements_per_scenario( # type: ignore[empty-body] def dataset_info_total_elements(self, info: DatasetInfoPtr, component_idx: int) -> int: # type: ignore[empty-body] pass # pragma: no cover + @make_c_binding + def dataset_info_has_attribute_indications(self, info: DatasetInfoPtr, component_idx: int) -> int: # type: ignore[empty-body] + pass # pragma: no cover + + @make_c_binding + def dataset_info_n_attribute_indications(self, info: DatasetInfoPtr, 
component_idx: int) -> int: # type: ignore[empty-body] + pass # pragma: no cover + + @make_c_binding + def dataset_info_attribute_name(self, info: DatasetInfoPtr, component_idx: int, attribute_idx: int) -> str: # type: ignore[empty-body] + pass # pragma: no cover + @make_c_binding def create_dataset_mutable( # type: ignore[empty-body] self, dataset: str, is_batch: int, batch_size: int From 34614ca1a5e4ba9c13598d50b4214a9b9e5a4a3a Mon Sep 17 00:00:00 2001 From: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> Date: Thu, 13 Feb 2025 13:30:33 +0100 Subject: [PATCH 15/18] format Signed-off-by: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> --- src/power_grid_model/_core/power_grid_core.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/power_grid_model/_core/power_grid_core.py b/src/power_grid_model/_core/power_grid_core.py index 1b4da9b8b..17169eea3 100644 --- a/src/power_grid_model/_core/power_grid_core.py +++ b/src/power_grid_model/_core/power_grid_core.py @@ -431,15 +431,21 @@ def dataset_info_total_elements(self, info: DatasetInfoPtr, component_idx: int) pass # pragma: no cover @make_c_binding - def dataset_info_has_attribute_indications(self, info: DatasetInfoPtr, component_idx: int) -> int: # type: ignore[empty-body] + def dataset_info_has_attribute_indications( # type: ignore[empty-body] + self, info: DatasetInfoPtr, component_idx: int + ) -> int: pass # pragma: no cover @make_c_binding - def dataset_info_n_attribute_indications(self, info: DatasetInfoPtr, component_idx: int) -> int: # type: ignore[empty-body] + def dataset_info_n_attribute_indications( # type: ignore[empty-body] + self, info: DatasetInfoPtr, component_idx: int + ) -> int: pass # pragma: no cover @make_c_binding - def dataset_info_attribute_name(self, info: DatasetInfoPtr, component_idx: int, attribute_idx: int) -> str: # type: ignore[empty-body] + def dataset_info_attribute_name( # type: ignore[empty-body] + self, info: 
DatasetInfoPtr, component_idx: int, attribute_idx: int + ) -> str: pass # pragma: no cover @make_c_binding From 81fa0a3f5c41134b7776a2b1e4e37a9ca8a925bc Mon Sep 17 00:00:00 2001 From: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> Date: Thu, 13 Feb 2025 13:37:08 +0100 Subject: [PATCH 16/18] get indications Signed-off-by: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> --- .../_core/power_grid_dataset.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/power_grid_model/_core/power_grid_dataset.py b/src/power_grid_model/_core/power_grid_dataset.py index 28a5549f9..1d6d32b15 100644 --- a/src/power_grid_model/_core/power_grid_dataset.py +++ b/src/power_grid_model/_core/power_grid_dataset.py @@ -132,6 +132,28 @@ def total_elements(self) -> Mapping[ComponentType, int]: for idx, component_name in enumerate(self.components()) } + def attribute_indications(self) -> Mapping[ComponentType, None | list[str]]: + """ + The attribute indications in the dataset. + + Returns: + A map of component to its attribute indications. 
+ None means no attribute indications + """ + result_dict = {} + components = self.components() + for component_idx, component_name in enumerate(components): + has_indications = pgc.dataset_info_has_attribute_indications(self._info, component_idx) + if has_indications == 0: + result_dict[component_name] = None + else: + n_indications = pgc.dataset_info_n_attribute_indications(self._info, component_idx) + result_dict[component_name] = [ + pgc.dataset_info_attribute_name(self._info, component_idx, attribute_idx) + for attribute_idx in range(n_indications) + ] + return result_dict + class CMutableDataset: """ From 474f468c0d403c75aa786d5bec564359039c0903 Mon Sep 17 00:00:00 2001 From: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> Date: Thu, 13 Feb 2025 13:59:29 +0100 Subject: [PATCH 17/18] python api passes all existing tests Signed-off-by: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> --- src/power_grid_model/_core/power_grid_dataset.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/power_grid_model/_core/power_grid_dataset.py b/src/power_grid_model/_core/power_grid_dataset.py index 1d6d32b15..993de8c1b 100644 --- a/src/power_grid_model/_core/power_grid_dataset.py +++ b/src/power_grid_model/_core/power_grid_dataset.py @@ -132,7 +132,7 @@ def total_elements(self) -> Mapping[ComponentType, int]: for idx, component_name in enumerate(self.components()) } - def attribute_indications(self) -> Mapping[ComponentType, None | list[str]]: + def attribute_indications(self) -> Mapping[ComponentType, None | list[AttributeType]]: """ The attribute indications in the dataset. 
@@ -468,6 +468,7 @@ def _get_buffer_properties(self, info: CDatasetInfo) -> Mapping[ComponentType, B components = info.components() n_elements_per_scenario = info.elements_per_scenario() n_total_elements = info.total_elements() + attribute_indications = info.attribute_indications() return { component: BufferProperties( @@ -479,6 +480,7 @@ def _get_buffer_properties(self, info: CDatasetInfo) -> Mapping[ComponentType, B columns=_get_filtered_attributes( schema=self._schema[component], component_data_filter=self._data_filter[component], + attribute_indication=attribute_indications[component], ), ) for component in components @@ -513,11 +515,14 @@ def _post_filtering(self): def _get_filtered_attributes( schema: ComponentMetaData, component_data_filter: set[str] | list[str] | None | ComponentAttributeFilterOptions, -) -> list[str] | None: + attribute_indication: None | list[AttributeType], +) -> list[AttributeType] | None: if component_data_filter is None: return None if isinstance(component_data_filter, ComponentAttributeFilterOptions): + if component_data_filter == ComponentAttributeFilterOptions.relevant and attribute_indication is not None: + return attribute_indication return [] if schema.dtype.names is None else list(schema.dtype.names) return list(component_data_filter) From 96d7c79c22010815e64896616000ccbb6cd3244c Mon Sep 17 00:00:00 2001 From: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> Date: Thu, 13 Feb 2025 14:57:34 +0100 Subject: [PATCH 18/18] fix lint Signed-off-by: Tony Xiang <19280867+TonyXiang8787@users.noreply.github.com> --- src/power_grid_model/_core/power_grid_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/power_grid_model/_core/power_grid_dataset.py b/src/power_grid_model/_core/power_grid_dataset.py index 993de8c1b..7b0ce85f5 100644 --- a/src/power_grid_model/_core/power_grid_dataset.py +++ b/src/power_grid_model/_core/power_grid_dataset.py @@ -140,7 +140,7 @@ def attribute_indications(self) -> 
Mapping[ComponentType, None | list[AttributeT A map of component to its attribute indications. None means no attribute indications """ - result_dict = {} + result_dict: dict[ComponentType, None | list[AttributeType]] = {} components = self.components() for component_idx, component_name in enumerate(components): has_indications = pgc.dataset_info_has_attribute_indications(self._info, component_idx)