From a08bd0c914dc6d49afaa5fdd6654a61131902f33 Mon Sep 17 00:00:00 2001 From: Hossein Moein Date: Fri, 24 Nov 2023 09:42:01 -0500 Subject: [PATCH] Using C++23 to reimplement sort by using zip --- CMakeLists.txt | 2 +- include/DataFrame/DataFrameTypes.h | 4 +- include/DataFrame/Internals/DataFrame.tcc | 531 ++++++++++-------- .../DataFrame/Internals/DataFrame_functors.h | 3 +- .../DataFrame/Internals/DataFrame_misc.tcc | 3 +- .../Internals/DataFrame_standalone.tcc | 31 +- src/Makefile.Linux.GCC64 | 2 +- src/Makefile.Linux.GCC64D | 4 +- 8 files changed, 320 insertions(+), 260 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bfea8b352..1737fb05c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ add_library(DataFrame::DataFrame ALIAS DataFrame) target_sources(DataFrame PRIVATE src/Utils/DateTime.cc) -target_compile_features(DataFrame PUBLIC cxx_std_20) +target_compile_features(DataFrame PUBLIC cxx_std_23) target_compile_definitions( DataFrame PRIVATE $<$:HMDF_HAVE_CLOCK_GETTIME> diff --git a/include/DataFrame/DataFrameTypes.h b/include/DataFrame/DataFrameTypes.h index 7e2977ff2..f4fe86f73 100644 --- a/include/DataFrame/DataFrameTypes.h +++ b/include/DataFrame/DataFrameTypes.h @@ -579,10 +579,10 @@ struct RandGenParams { std::size_t t_dist { 1 }; // The μ distribution parameter (the mean of the distribution) // - double mean { 1.0 }; + double mean { 0 }; // the σ distribution parameter (standard deviation) // - double std { 0 }; + double std { 1 }; // The λ distribution parameter (the rate parameter) // double lambda { 1.0 }; diff --git a/include/DataFrame/Internals/DataFrame.tcc b/include/DataFrame/Internals/DataFrame.tcc index f88f398e6..a427a9d53 100644 --- a/include/DataFrame/Internals/DataFrame.tcc +++ b/include/DataFrame/Internals/DataFrame.tcc @@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include // ---------------------------------------------------------------------------- @@ -124,7 +125,7 @@ void DataFrame:: sort_common_(DataFrame &df, CF &&comp_func, bool ignore_index) { const size_type idx_s = df.indices_.size(); - StlVecType sorting_idxs(idx_s, 0); + StlVecType sorting_idxs(idx_s); std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); std::sort(sorting_idxs.begin(), sorting_idxs.end(), comp_func); @@ -691,81 +692,67 @@ sort(const char *name, sort_spec dir, bool ignore_index) { make_consistent(); - const SpinGuard guard (lock_); + ColumnVecType *vec { nullptr}; + const SpinGuard guard (lock_); if (! ::strcmp(name, DF_INDEX_COL_NAME)) { - const auto &idx_vec = get_index(); - auto a = - [&idx_vec](size_type i, size_type j) -> bool { - return (idx_vec[i] < idx_vec[j]); - }; - auto d = - [&idx_vec](size_type i, size_type j) -> bool { - return (idx_vec[i] > idx_vec[j]); - }; - auto aa = - [&idx_vec](size_type i, size_type j) -> bool { - return (abs__(idx_vec[i]) < abs__(idx_vec[j])); - }; - auto ad = - [&idx_vec](size_type i, size_type j) -> bool { - return (abs__(idx_vec[i]) > abs__(idx_vec[j])); - }; - - if (dir == sort_spec::ascen) - sort_common_(*this, - std::move(a), - ignore_index); - else if (dir == sort_spec::desce) - sort_common_(*this, - std::move(d), - ignore_index); - else if (dir == sort_spec::abs_ascen) - sort_common_(*this, - std::move(aa), - ignore_index); - else if (dir == sort_spec::abs_desce) - sort_common_(*this, - std::move(ad), - ignore_index); + vec = reinterpret_cast *>(&indices_); + ignore_index = true; } - else { - const auto &col_vec = get_column(name); - auto a = - [&col_vec](size_type i, size_type j) -> bool { - return (col_vec[i] < col_vec[j]); - }; - auto d = - [&col_vec](size_type i, size_type j) -> bool { - return (col_vec[i] > col_vec[j]); - }; - auto aa = - [&col_vec](size_type i, size_type j) -> bool { - return (abs__(col_vec[i]) < abs__(col_vec[j])); - }; - auto ad = - [&col_vec](size_type i, size_type j) -> bool { - return (abs__(col_vec[i]) > abs__(col_vec[j])); - }; - - if (dir == sort_spec::ascen) [[likely]] - sort_common_(*this, - std::move(a), - ignore_index); - else if (dir == sort_spec::desce) - sort_common_(*this, - std::move(d), - ignore_index); - else if (dir == sort_spec::abs_ascen) - sort_common_(*this, - std::move(aa), - ignore_index); - else if (dir == sort_spec::abs_desce) - sort_common_(*this, - std::move(ad), - ignore_index); + else + vec = &(get_column(name, false)); + + auto a = [](const auto &lhs, const auto &rhs) -> bool { + return (std::get<0>(lhs) < std::get<0>(rhs)); + }; + auto d = [](const auto &lhs, const auto &rhs) -> bool { + return (std::get<0>(lhs) > std::get<0>(rhs)); + }; + auto aa = [](const auto &lhs, const auto &rhs) -> bool { + return (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))); + }; + auto ad = [](const auto &lhs, const auto &rhs) -> bool { + return (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))); + }; + + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); + + auto zip = std::ranges::views::zip(*vec, sorting_idxs); + auto zip_idx = std::ranges::views::zip(*vec, indices_, sorting_idxs); + + if (dir == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, a); + else + std::ranges::sort(zip, a); + } + else if (dir == sort_spec::desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, d); + else + std::ranges::sort(zip, d); + } + else if (dir == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa); + else + std::ranges::sort(zip, aa); } + else if (dir == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, ad); + else + std::ranges::sort(zip, ad); + } + + sort_functor_ functor (sorting_idxs, idx_s); + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name) + data_[citer.second].change(functor); return; } @@ -774,219 +761,270 @@ sort(const char *name, sort_spec dir, bool ignore_index) { template template void DataFrame:: -sort(const char *name1, sort_spec dir1, const char *name2, sort_spec dir2, +sort(const char *name1, sort_spec dir1, + const char *name2, sort_spec dir2, bool ignore_index) { make_consistent(); - const ColumnVecType *vec1 { nullptr}; - const ColumnVecType *vec2 { nullptr}; - const SpinGuard guard (lock_); + ColumnVecType *vec1 { nullptr}; + ColumnVecType *vec2 { nullptr}; + const SpinGuard guard (lock_); - if (! ::strcmp(name1, DF_INDEX_COL_NAME)) + if (! ::strcmp(name1, DF_INDEX_COL_NAME)) { vec1 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec1 = &(get_column(name1, false)); - if (! ::strcmp(name2, DF_INDEX_COL_NAME)) + if (! ::strcmp(name2, DF_INDEX_COL_NAME)) { vec2 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec2 = &(get_column(name2, false)); auto a_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto d_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; auto a_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; auto d_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto aa_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) < abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto ad_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto aa_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto ad_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) < abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto a_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) < abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto a_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto d_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) < abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto d_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto aa_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto ad_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto aa_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; auto ad_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); + + auto zip = std::ranges::views::zip(*vec1, *vec2, sorting_idxs); + auto zip_idx = + std::ranges::views::zip(*vec1, *vec2, indices_, sorting_idxs); + + if (dir1 == sort_spec::ascen && dir2 == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, a_a); + else + std::ranges::sort(zip, a_a); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, d_d); + else + std::ranges::sort(zip, d_d); + } + else if (dir1 == sort_spec::ascen && dir2 == sort_spec::desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, a_d); + else + std::ranges::sort(zip, a_d); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, d_a); + else + std::ranges::sort(zip, d_a); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa_aa); + else + std::ranges::sort(zip, aa_aa); + } + else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, ad_ad); + else + std::ranges::sort(zip, ad_ad); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa_ad); + else + std::ranges::sort(zip, aa_ad); + } + else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, ad_aa); + else + std::ranges::sort(zip, ad_aa); + } + else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, a_aa); + else + std::ranges::sort(zip, a_aa); + } + else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, a_ad); + else + std::ranges::sort(zip, a_ad); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, d_aa); + else + std::ranges::sort(zip, d_aa); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, d_ad); + else + std::ranges::sort(zip, d_ad); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa_a); + else + std::ranges::sort(zip, aa_a); + } + else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, ad_a); + else + std::ranges::sort(zip, ad_a); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa_d); + else + std::ranges::sort(zip, aa_d); + } + else { // dir1 == sort_spec::abs_desce && dir2 == sort_spec::desce + if (! ignore_index) + std::ranges::sort(zip_idx, ad_d); + else + std::ranges::sort(zip, ad_d); + } - if (dir1 == sort_spec::ascen && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(a_a), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::desce) - sort_common_(*this, - std::move(d_d), - ignore_index); - else if (dir1 == sort_spec::ascen && dir2 == sort_spec::desce) - sort_common_(*this, - std::move(a_d), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(d_a), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(aa_aa), - ignore_index); - else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_desce) - sort_common_(*this, - std::move(ad_ad), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_desce) - sort_common_(*this, - std::move(aa_ad), - ignore_index); - else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(ad_aa), - ignore_index); - else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(a_aa), - ignore_index); - else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_desce) - sort_common_(*this, - std::move(a_ad), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(d_aa), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_desce) - sort_common_(*this, - std::move(d_ad), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(aa_a), - ignore_index); - else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(ad_a), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::desce) - sort_common_(*this, - std::move(aa_d), - ignore_index); - else // dir1 == sort_spec::abs_desce && dir2 == sort_spec::desce - sort_common_(*this, - std::move(ad_d), - ignore_index); + sort_functor_ functor (sorting_idxs, idx_s); + + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name1 && citer.first != name2) + data_[citer.second].change(functor); return; } @@ -1002,90 +1040,113 @@ sort(const char *name1, sort_spec dir1, make_consistent(); - const ColumnVecType *vec1 { nullptr}; - const ColumnVecType *vec2 { nullptr}; - const ColumnVecType *vec3 { nullptr}; - const SpinGuard guard (lock_); + ColumnVecType *vec1 { nullptr}; + ColumnVecType *vec2 { nullptr}; + ColumnVecType *vec3 { nullptr}; + const SpinGuard guard (lock_); - if (! ::strcmp(name1, DF_INDEX_COL_NAME)) + if (! ::strcmp(name1, DF_INDEX_COL_NAME)) { vec1 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec1 = &(get_column(name1, false)); - if (! ::strcmp(name2, DF_INDEX_COL_NAME)) + if (! ::strcmp(name2, DF_INDEX_COL_NAME)) { vec2 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec2 = &(get_column(name2, false)); - if (! ::strcmp(name3, DF_INDEX_COL_NAME)) + if (! ::strcmp(name3, DF_INDEX_COL_NAME)) { vec3 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec3 = &(get_column(name3, false)); auto cf = - [vec1, vec2, vec3, dir1, dir2, dir3] - (size_type i, size_type j) -> bool { + [dir1, dir2, dir3](const auto &lhs, const auto &rhs) -> bool { if (dir1 == sort_spec::ascen) { - if (vec1->at(i) < vec1->at(j)) + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::desce) { - if (vec1->at(i) > vec1->at(j)) + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::abs_ascen) { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); } if (dir2 == sort_spec::ascen) { - if (vec2->at(i) < vec2->at(j)) + if (std::get<1>(lhs) < std::get<1>(rhs)) return (true); - else if (vec2->at(i) > vec2->at(j)) + else if (std::get<1>(lhs) > std::get<1>(rhs)) return (false); } else if (dir2 == sort_spec::desce) { - if (vec2->at(i) > vec2->at(j)) + if (std::get<1>(lhs) > std::get<1>(rhs)) return (true); - else if (vec2->at(i) < vec2->at(j)) + else if (std::get<1>(lhs) < std::get<1>(rhs)) return (false); } else if (dir2 == sort_spec::abs_ascen) { - if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) > abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec2->at(i)) > abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (false); } if (dir3 == sort_spec::ascen) - return (vec3->at(i) < vec3->at(j)); + return (std::get<2>(lhs) < std::get<2>(rhs)); else if (dir3 == sort_spec::desce) - return (vec3->at(i) > vec3->at(j)); + return (std::get<2>(lhs) > std::get<2>(rhs)); else if (dir3 == sort_spec::abs_ascen) - return (abs__(vec3->at(i)) < abs__(vec3->at(j))); + return (abs__(std::get<2>(lhs)) < abs__(std::get<2>(rhs))); else // sort_spec::abs_desce - return (abs__(vec3->at(i)) > abs__(vec3->at(j))); + return (abs__(std::get<2>(lhs)) > abs__(std::get<2>(rhs))); }; - sort_common_(*this, std::move(cf), ignore_index); + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); + + auto zip = std::ranges::views::zip(*vec1, *vec2, *vec3, sorting_idxs); + auto zip_idx = + std::ranges::views::zip(*vec1, *vec2, *vec3, indices_, sorting_idxs); + + if (! ignore_index) + std::ranges::sort(zip_idx, cf); + else + std::ranges::sort(zip, cf); + + sort_functor_ functor (sorting_idxs, idx_s); + + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name1 && citer.first != name2 && citer.first != name3) + data_[citer.second].change(functor); return; } diff --git a/include/DataFrame/Internals/DataFrame_functors.h b/include/DataFrame/Internals/DataFrame_functors.h index 754c4f059..5eb7e4e6a 100644 --- a/include/DataFrame/Internals/DataFrame_functors.h +++ b/include/DataFrame/Internals/DataFrame_functors.h @@ -67,8 +67,7 @@ struct sort_functor_ : DataVec::template visitor_base { : sorted_idxs(si), idx_s(is) { } const StlVecType &sorted_idxs; - StlVecType sorted_idxs_copy; - const size_t idx_s; + const size_t idx_s; template void operator() (T2 &vec); diff --git a/include/DataFrame/Internals/DataFrame_misc.tcc b/include/DataFrame/Internals/DataFrame_misc.tcc index 2c9203156..e6fb1f88d 100644 --- a/include/DataFrame/Internals/DataFrame_misc.tcc +++ b/include/DataFrame/Internals/DataFrame_misc.tcc @@ -77,8 +77,7 @@ template void DataFrame::sort_functor_::operator() (T2 &vec) { - sorted_idxs_copy = sorted_idxs; - _sort_by_sorted_index_(vec, sorted_idxs_copy, idx_s); + _sort_by_sorted_index_(vec, sorted_idxs, idx_s); return; } diff --git a/include/DataFrame/Internals/DataFrame_standalone.tcc b/include/DataFrame/Internals/DataFrame_standalone.tcc index e655e485e..ebeaec5c7 100644 --- a/include/DataFrame/Internals/DataFrame_standalone.tcc +++ b/include/DataFrame/Internals/DataFrame_standalone.tcc @@ -865,23 +865,24 @@ inline static O _remove_copy_if_(I first, I last, O d_first, PRE predicate) { template static inline void -_sort_by_sorted_index_(T &to_be_sorted, V &sorting_idxs, size_t idx_s) { - - if (idx_s > 0) { - idx_s -= 1; - for (size_t i = 0; i < idx_s; ++i) [[likely]] { - // while the element i is not yet in place - // - while (sorting_idxs[i] != sorting_idxs[sorting_idxs[i]]) { - // swap it with the element at its final place - // - const size_t j = sorting_idxs[i]; - - std::swap(to_be_sorted[j], to_be_sorted[sorting_idxs[j]]); - std::swap(sorting_idxs[i], sorting_idxs[j]); +_sort_by_sorted_index_(T &to_be_sorted, const V &sorting_idxs, size_t idx_s) { + + std::vector done (idx_s, false); + + for (std::size_t i = 0; i < idx_s; ++i) [[likely]] + if (! done[i]) { + done[i] = true; + + std::size_t prev_j = i; + std::size_t j = sorting_idxs[i]; + + while (i != j) { + std::swap(to_be_sorted[prev_j], to_be_sorted[j]); + done[j] = true; + prev_j = j; + j = sorting_idxs[j]; } } - } } // ---------------------------------------------------------------------------- diff --git a/src/Makefile.Linux.GCC64 b/src/Makefile.Linux.GCC64 index e67e1a0b4..ed055558c 100644 --- a/src/Makefile.Linux.GCC64 +++ b/src/Makefile.Linux.GCC64 @@ -11,7 +11,7 @@ CXX = /usr/bin/g++ INCLUDES = -I/usr/include/c++/7 -I/usr/include LFLAGS = -CXXFLAGS = -O3 $(INCLUDES) $(DEFINES) -std=c++20 +CXXFLAGS = -O3 $(INCLUDES) $(DEFINES) -std=c++2b PLATFORM_LIBS = -lpthread -ldl -lm -lstdc++ diff --git a/src/Makefile.Linux.GCC64D b/src/Makefile.Linux.GCC64D index 51f306649..bbaadbacf 100644 --- a/src/Makefile.Linux.GCC64D +++ b/src/Makefile.Linux.GCC64D @@ -11,8 +11,8 @@ CXX = /usr/bin/g++ INCLUDES = -I/usr/include/c++/7 -I/usr/inc17lude LFLAGS = -CXXFLAGS = -g $(INCLUDES) $(DEFINES) -D_GLIBCXX_DEBUG -pedantic -Wall -Wextra -std=c++20 -# CXXFLAGS = -g $(INCLUDES) $(DEFINES) -std=c++20 +CXXFLAGS = -g $(INCLUDES) $(DEFINES) -D_GLIBCXX_DEBUG -pedantic -Wall -Wextra -std=c++2b +# CXXFLAGS = -g $(INCLUDES) $(DEFINES) -std=c++2b PLATFORM_LIBS = -lpthread -ldl -lm -lstdc++ -fsanitize-address-use-after-scope -fsanitize=address