Skip to content

Commit

Permalink
Test case ongoing for '_get_atomic_merge_regions()'.
Browse files Browse the repository at this point in the history
  • Loading branch information
yohplala committed Feb 18, 2025
1 parent c7bb430 commit 954e4db
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 25 deletions.
38 changes: 23 additions & 15 deletions oups/store/ordered_merge_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,35 +583,43 @@ def _get_atomic_merge_regions(
print(f"df_idx_tmrg_ends_excl: {df_idx_tmrg_ends_excl}")
# Find regions in DataFrame not overlapping with any row group.
# `amr` for atomic merge region.
df_idxs_enlarged = r_[
df_interlaces_wo_overlap = r_[
df_idx_tmrg_starts[0], # gap at start (0 to first start)
df_idx_tmrg_starts[1:] - df_idx_tmrg_ends_excl[:-1],
df_idx_tmrg_ends_excl[:-1] - df_idx_tmrg_starts[1:],
len(df_ordered_on) - df_idx_tmrg_ends_excl[-1], # gap at end
]
print(f"df_idxs_enlarged: {df_idxs_enlarged}")
amr_idx_non_overlapping = flatnonzero(df_idxs_enlarged)
print(f"amr_idx_non_overlapping: {amr_idx_non_overlapping}")
rg_idxs = arange(len(rg_mins) + 1)
print(f"rg_idxs: {rg_idxs}")
if len(amr_idx_non_overlapping) == 0:
print(f"df_interlaces_wo_overlap: {df_interlaces_wo_overlap}")
rg_idx_df_interlaces_wo_overlap = flatnonzero(df_interlaces_wo_overlap)
print(f"rg_idx_df_interlaces_wo_overlap: {rg_idx_df_interlaces_wo_overlap}")
rg_idxs_template = arange(len(rg_mins) + 1)
# print(f"rg_idxs: {rg_idxs}")
if len(rg_idx_df_interlaces_wo_overlap) == 0:
# No non-overlapping regions in DataFrame
return rg_idxs[:-1], rg_idxs[1:], df_idx_tmrg_ends_excl
print("No non-overlapping regions in DataFrame")
return rg_idxs_template[:-1], rg_idxs_template[1:], df_idx_tmrg_ends_excl
else:
# Get insert accounting for previous insertions
# insert_positions = amr_idx_non_overlapping + arange(len(amr_idx_non_overlapping))
# print(f"insert_positions: {insert_positions}")
# Fill arrays
rg_idx_to_insert = rg_idxs[amr_idx_non_overlapping]
print("Non-overlapping regions in DataFrame")
rg_idx_to_insert = rg_idxs_template[rg_idx_df_interlaces_wo_overlap]
print(f"rg_idx_to_insert: {rg_idx_to_insert}")
rg_idxs_with_inserts = insert(rg_idxs, amr_idx_non_overlapping, rg_idx_to_insert)
rg_idxs_with_inserts = insert(
rg_idxs_template,
rg_idx_df_interlaces_wo_overlap,
rg_idx_to_insert,
)
print(f"rg_idxs_with_inserts: {rg_idxs_with_inserts}")
if amr_idx_non_overlapping[-1] == len(df_ordered_on):
df_idx_to_insert = df_idx_tmrg_starts[amr_idx_non_overlapping]
if rg_idx_df_interlaces_wo_overlap[-1] == len(df_ordered_on):
df_idx_to_insert = df_idx_tmrg_starts[rg_idx_df_interlaces_wo_overlap]
else:
df_idx_to_insert = r_[df_idx_tmrg_starts, len(df_ordered_on)][amr_idx_non_overlapping]
df_idx_to_insert = r_[df_idx_tmrg_starts, len(df_ordered_on)][
rg_idx_df_interlaces_wo_overlap
]
df_idx_with_inserts = insert(
df_idx_tmrg_ends_excl,
amr_idx_non_overlapping,
rg_idx_df_interlaces_wo_overlap,
df_idx_to_insert,
)
print(f"df_idx_with_inserts: {df_idx_with_inserts}")
Expand Down
33 changes: 23 additions & 10 deletions tests/test_store/test_ordered_merge_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@
array([1, 1, 2]), # df_idx_tmrg_ends_excl
),
),
# TODO: work in progress here
(
"gap_at_end_rg_trailing_df",
[10, 20, 30], # rg_mins
Expand All @@ -116,20 +115,32 @@
True,
(
array([0, 1, 2]), # rg_idx_starts
array([1, 2, 2]), # rg_idx_ends_excl
array([1, 2, 3]), # rg_idx_ends_excl
array([1, 2, 2]), # df_idx_tmrg_ends_excl
),
),
(
"multiple_gaps_non_overlapping_rg",
"multiple_gaps_df_not_overlapping_rg",
[20, 40, 43], # rg_mins
[25, 43, 45], # rg_maxs
[25, 42, 45], # rg_maxs
Series([5, 22, 32, 42, 46, 52]), # df_ordered_on
True,
(
array([0, 0, 1, 1, 2, 2, 3]), # rg_idx_starts
array([0, 1, 1, 2, 2, 3, 3]), # rg_idx_ends_excl
array([1, 2, 3, 4, 5, 6, 7]), # df_idx_tmrg_ends_excl
array([0, 0, 1, 1, 2, 3]), # rg_idx_starts
array([0, 1, 1, 2, 3, 3]), # rg_idx_ends_excl
array([1, 2, 3, 4, 4, 6]), # df_idx_tmrg_ends_excl
),
),
(
"no_drop_duplicates_with_gap_and_overlapping_rg",
[20, 40, 43], # rg_mins, 43 overlaps with previous rg max
[25, 43, 45], # rg_maxs
Series([5, 22, 32, 43, 46, 52]), # df_ordered_on - 43 is duplicate
False, # don't drop duplicates - 43 expected to fall in last rg
(
array([0, 0, 1, 1, 2, 3]), # rg_idx_starts
array([0, 1, 1, 2, 3, 3]), # rg_idx_ends_excl
array([1, 2, 3, 3, 4, 6]), # df_idx_tmrg_ends_excl
),
),
(
Expand All @@ -139,11 +150,13 @@
Series([15, 22, 32]), # df_ordered_on - note 15 is duplicate
False,
(
array([0, 1, 2]), # rg_idx_starts
array([1, 2, 2]), # rg_idx_ends_excl
array([1, 2, 3]), # df_idx_tmrg_ends_excl
array([0, 1, 1, 2]), # rg_idx_starts
array([1, 1, 2, 2]), # rg_idx_ends_excl
array([0, 1, 2, 3]), # df_idx_tmrg_ends_excl
),
),
# TODO: work in progress - add a test case with several values in df
# falling within a single row group.
],
)
def test_get_atomic_merge_regions(
Expand Down

0 comments on commit 954e4db

Please sign in to comment.