Skip to content

Commit 33f68e4

Browse files
authored
Add FromIter for Ranges (#278)
* Add `FromIter` for `Ranges` Add a method to construct ranges from an iterator of arbitrary segments. This makes it possible to `.collect()` an iterator of tuples of bounds. This is more ergonomic than folding the previous ranges with the next segment each time, and also faster. Split out from #273 Closes astral-sh#33 Fixes #249 * Fix ascii art alignment * Fix algorithm with new proptest * Sorting comment * Review
1 parent 2a37e13 commit 33f68e4

File tree

1 file changed

+190
-0
lines changed

1 file changed

+190
-0
lines changed

version-ranges/src/lib.rs

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,186 @@ impl<V> IntoIterator for Ranges<V> {
879879
}
880880
}
881881

882+
impl<V: Ord> FromIterator<(Bound<V>, Bound<V>)> for Ranges<V> {
883+
/// Constructor from arbitrary, unsorted and potentially overlapping ranges.
884+
///
885+
/// This is equivalent to, but faster than, computing the [`Ranges::union`] of the
886+
/// [`Ranges::from_range_bounds`] of each segment.
887+
fn from_iter<T: IntoIterator<Item = (Bound<V>, Bound<V>)>>(iter: T) -> Self {
888+
// We have three constraints we need to fulfil:
889+
// 1. The segments are sorted, from lowest to highest (through `Ord`): By sorting.
890+
// 2. Each segment contains at least one version (start < end): By skipping invalid
891+
//    segments.
892+
// 3. There is at least one version between two segments: By merging overlapping elements.
893+
//
894+
// Technically, the implementation has a O(n²) worst case complexity since we're inserting
895+
// and removing. This has two motivations: One is that we don't have any performance
896+
// critical usages of this method as of this writing, so we have no real world benchmark.
897+
// The other is that we get the elements from an iterator, so to avoid moving elements
898+
// around we would first need to build a different, sorted collection with extra
899+
// allocation(s), before we could build our real segments. --Konsti
900+
901+
// For this implementation, we choose to only build a single smallvec and insert or remove
902+
// in it, instead of e.g. collecting the segments into a sorted data structure first and then
903+
// constructing the second smallvec without shifting.
904+
let mut segments: SmallVec<[Interval<V>; 1]> = SmallVec::new();
905+
906+
for segment in iter {
907+
// Constraint 2: skip segments that `valid_segment` rejects.
if !valid_segment(&segment.start_bound(), &segment.end_bound()) {
908+
continue;
909+
}
910+
// Find where to insert the new segment: the index of the first stored segment whose
// start is not less than the new segment's start (`segments` is kept sorted by start).
911+
let insertion_point = segments.partition_point(|elem: &Interval<V>| {
912+
cmp_bounds_start(elem.start_bound(), segment.start_bound())
913+
// NOTE(review): presumably the comparison cannot fail for start bounds of
// `V: Ord` -- confirm against `cmp_bounds_start`.
.unwrap()
914+
.is_lt()
915+
});
916+
// Is it overlapping with the previous segment?
917+
let previous_overlapping = insertion_point > 0
918+
&& !end_before_start_with_gap(
919+
&segments[insertion_point - 1].end_bound(),
920+
&segment.start_bound(),
921+
);
922+
923+
// Is it overlapping with the following segment? We'll check if there's more than one
924+
// overlap later.
925+
let next_overlapping = insertion_point < segments.len()
926+
&& !end_before_start_with_gap(
927+
&segment.end_bound(),
928+
&segments[insertion_point].start_bound(),
929+
);
930+
931+
match (previous_overlapping, next_overlapping) {
932+
(true, true) => {
933+
// previous:  |------|
934+
// segment:      |------|
935+
// following:          |------|
936+
// final:     |---------------|
937+
//
938+
// OR
939+
//
940+
// previous:  |------|
941+
// segment:       |-----------|
942+
// following:          |----|
943+
// final:     |---------------|
944+
//
945+
// OR
946+
//
947+
// previous:  |------|
948+
// segment:      |----------------|
949+
// following:          |----|   |------|
950+
// final:     |------------------------|
951+
// We merge all three segments into one, which is effectively removing one of
952+
// two previously inserted and changing the bounds on the other.
953+
954+
// Remove all elements covered by the final element. After the loop, `following`
// holds the last removed segment, i.e. the right-most one overlapping `segment`.
955+
let mut following = segments.remove(insertion_point);
956+
while insertion_point < segments.len()
957+
&& !end_before_start_with_gap(
958+
&segment.end_bound(),
959+
&segments[insertion_point].start_bound(),
960+
)
961+
{
962+
following = segments.remove(insertion_point);
963+
}
964+
965+
// Set end to max(segment.end, <last overlapping segment>.end). The surviving
// element is the previous segment at `insertion_point - 1`; its start is kept.
966+
if cmp_bounds_end(segment.end_bound(), following.end_bound())
967+
.unwrap()
968+
.is_lt()
969+
{
970+
segments[insertion_point - 1].1 = following.1;
971+
} else {
972+
segments[insertion_point - 1].1 = segment.1;
973+
}
974+
}
975+
(true, false) => {
976+
// previous:  |------|
977+
// segment:       |------|
978+
// following:               |------|
979+
//
980+
// OR
981+
//
982+
// previous:  |----------|
983+
// segment:      |---|
984+
// following:               |------|
985+
//
986+
// final:     |----------|  |------|
987+
// We can reuse the existing element by extending it.
988+
989+
// Set end to max(segment.end, <previous>.end)
990+
if cmp_bounds_end(
991+
segments[insertion_point - 1].end_bound(),
992+
segment.end_bound(),
993+
)
994+
.unwrap()
995+
.is_lt()
996+
{
997+
segments[insertion_point - 1].1 = segment.1;
998+
}
999+
}
1000+
(false, true) => {
1001+
// previous:  |------|
1002+
// segment:              |------|
1003+
// following:                |------|
1004+
// final:     |------|   |----------|
1005+
//
1006+
// OR
1007+
//
1008+
// previous:  |------|
1009+
// segment:              |----------|
1010+
// following:                |---|
1011+
// final:     |------|   |----------|
1012+
//
1013+
// OR
1014+
//
1015+
// previous:  |------|
1016+
// segment:              |------------|
1017+
// following:              |---|    |------|
1018+
//
1019+
// final:     |------|   |-----------------|
1020+
// We can reuse the existing element by extending it.
1021+
1022+
// Remove all fully covered segments so the next element is the last one that
1023+
// overlaps.
1024+
while insertion_point + 1 < segments.len()
1025+
&& !end_before_start_with_gap(
1026+
&segment.end_bound(),
1027+
&segments[insertion_point + 1].start_bound(),
1028+
)
1029+
{
1030+
// We know that the one after also overlaps, so we can drop the current
1031+
// following.
1032+
segments.remove(insertion_point);
1033+
}
1034+
1035+
// Set end to max(segment.end, <last overlapping segment>.end)
1036+
if cmp_bounds_end(segments[insertion_point].end_bound(), segment.end_bound())
1037+
.unwrap()
1038+
.is_lt()
1039+
{
1040+
segments[insertion_point].1 = segment.1;
1041+
}
1042+
// The new segment sorts at or before the following one (see `insertion_point`),
// so the merged element takes the new segment's start.
segments[insertion_point].0 = segment.0;
1043+
}
1044+
(false, false) => {
1045+
// previous:  |------|
1046+
// segment:              |------|
1047+
// following:                       |------|
1048+
//
1049+
// final:     |------|   |------|   |------|
1050+
1051+
// This line is O(n), which makes the algorithm O(n²), but it should be good
1052+
// enough for now.
1053+
segments.insert(insertion_point, segment);
1054+
}
1055+
}
1056+
}
1057+
1058+
// NOTE(review): `check_invariants` is defined outside this hunk; presumably it validates
// the three constraints listed at the top and returns `Self` -- confirm.
Self { segments }.check_invariants()
1059+
}
1060+
}
1061+
8821062
// REPORT ######################################################################
8831063

8841064
impl<V: Display + Eq> Display for Ranges<V> {
@@ -1183,6 +1363,16 @@ pub mod tests {
11831363
}
11841364
assert!(simp.segments.len() <= range.segments.len())
11851365
}
1366+
1367+
// Property: building `Ranges` from a list of arbitrary bound pairs via `from_iter` gives the
// same result as folding `union` over `from_range_bounds` of each pair, one at a time.
#[test]
1368+
fn from_iter_valid(segments in proptest::collection::vec(any::<(Bound<u32>, Bound<u32>)>(), ..30)) {
1369+
// Reference result: union the segments one by one.
let mut expected = Ranges::empty();
1370+
for segment in &segments {
1371+
expected = expected.union(&Ranges::from_range_bounds(*segment));
1372+
}
1373+
let actual = Ranges::from_iter(segments.clone());
1374+
// Print the generated input on failure.
assert_eq!(expected, actual, "{segments:?}");
1375+
}
11861376
}
11871377

11881378
#[test]

0 commit comments

Comments
 (0)