From 95a4ddea3eaf8c89c04a8293ea64c3befc748956 Mon Sep 17 00:00:00 2001 From: gezihuzi Date: Wed, 4 Sep 2024 12:39:13 +0800 Subject: [PATCH] Fix: Improve collapse_overlapped_ranges function (#2474) * Fix: Improve collapse_overlapped_ranges function - Refactor into separate sort_and_deduplicate_ranges and merge_overlapping_ranges functions - Enhance sorting to consider both start and end of ranges - Optimize merging logic to handle adjacent ranges - Add comprehensive examples in function documentation - Ensure proper handling of duplicate and unsorted input ranges - Improve overall efficiency and readability of range collapsing algorithm * move debug_assert --------- Co-authored-by: PSeitz --- src/snippet/mod.rs | 166 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 133 insertions(+), 33 deletions(-) diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index 16edac0431..e749e7fba0 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -257,40 +257,68 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str) } } -/// Returns ranges that are collapsed into non-overlapped ranges. +/// Sorts and removes duplicate ranges from the input. +/// +/// This function first sorts the ranges by their start position, +/// then by their end position, and finally removes any duplicate ranges. /// /// ## Examples -/// - [0..1, 2..3] -> [0..1, 2..3] # no overlap -/// - [0..1, 1..2] -> [0..1, 1..2] # no overlap -/// - [0..2, 1..2] -> [0..2] # collapsed -/// - [0..2, 1..3] -> [0..3] # collapsed -/// - [0..3, 1..2] -> [0..3] # second range's end is also inside of the first range +/// - [0..3, 3..6, 0..3, 3..6] -> [0..3, 3..6] +/// - [2..4, 1..3, 2..4, 0..2] -> [0..2, 1..3, 2..4] +fn sort_and_deduplicate_ranges(ranges: &[Range]) -> Vec> { + let mut sorted_ranges = ranges.to_vec(); + sorted_ranges.sort_by_key(|range| (range.start, range.end)); + sorted_ranges.dedup(); + sorted_ranges +} + +/// Merges overlapping or adjacent ranges into non-overlapping ranges. +/// +/// This function assumes that the input ranges are already sorted +/// and deduplicated. Use `sort_and_deduplicate_ranges` before calling +/// this function if the input might contain unsorted or duplicate ranges. /// -/// Note: This function assumes `ranges` is sorted by `Range.start` in ascending order. -fn collapse_overlapped_ranges(ranges: &[Range]) -> Vec> { +/// ## Examples +/// - [0..1, 2..3] -> [0..1, 2..3] # no overlap +/// - [0..1, 1..2] -> [0..2] # adjacent, merged +/// - [0..2, 1..3] -> [0..3] # overlapping, merged +/// - [0..3, 1..2] -> [0..3] # second range is completely within the first +fn merge_overlapping_ranges(ranges: &[Range]) -> Vec> { debug_assert!(is_sorted(ranges.iter().map(|range| range.start))); - - let mut result = Vec::new(); - let mut ranges_it = ranges.iter(); - - let mut current = match ranges_it.next() { - Some(range) => range.clone(), - None => return result, - }; - + let mut result = Vec::>::new(); for range in ranges { - if current.end > range.start { - current = current.start..std::cmp::max(current.end, range.end); + if let Some(last) = result.last_mut() { + if last.end > range.start { + // Only merge when there is a true overlap. + last.end = std::cmp::max(last.end, range.end); + } else { + // Do not overlap or only adjacent, add new scope. + result.push(range.clone()); + } } else { - result.push(current); - current = range.clone(); + // The first range + result.push(range.clone()); } } - - result.push(current); result } +/// Collapses ranges into non-overlapped ranges. +/// +/// This function first sorts and deduplicates the input ranges, +/// then merges any overlapping or adjacent ranges. +/// +/// ## Examples +/// - [0..1, 2..3] -> [0..1, 2..3] # no overlap +/// - [0..1, 1..2] -> [0..2] # adjacent, merged +/// - [0..2, 1..3] -> [0..3] # overlapping, merged +/// - [0..3, 1..2] -> [0..3] # second range is completely within the first +/// - [0..3, 3..6, 0..3, 3..6] -> [0..6] # duplicates removed, then merged +pub fn collapse_overlapped_ranges(ranges: &[Range]) -> Vec> { + let prepared = sort_and_deduplicate_ranges(ranges); + merge_overlapping_ranges(&prepared) +} + fn is_sorted(mut it: impl Iterator) -> bool { if let Some(first) = it.next() { let mut prev = first; @@ -448,6 +476,7 @@ impl SnippetGenerator { #[cfg(test)] mod tests { use std::collections::BTreeMap; + use std::ops::Range; use maplit::btreemap; @@ -741,16 +770,6 @@ Survey in 2016, 2017, and 2018."#; Ok(()) } - #[test] - fn test_collapse_overlapped_ranges() { - #![allow(clippy::single_range_in_vec_init)] - assert_eq!(&collapse_overlapped_ranges(&[0..1, 2..3]), &[0..1, 2..3]); - assert_eq!(&collapse_overlapped_ranges(&[0..1, 1..2]), &[0..1, 1..2]); - assert_eq!(&collapse_overlapped_ranges(&[0..2, 1..2]), &[0..2]); - assert_eq!(&collapse_overlapped_ranges(&[0..2, 1..3]), &[0..3]); - assert_eq!(&collapse_overlapped_ranges(&[0..3, 1..2]), &[0..3]); - } - #[test] fn test_snippet_with_overlapped_highlighted_ranges() { let text = "abc"; @@ -801,4 +820,85 @@ Survey in 2016, 2017, and 2018."#; sponsored by\nMozilla which describes it as a "safe" ); } + + #[test] + fn test_collapse_overlapped_ranges() { + #![allow(clippy::single_range_in_vec_init)] + assert_eq!(&collapse_overlapped_ranges(&[0..1, 2..3]), &[0..1, 2..3]); + assert_eq!(&collapse_overlapped_ranges(&[0..1, 1..2]), &[0..1, 1..2]); + assert_eq!(&collapse_overlapped_ranges(&[0..2, 1..2]), &[0..2]); + assert_eq!(&collapse_overlapped_ranges(&[0..2, 1..3]), &[0..3]); + assert_eq!(&collapse_overlapped_ranges(&[0..3, 1..2]), &[0..3]); + } + + #[test] + fn test_no_overlap() { + let ranges = vec![0..1, 2..3, 4..5]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..1, 2..3, 4..5]); + } + + #[test] + fn test_adjacent_ranges() { + let ranges = vec![0..1, 1..2, 2..3]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..1, 1..2, 2..3]); + } + + #[test] + fn test_overlapping_ranges() { + let ranges = vec![0..2, 1..3, 2..4]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..4]); + } + + #[test] + fn test_contained_ranges() { + let ranges = vec![0..5, 1..2, 3..4]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..5]); + } + + #[test] + fn test_duplicate_ranges() { + let ranges = vec![0..2, 2..4, 0..2, 2..4]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..2, 2..4]); + } + + #[test] + fn test_unsorted_ranges() { + let ranges = vec![2..4, 0..2, 1..3]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..4]); + } + + #[test] + fn test_complex_scenario() { + let ranges = vec![0..2, 5..7, 1..3, 8..9, 2..4, 3..6, 8..10]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..7, 8..10]); + } + + #[test] + fn test_empty_input() { + let ranges: Vec> = vec![]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, ranges); + } + + #[test] + fn test_single_range() { + #![allow(clippy::single_range_in_vec_init)] + let ranges = vec![0..5]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..5]); + } + + #[test] + fn test_zero_length_ranges() { + let ranges = vec![0..0, 1..1, 2..2, 3..3]; + let result = collapse_overlapped_ranges(&ranges); + assert_eq!(result, vec![0..0, 1..1, 2..2, 3..3]); + } }