Skip to content

Commit eb561e5

Browse files
Merge #704
704: `MergeJoinBy` also accepts functions returning `bool` r=jswrenn a=Philippe-Cholet Related to #701. When the user function returns `bool`, the produced iterator yields `Either` items, instead of the `EitherOrBoth` items it yields when the user function returns `Ordering`. Co-authored-by: Philippe-Cholet <[email protected]>
2 parents 3e92550 + ae31559 commit eb561e5

File tree

3 files changed

+170
-63
lines changed

3 files changed

+170
-63
lines changed

src/lib.rs

+38-7
Original file line numberDiff line numberDiff line change
@@ -1012,7 +1012,10 @@ pub trait Itertools : Iterator {
10121012
/// Create an iterator that merges items from both this and the specified
10131013
/// iterator in ascending order.
10141014
///
1015-
/// It chooses whether to pair elements based on the `Ordering` returned by the
1015+
/// The function can either return an `Ordering` variant or a boolean.
1016+
///
1017+
/// If `cmp_fn` returns `Ordering`,
1018+
/// it chooses whether to pair elements based on the `Ordering` returned by the
10161019
/// specified compare function. At any point, inspecting the tip of the
10171020
/// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type
10181021
/// `J::Item` respectively, the resulting iterator will:
@@ -1028,18 +1031,46 @@ pub trait Itertools : Iterator {
10281031
/// use itertools::Itertools;
10291032
/// use itertools::EitherOrBoth::{Left, Right, Both};
10301033
///
1031-
/// let multiples_of_2 = (0..10).step_by(2);
1032-
/// let multiples_of_3 = (0..10).step_by(3);
1034+
/// let a = vec![0, 2, 4, 6, 1].into_iter();
1035+
/// let b = (0..10).step_by(3);
1036+
///
1037+
/// itertools::assert_equal(
1038+
/// a.merge_join_by(b, |i, j| i.cmp(j)),
1039+
/// vec![Both(0, 0), Left(2), Right(3), Left(4), Both(6, 6), Left(1), Right(9)]
1040+
/// );
1041+
/// ```
1042+
///
1043+
/// If `cmp_fn` returns `bool`,
1044+
/// it chooses whether to pair elements based on the boolean returned by the
1045+
/// specified function. At any point, inspecting the tip of the
1046+
/// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type
1047+
/// `J::Item` respectively, the resulting iterator will:
1048+
///
1049+
/// - Emit `Either::Left(i)` when `true`,
1050+
/// and remove `i` from its source iterator
1051+
/// - Emit `Either::Right(j)` when `false`,
1052+
/// and remove `j` from its source iterator
1053+
///
1054+
/// It is similar to the `Ordering` case, where `true` means that the first
1055+
/// argument is considered "less" than the second argument.
1056+
///
1057+
/// ```
1058+
/// use itertools::Itertools;
1059+
/// use itertools::Either::{Left, Right};
1060+
///
1061+
/// let a = vec![0, 2, 4, 6, 1].into_iter();
1062+
/// let b = (0..10).step_by(3);
10331063
///
10341064
/// itertools::assert_equal(
1035-
/// multiples_of_2.merge_join_by(multiples_of_3, |i, j| i.cmp(j)),
1036-
/// vec![Both(0, 0), Left(2), Right(3), Left(4), Both(6, 6), Left(8), Right(9)]
1065+
/// a.merge_join_by(b, |i, j| i <= j),
1066+
/// vec![Left(0), Right(0), Left(2), Right(3), Left(4), Left(6), Left(1), Right(6), Right(9)]
10371067
/// );
10381068
/// ```
10391069
#[inline]
1040-
fn merge_join_by<J, F>(self, other: J, cmp_fn: F) -> MergeJoinBy<Self, J::IntoIter, F>
1070+
fn merge_join_by<J, F, T>(self, other: J, cmp_fn: F) -> MergeJoinBy<Self, J::IntoIter, F>
10411071
where J: IntoIterator,
1042-
F: FnMut(&Self::Item, &J::Item) -> std::cmp::Ordering,
1072+
F: FnMut(&Self::Item, &J::Item) -> T,
1073+
T: merge_join::OrderingOrBool<Self::Item, J::Item>,
10431074
Self: Sized
10441075
{
10451076
merge_join_by(self, other, cmp_fn)

src/merge_join.rs

+107-56
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,23 @@ use std::cmp::Ordering;
22
use std::iter::Fuse;
33
use std::fmt;
44

5+
use either::Either;
6+
57
use super::adaptors::{PutBack, put_back};
68
use crate::either_or_both::EitherOrBoth;
9+
use crate::size_hint::{self, SizeHint};
710
#[cfg(doc)]
811
use crate::Itertools;
912

1013
/// Return an iterator adaptor that merge-joins items from the two base iterators in ascending order.
1114
///
1215
/// [`IntoIterator`] enabled version of [`Itertools::merge_join_by`].
13-
pub fn merge_join_by<I, J, F>(left: I, right: J, cmp_fn: F)
16+
pub fn merge_join_by<I, J, F, T>(left: I, right: J, cmp_fn: F)
1417
-> MergeJoinBy<I::IntoIter, J::IntoIter, F>
1518
where I: IntoIterator,
1619
J: IntoIterator,
17-
F: FnMut(&I::Item, &J::Item) -> Ordering
20+
F: FnMut(&I::Item, &J::Item) -> T,
21+
T: OrderingOrBool<I::Item, J::Item>,
1822
{
1923
MergeJoinBy {
2024
left: put_back(left.into_iter().fuse()),
@@ -30,7 +34,66 @@ pub fn merge_join_by<I, J, F>(left: I, right: J, cmp_fn: F)
3034
pub struct MergeJoinBy<I: Iterator, J: Iterator, F> {
3135
left: PutBack<Fuse<I>>,
3236
right: PutBack<Fuse<J>>,
33-
cmp_fn: F
37+
cmp_fn: F,
38+
}
39+
40+
pub trait OrderingOrBool<L, R> {
41+
type MergeResult;
42+
fn left(left: L) -> Self::MergeResult;
43+
fn right(right: R) -> Self::MergeResult;
44+
// "merge" never returns (Some(...), Some(...), ...) so Option<Either<I::Item, J::Item>>
45+
// is appealing but it is always followed by two put_backs, so we think the compiler is
46+
// smart enough to optimize it. Or we could move put_backs into "merge".
47+
fn merge(self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult);
48+
fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint;
49+
}
50+
51+
impl<L, R> OrderingOrBool<L, R> for Ordering {
52+
type MergeResult = EitherOrBoth<L, R>;
53+
fn left(left: L) -> Self::MergeResult {
54+
EitherOrBoth::Left(left)
55+
}
56+
fn right(right: R) -> Self::MergeResult {
57+
EitherOrBoth::Right(right)
58+
}
59+
fn merge(self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult) {
60+
match self {
61+
Ordering::Equal => (None, None, EitherOrBoth::Both(left, right)),
62+
Ordering::Less => (None, Some(right), EitherOrBoth::Left(left)),
63+
Ordering::Greater => (Some(left), None, EitherOrBoth::Right(right)),
64+
}
65+
}
66+
fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint {
67+
let (a_lower, a_upper) = left;
68+
let (b_lower, b_upper) = right;
69+
let lower = ::std::cmp::max(a_lower, b_lower);
70+
let upper = match (a_upper, b_upper) {
71+
(Some(x), Some(y)) => x.checked_add(y),
72+
_ => None,
73+
};
74+
(lower, upper)
75+
}
76+
}
77+
78+
impl<L, R> OrderingOrBool<L, R> for bool {
79+
type MergeResult = Either<L, R>;
80+
fn left(left: L) -> Self::MergeResult {
81+
Either::Left(left)
82+
}
83+
fn right(right: R) -> Self::MergeResult {
84+
Either::Right(right)
85+
}
86+
fn merge(self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult) {
87+
if self {
88+
(None, Some(right), Either::Left(left))
89+
} else {
90+
(Some(left), None, Either::Right(right))
91+
}
92+
}
93+
fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint {
94+
// Not ExactSizeIterator because the combined size may be larger than `usize::MAX`
95+
size_hint::add(left, right)
96+
}
3497
}
3598

3699
impl<I, J, F> Clone for MergeJoinBy<I, J, F>
@@ -52,49 +115,34 @@ impl<I, J, F> fmt::Debug for MergeJoinBy<I, J, F>
52115
debug_fmt_fields!(MergeJoinBy, left, right);
53116
}
54117

55-
impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
118+
impl<I, J, F, T> Iterator for MergeJoinBy<I, J, F>
56119
where I: Iterator,
57120
J: Iterator,
58-
F: FnMut(&I::Item, &J::Item) -> Ordering
121+
F: FnMut(&I::Item, &J::Item) -> T,
122+
T: OrderingOrBool<I::Item, J::Item>,
59123
{
60-
type Item = EitherOrBoth<I::Item, J::Item>;
124+
type Item = T::MergeResult;
61125

62126
fn next(&mut self) -> Option<Self::Item> {
63127
match (self.left.next(), self.right.next()) {
64128
(None, None) => None,
65-
(Some(left), None) =>
66-
Some(EitherOrBoth::Left(left)),
67-
(None, Some(right)) =>
68-
Some(EitherOrBoth::Right(right)),
129+
(Some(left), None) => Some(T::left(left)),
130+
(None, Some(right)) => Some(T::right(right)),
69131
(Some(left), Some(right)) => {
70-
match (self.cmp_fn)(&left, &right) {
71-
Ordering::Equal =>
72-
Some(EitherOrBoth::Both(left, right)),
73-
Ordering::Less => {
74-
self.right.put_back(right);
75-
Some(EitherOrBoth::Left(left))
76-
},
77-
Ordering::Greater => {
78-
self.left.put_back(left);
79-
Some(EitherOrBoth::Right(right))
80-
}
132+
let (left, right, next) = (self.cmp_fn)(&left, &right).merge(left, right);
133+
if let Some(left) = left {
134+
self.left.put_back(left);
135+
}
136+
if let Some(right) = right {
137+
self.right.put_back(right);
81138
}
139+
Some(next)
82140
}
83141
}
84142
}
85143

86-
fn size_hint(&self) -> (usize, Option<usize>) {
87-
let (a_lower, a_upper) = self.left.size_hint();
88-
let (b_lower, b_upper) = self.right.size_hint();
89-
90-
let lower = ::std::cmp::max(a_lower, b_lower);
91-
92-
let upper = match (a_upper, b_upper) {
93-
(Some(x), Some(y)) => x.checked_add(y),
94-
_ => None,
95-
};
96-
97-
(lower, upper)
144+
fn size_hint(&self) -> SizeHint {
145+
T::size_hint(self.left.size_hint(), self.right.size_hint())
98146
}
99147

100148
fn count(mut self) -> usize {
@@ -106,10 +154,12 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
106154
(None, Some(_right)) => break count + 1 + self.right.into_parts().1.count(),
107155
(Some(left), Some(right)) => {
108156
count += 1;
109-
match (self.cmp_fn)(&left, &right) {
110-
Ordering::Equal => {}
111-
Ordering::Less => self.right.put_back(right),
112-
Ordering::Greater => self.left.put_back(left),
157+
let (left, right, _) = (self.cmp_fn)(&left, &right).merge(left, right);
158+
if let Some(left) = left {
159+
self.left.put_back(left);
160+
}
161+
if let Some(right) = right {
162+
self.right.put_back(right);
113163
}
114164
}
115165
}
@@ -122,27 +172,24 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
122172
match (self.left.next(), self.right.next()) {
123173
(None, None) => break previous_element,
124174
(Some(left), None) => {
125-
break Some(EitherOrBoth::Left(
175+
break Some(T::left(
126176
self.left.into_parts().1.last().unwrap_or(left),
127177
))
128178
}
129179
(None, Some(right)) => {
130-
break Some(EitherOrBoth::Right(
180+
break Some(T::right(
131181
self.right.into_parts().1.last().unwrap_or(right),
132182
))
133183
}
134184
(Some(left), Some(right)) => {
135-
previous_element = match (self.cmp_fn)(&left, &right) {
136-
Ordering::Equal => Some(EitherOrBoth::Both(left, right)),
137-
Ordering::Less => {
138-
self.right.put_back(right);
139-
Some(EitherOrBoth::Left(left))
140-
}
141-
Ordering::Greater => {
142-
self.left.put_back(left);
143-
Some(EitherOrBoth::Right(right))
144-
}
185+
let (left, right, elem) = (self.cmp_fn)(&left, &right).merge(left, right);
186+
if let Some(left) = left {
187+
self.left.put_back(left);
188+
}
189+
if let Some(right) = right {
190+
self.right.put_back(right);
145191
}
192+
previous_element = Some(elem);
146193
}
147194
}
148195
}
@@ -156,13 +203,17 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
156203
n -= 1;
157204
match (self.left.next(), self.right.next()) {
158205
(None, None) => break None,
159-
(Some(_left), None) => break self.left.nth(n).map(EitherOrBoth::Left),
160-
(None, Some(_right)) => break self.right.nth(n).map(EitherOrBoth::Right),
161-
(Some(left), Some(right)) => match (self.cmp_fn)(&left, &right) {
162-
Ordering::Equal => {}
163-
Ordering::Less => self.right.put_back(right),
164-
Ordering::Greater => self.left.put_back(left),
165-
},
206+
(Some(_left), None) => break self.left.nth(n).map(T::left),
207+
(None, Some(_right)) => break self.right.nth(n).map(T::right),
208+
(Some(left), Some(right)) => {
209+
let (left, right, _) = (self.cmp_fn)(&left, &right).merge(left, right);
210+
if let Some(left) = left {
211+
self.left.put_back(left);
212+
}
213+
if let Some(right) = right {
214+
self.right.put_back(right);
215+
}
216+
}
166217
}
167218
}
168219
}

tests/quick.rs

+25
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,31 @@ quickcheck! {
829829
}
830830
}
831831

832+
quickcheck! {
833+
fn merge_join_by_ordering_vs_bool(a: Vec<u8>, b: Vec<u8>) -> bool {
834+
use either::Either;
835+
use itertools::free::merge_join_by;
836+
let mut has_equal = false;
837+
let it_ord = merge_join_by(a.clone(), b.clone(), Ord::cmp).flat_map(|v| match v {
838+
EitherOrBoth::Both(l, r) => {
839+
has_equal = true;
840+
vec![Either::Left(l), Either::Right(r)]
841+
}
842+
EitherOrBoth::Left(l) => vec![Either::Left(l)],
843+
EitherOrBoth::Right(r) => vec![Either::Right(r)],
844+
});
845+
let it_bool = merge_join_by(a, b, PartialOrd::le);
846+
itertools::equal(it_ord, it_bool) || has_equal
847+
}
848+
fn merge_join_by_bool_unwrapped_is_merge_by(a: Vec<u8>, b: Vec<u8>) -> bool {
849+
use either::Either;
850+
use itertools::free::merge_join_by;
851+
let it = a.clone().into_iter().merge_by(b.clone(), PartialOrd::ge);
852+
let it_join = merge_join_by(a, b, PartialOrd::ge).map(Either::into_inner);
853+
itertools::equal(it, it_join)
854+
}
855+
}
856+
832857
quickcheck! {
833858
fn size_tee(a: Vec<u8>) -> bool {
834859
let (mut t1, mut t2) = a.iter().tee();

0 commit comments

Comments
 (0)