Skip to content

Commit eb5b096

Browse files
committed
RangeInclusive internal iteration performance improvement.
Specialize Iterator::try_fold and DoubleEndedIterator::try_rfold to improve code generation in all internal iteration scenarios. This changes brings the performance of internal iteration with RangeInclusive on par with the performance of iteration with Range: - Single conditional jump in hot loop, - Unrolling and vectorization, - And even Closed Form substitution. Unfortunately, it only applies to internal iteration. Despite various attempts at stream-lining the implementation of next and next_back, LLVM has stubbornly refused to optimize external iteration appropriately, leaving me with a choice between: - The current implementation, for which Closed Form substitution is performed, but which uses 2 conditional jumps in the hot loop when optimization fail. - An implementation using a "is_done" boolean, which uses 1 conditional jump in the hot loop when optimization fail, allowing unrolling and vectorization, but for which Closed Form substitution fails. In the absence of any conclusive evidence as to which usecase matters most, and with no assurance that the lack of Closed Form substitution is not indicative of other optimizations being foiled, there is no way to pick one implementation over the other, and thus I defer to the statu quo as far as next and next_back are concerned.
1 parent cae623c commit eb5b096

File tree

2 files changed

+50
-3
lines changed

2 files changed

+50
-3
lines changed

src/libcore/iter/range.rs

+48-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use convert::TryFrom;
22
use mem;
3-
use ops::{self, Add, Sub};
3+
use ops::{self, Add, Sub, Try};
44
use usize;
55

66
use super::{FusedIterator, TrustedLen};
@@ -368,11 +368,11 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
368368
Some(Less) => {
369369
self.is_empty = Some(false);
370370
self.start = plus_n.add_one();
371-
return Some(plus_n)
371+
return Some(plus_n);
372372
}
373373
Some(Equal) => {
374374
self.is_empty = Some(true);
375-
return Some(plus_n)
375+
return Some(plus_n);
376376
}
377377
_ => {}
378378
}
@@ -382,6 +382,29 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
382382
None
383383
}
384384

385+
#[inline]
386+
fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R
387+
where
388+
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
389+
{
390+
self.compute_is_empty();
391+
392+
let mut accum = init;
393+
394+
while self.start < self.end {
395+
let n = self.start.add_one();
396+
let n = mem::replace(&mut self.start, n);
397+
accum = f(accum, n)?;
398+
}
399+
400+
if self.start == self.end {
401+
accum = f(accum, self.start.clone())?;
402+
}
403+
404+
self.is_empty = Some(true);
405+
Try::from_ok(accum)
406+
}
407+
385408
#[inline]
386409
fn last(mut self) -> Option<A> {
387410
self.next_back()
@@ -415,6 +438,28 @@ impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
415438
self.end.clone()
416439
})
417440
}
441+
442+
#[inline]
443+
fn try_rfold<B, F, R>(&mut self, init: B, mut f: F) -> R where
444+
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
445+
{
446+
self.compute_is_empty();
447+
448+
let mut accum = init;
449+
450+
while self.start < self.end {
451+
let n = self.end.sub_one();
452+
let n = mem::replace(&mut self.end, n);
453+
accum = f(accum, n)?;
454+
}
455+
456+
if self.start == self.end {
457+
accum = f(accum, self.start.clone())?;
458+
}
459+
460+
self.is_empty = Some(true);
461+
Try::from_ok(accum)
462+
}
418463
}
419464

420465
#[stable(feature = "fused", since = "1.26.0")]

src/libcore/ops/range.rs

+2
Original file line numberDiff line numberDiff line change
@@ -334,12 +334,14 @@ pub struct RangeInclusive<Idx> {
334334
trait RangeInclusiveEquality: Sized {
335335
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool;
336336
}
337+
337338
impl<T> RangeInclusiveEquality for T {
338339
#[inline]
339340
default fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {
340341
range.is_empty.unwrap_or_default()
341342
}
342343
}
344+
343345
impl<T: PartialOrd> RangeInclusiveEquality for T {
344346
#[inline]
345347
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {

0 commit comments

Comments
 (0)