diff --git a/src/least_satisfying.rs b/src/least_satisfying.rs
index 5fbce24..ef587a8 100644
--- a/src/least_satisfying.rs
+++ b/src/least_satisfying.rs
@@ -1,7 +1,16 @@
 use std::collections::BTreeMap;
 use std::fmt;
 
-pub fn least_satisfying<T, P>(slice: &[T], mut predicate: P) -> usize
+/// Returns the index of the earliest element of `slice` for which `predicate` returns
+/// [`Satisfies::Yes`], assuming every `Satisfies::No` element precedes every `Satisfies::Yes` one.
+///
+/// `midpoint_selection` picks the probing strategy (see [`MidpointSelection`]). `predicate` gets
+/// the element, the remaining-version count and a step estimate; results are cached per index.
+pub fn least_satisfying<T, P>(
+    slice: &[T],
+    midpoint_selection: MidpointSelection,
+    mut predicate: P,
+) -> usize
 where
     T: fmt::Display + fmt::Debug,
     P: FnMut(&T, usize, usize) -> Satisfies,
@@ -9,9 +18,30 @@ where
     let mut cache = BTreeMap::new();
     let mut predicate = |idx: usize, rm_no, lm_yes| {
         let range: usize = lm_yes - rm_no + 1;
-        // FIXME: This does not consider unknown_ranges.
         let remaining = range / 2;
-        let estimate = if range < 3 { 0 } else { range.ilog2() as usize };
+
+        // The estimate of the remaining step count based on the range of the values left to check.
+        // Can be an underestimate if the (future) midpoint(s) don't land close enough to the
+        // true middle of the bisected ranges, but usually by no more than 2.
+        let range_est = range.ilog2() as usize;
+        let estimate = match midpoint_selection {
+            MidpointSelection::Naive => range_est,
+            MidpointSelection::Stabilized { start_offset } => {
+                // The estimate of the remaining step count based on the height of the current
+                // idx in the overall binary tree. This is tailored to the specific midpoint
+                // selection strategy currently used, and relies on the fact that each step of
+                // the way we get at least one more step away from the root of the binary tree.
+                // Can arbitrarily overestimate the number of steps (think a short bisection
+                // range centered around the tree root).
+                // Can also *under*estimate the number of steps if the `idx` was not actually a
+                // direct result of `midpoint_stable_offset`, but rather tweaked slightly to
+                // work around unknown ranges.
+                let height_est = (start_offset + 1 + idx).trailing_zeros() as usize;
+                // Real estimate. Combines our best guesses via the two above methods. Can still
+                // be somewhat off in presence of unknown ranges.
+                height_est.clamp(range_est, range_est + 2)
+            }
+        };
         *cache
             .entry(idx)
             .or_insert_with(|| predicate(&slice[idx], remaining, estimate))
@@ -25,13 +55,19 @@ where
     // this should be tested before the call
     let mut lm_yes = slice.len() - 1;
 
-    let mut next = (rm_no + lm_yes) / 2;
-
+    let mut next: usize;
     loop {
         // simple case with no unknown ranges
         if rm_no + 1 == lm_yes {
             return lm_yes;
         }
+        next = match midpoint_selection {
+            MidpointSelection::Naive => (rm_no + lm_yes) / 2,
+            MidpointSelection::Stabilized { start_offset } => {
+                midpoint_stable_offset(start_offset, rm_no, lm_yes)
+            }
+        };
+
         for (left, right) in unknown_ranges.iter().copied() {
             // if we're straddling an unknown range, then pretend it doesn't exist
             if rm_no + 1 == left && right + 1 == lm_yes {
@@ -52,11 +88,9 @@ where
         match r {
             Satisfies::Yes => {
                 lm_yes = next;
-                next = (rm_no + lm_yes) / 2;
             }
             Satisfies::No => {
                 rm_no = next;
-                next = (rm_no + lm_yes) / 2;
             }
             Satisfies::Unknown => {
                 let mut left = next;
@@ -70,19 +104,32 @@ where
                     right += 1;
                 }
                 unknown_ranges.push((left + 1, right - 1));
-                next = left;
             }
         }
     }
 }
 
+/// Governs the way a midpoint element is selected.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum MidpointSelection {
+    /// Midpoint is simply `(start + end) / 2`.
+    /// Shall achieve the bisection in the least steps possible.
+    Naive,
+    /// Midpoint would aim to be reused between different bisections,
+    /// regardless of the initial bounds selection.
+    /// The `start_offset` is the offset of the first element of the slice
+    /// in a (hypothetical) "overall" array of "all the elements possible".
+    Stabilized { start_offset: usize },
+}
+
 #[cfg(test)]
 mod tests {
     use super::Satisfies::{No, Unknown, Yes};
     use super::{least_satisfying, Satisfies};
+    use super::{midpoint_stable, MidpointSelection};
     use quickcheck::{QuickCheck, TestResult};
 
-    fn prop(xs: Vec<Option<bool>>) -> TestResult {
+    fn prop(midpoint_sel: Option<usize>, xs: Vec<Option<bool>>) -> TestResult {
         let mut satisfies_v = xs
             .into_iter()
             .map(std::convert::Into::into)
@@ -98,8 +145,17 @@ mod tests {
                 _ => {}
             }
         }
+        if midpoint_sel.unwrap_or(0) > usize::MAX / 2 {
+            // not interested in testing usize overflows
+            return TestResult::discard();
+        }
 
-        let res = least_satisfying(&satisfies_v, |i, _, _| *i);
+        let midpoint = match midpoint_sel {
+            None => MidpointSelection::Naive,
+            Some(x) => MidpointSelection::Stabilized { start_offset: x },
+        };
+
+        let res = least_satisfying(&satisfies_v, midpoint, |i, _, _| *i);
         let exp = first_yes.unwrap();
         TestResult::from_bool(res == exp)
     }
@@ -107,7 +163,11 @@ mod tests {
     #[test]
     fn least_satisfying_1() {
         assert_eq!(
-            least_satisfying(&[No, Unknown, Unknown, No, Yes], |i, _, _| *i),
+            least_satisfying(
+                &[No, Unknown, Unknown, No, Yes],
+                MidpointSelection::Naive,
+                |i, _, _| *i
+            ),
             4
         );
     }
@@ -115,24 +175,49 @@ mod tests {
     #[test]
     fn least_satisfying_2() {
         assert_eq!(
-            least_satisfying(&[No, Unknown, Yes, Unknown, Yes], |i, _, _| *i),
+            least_satisfying(
+                &[No, Unknown, Yes, Unknown, Yes],
+                MidpointSelection::Naive,
+                |i, _, _| *i
+            ),
             2
         );
     }
 
     #[test]
     fn least_satisfying_3() {
-        assert_eq!(least_satisfying(&[No, No, No, No, Yes], |i, _, _| *i), 4);
+        assert_eq!(
+            least_satisfying(
+                &[No, No, No, No, Yes],
+                MidpointSelection::Naive,
+                |i, _, _| *i
+            ),
+            4
+        );
     }
 
     #[test]
     fn least_satisfying_4() {
-        assert_eq!(least_satisfying(&[No, No, Yes, Yes, Yes], |i, _, _| *i), 2);
+        assert_eq!(
+            least_satisfying(
+                &[No, No, Yes, Yes, Yes],
+                MidpointSelection::Naive,
+                |i, _, _| *i
+            ),
+            2
+        );
     }
 
     #[test]
     fn least_satisfying_5() {
-        assert_eq!(least_satisfying(&[No, Yes, Yes, Yes, Yes], |i, _, _| *i), 1);
+        assert_eq!(
+            least_satisfying(
+                &[No, Yes, Yes, Yes, Yes],
+                MidpointSelection::Naive,
+                |i, _, _| *i
+            ),
+            1
+        );
     }
 
     #[test]
@@ -140,6 +225,7 @@ mod tests {
         assert_eq!(
             least_satisfying(
                 &[No, Yes, Yes, Unknown, Unknown, Yes, Unknown, Yes],
+                MidpointSelection::Naive,
                 |i, _, _| *i
             ),
             1
@@ -148,21 +234,143 @@ mod tests {
 
     #[test]
     fn least_satisfying_7() {
-        assert_eq!(least_satisfying(&[No, Yes, Unknown, Yes], |i, _, _| *i), 1);
+        assert_eq!(
+            least_satisfying(
+                &[No, Yes, Unknown, Yes],
+                MidpointSelection::Naive,
+                |i, _, _| *i
+            ),
+            1
+        );
     }
 
     #[test]
     fn least_satisfying_8() {
         assert_eq!(
-            least_satisfying(&[No, Unknown, No, No, Unknown, Yes, Yes], |i, _, _| *i),
+            least_satisfying(
+                &[No, Unknown, No, No, Unknown, Yes, Yes],
+                MidpointSelection::Naive,
+                |i, _, _| *i
+            ),
             5
         );
     }
 
     #[test]
-    fn qc_prop() {
-        QuickCheck::new().quickcheck(prop as fn(_) -> _);
+    fn least_satisfying_9() {
+        assert_eq!(
+            least_satisfying(&[No, Unknown, Yes], MidpointSelection::Naive, |i, _, _| *i),
+            2
+        );
+    }
+
+    #[test]
+    fn qc_prop_least_satisfying() {
+        QuickCheck::new().quickcheck(prop as fn(_, _) -> _);
+    }
+
+    #[test]
+    fn midpoint_test() {
+        assert_eq!(midpoint_stable(1, 3), 2);
+        assert_eq!(midpoint_stable(3, 6), 4);
+        assert_eq!(midpoint_stable(1, 5), 4);
+        assert_eq!(midpoint_stable(2, 5), 4);
+        assert_eq!(midpoint_stable(4, 7), 6);
+        assert_eq!(midpoint_stable(8, 13), 12);
+        assert_eq!(midpoint_stable(8, 16), 12);
+
+        assert_eq!(midpoint_stable(25, 27), 26);
+        assert_eq!(midpoint_stable(25, 28), 26);
+        assert_eq!(midpoint_stable(25, 29), 28);
+        assert_eq!(midpoint_stable(33, 65), 64);
     }
+
+    #[test]
+    fn qc_prop_midpoint_stable() {
+        fn prop_midpoint(left: usize, right: usize) -> TestResult {
+            if left > usize::MAX / 2 || right > usize::MAX / 2 {
+                return TestResult::discard();
+            }
+            if left == 0 {
+                return TestResult::discard();
+            }
+            if left + 1 >= right {
+                return TestResult::discard();
+            }
+            let mid = midpoint_stable(left, right);
+            // check that it's in range
+            if mid <= left || right <= mid {
+                return TestResult::failed();
+            }
+            // check that there are no less-deep candidates in range
+            let mid_height = mid.trailing_zeros();
+            let step = 1 << (mid_height + 1);
+            let mut probe = left & !(step - 1);
+            while probe < right {
+                if probe > left {
+                    return TestResult::failed();
+                }
+                probe += step;
+            }
+            TestResult::passed()
+        }
+        QuickCheck::new().quickcheck(prop_midpoint as fn(_, _) -> _);
+    }
+}
+
+/// Zero-based wrapper around `midpoint_stable`; see its documentation below.
+fn midpoint_stable_offset(start_offset: usize, left: usize, right: usize) -> usize {
+    // The implementation of `midpoint_stable` treats the slice as a binary tree
+    // with the assumption that the slice index starts at one, not zero
+    // (i.e. it assumes that both 1 and 3 are child nodes of 2, and 0 is not present
+    // in the tree at all).
+    // But we don't want to bubble this requirement up the stack since it's a bit
+    // counterintuitive and hard to explain, so just bump it here instead
+    let start_offset = start_offset + 1;
+    midpoint_stable(left + start_offset, right + start_offset) - start_offset
+}
+
+/// Returns a "stabilized midpoint" between the two slice indices (endpoints excluded).
+///
+/// That is, returns such an index that is likely to be reused by future bisector invocations.
+/// In practice, this reinterprets the slice as a "complete" (i.e. left-heavy) binary tree,
+/// and finds the lowest-depth node between the two indices. This ensures that low-depth
+/// nodes are more likely to be tried first (and thus reused) regardless of the initial search boundaries,
+/// while still keeping the "binary" in "binary search" and completing the task in O(log_2(n)) steps
+///
+/// Panics if there is no index strictly between `left` and `right`.
+fn midpoint_stable(left: usize, right: usize) -> usize {
+    assert!(
+        right > left && right - left > 1,
+        "midpoint_stable called with consecutive values. Can't handle this, there's no midpoint. {:?} vs {:?}",
+        left,
+        right
+    );
+    // If we only have a single candidate - return it
+    if left + 1 == right - 1 {
+        return left + 1;
+    }
+
+    // If left and right have the same binary digits up to nth place,
+    //   left  = 0bxxx0yyyy;
+    //   right = 0bxxx1zzzz;
+    // then we have a number of the form
+    //   mid   = 0bxxx10000;
+    // which has the least possible depth (as indicated by the amount of trailing zeroes)
+    // of all the numbers between left (exclusive) and right (inclusive).
+    // The following code constructs said number (with the exception that it excludes the right bound)
+    let diff = isolate_most_significant_one(left ^ (right - 1));
+    assert!(left & diff == 0);
+    assert!((right - 1) & diff > 0);
+    // grab the high bits from `left`, force 1 where it should be, and zero out the lower bits.
+    let mask = !(diff - 1);
+    (mask & left) | diff
+}
+
+// Implementation copy-pasted from std nightly `feature(isolate_most_significant_one)`
+// https://github.com/rust-lang/rust/pull/136910
+const fn isolate_most_significant_one(x: usize) -> usize {
+    x & (1_usize << (usize::BITS - 1)).wrapping_shr(x.leading_zeros())
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
diff --git a/src/main.rs b/src/main.rs
index 65cab99..56aeac4 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -29,7 +29,7 @@ mod toolchains;
 
 use crate::bounds::{Bound, Bounds};
 use crate::github::get_commit;
-use crate::least_satisfying::{least_satisfying, Satisfies};
+use crate::least_satisfying::{least_satisfying, MidpointSelection, Satisfies};
 use crate::repo_access::{AccessViaGithub, AccessViaLocalGit, RustRepositoryAccessor};
 use crate::toolchains::{
     parse_to_naive_date, DownloadError, DownloadParams, InstallError, TestOutcome, Toolchain,
@@ -60,6 +60,8 @@ pub struct Author {
 /// artifacts of this commit itself is no longer available, so this may not be entirely useful;
 /// however, it does limit the amount of commits somewhat.
 const EPOCH_COMMIT: &str = "927c55d86b0be44337f37cf5b0a76fb8ba86e06c";
+/// The earliest known date with an available nightly
+const EPOCH_DATE: NaiveDate = NaiveDate::from_ymd_opt(2015, 1, 3).unwrap();
 
 const REPORT_HEADER: &str = "\
 ==================================================================================
@@ -816,7 +818,14 @@ impl Config {
     }
 
     fn bisect_to_regression(&self, toolchains: &[Toolchain], dl_spec: &DownloadParams) -> usize {
-        least_satisfying(toolchains, |t, remaining, estimate| {
+        let midpoint = match &toolchains[0].spec {
+            ToolchainSpec::Ci { .. } => MidpointSelection::Naive,
+            ToolchainSpec::Nightly { date } => MidpointSelection::Stabilized {
+                // A date before EPOCH_DATE would be negative; clamp to 0 instead of wrapping.
+                start_offset: usize::try_from((*date - EPOCH_DATE).num_days()).unwrap_or(0),
+            },
+        };
+        least_satisfying(toolchains, midpoint, |t, remaining, estimate| {
             eprintln!(
                 "{remaining} versions remaining to test after this (roughly {estimate} steps)"
             );