Skip to content

Stabilize bisection logic to ease toolchain reuse #372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 226 additions & 19 deletions src/least_satisfying.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,47 @@
use std::collections::BTreeMap;
use std::fmt;

pub fn least_satisfying<T, P>(slice: &[T], mut predicate: P) -> usize
// Returns the index of the earliest element of `slice` for which `predicate` returns Satisfies::Yes,
// assuming that all elements with `Satisfies::No` precede those with `Satisfies::Yes`.
pub fn least_satisfying<T, P>(
slice: &[T],
midpoint_selection: MidpointSelection,
mut predicate: P,
) -> usize
where
T: fmt::Display + fmt::Debug,
P: FnMut(&T, usize, usize) -> Satisfies,
{
let mut cache = BTreeMap::new();
let mut predicate = |idx: usize, rm_no, lm_yes| {
let range: usize = lm_yes - rm_no + 1;
// FIXME: This does not consider unknown_ranges.
let remaining = range / 2;
let estimate = if range < 3 { 0 } else { range.ilog2() as usize };

let estimate;
{
// The estimate of the remaining step count based on the range of the values left to check.
// Can be an underestimate if the (future) midpoint(s) don't land close enough to the
// true middle of the bisected ranges, but usually by no more than 2.
let range_est = range.ilog2() as usize;
match midpoint_selection {
MidpointSelection::Naive => estimate = range_est,
MidpointSelection::Stabilized { start_offset } => {
// The estimate of the remaining step count based on the height of the current idx in
// the overall binary tree. This is tailored to the specific midpoint selection strategy
// currently used, and relies on the fact that each step of the way we get at least
// one more step away from the root of the binary tree.
// Can arbitrarily overestimate the number of steps (think a short bisection range centered
// around the tree root).
// Can also *under*estimate the number of steps if the `idx` was not actually
// a direct result of `midpoint_stable_offset`, but rather tweaked slightly to work around
// unknown ranges.
let height_est = (start_offset + 1 + idx).trailing_zeros() as usize;
// Real estimate. Combines our best guesses via the two above methods. Can still be somewhat
// off in presence of unknown ranges.
estimate = height_est.clamp(range_est, range_est + 2)
}
};
}
*cache
.entry(idx)
.or_insert_with(|| predicate(&slice[idx], remaining, estimate))
Expand All @@ -25,13 +55,19 @@ where
// this should be tested before the call
let mut lm_yes = slice.len() - 1;

let mut next = (rm_no + lm_yes) / 2;

let mut next: usize;
loop {
// simple case with no unknown ranges
if rm_no + 1 == lm_yes {
return lm_yes;
}
next = match midpoint_selection {
MidpointSelection::Naive => (rm_no + lm_yes) / 2,
MidpointSelection::Stabilized { start_offset } => {
midpoint_stable_offset(start_offset, rm_no, lm_yes)
}
};

for (left, right) in unknown_ranges.iter().copied() {
// if we're straddling an unknown range, then pretend it doesn't exist
if rm_no + 1 == left && right + 1 == lm_yes {
Expand All @@ -52,11 +88,9 @@ where
match r {
Satisfies::Yes => {
lm_yes = next;
next = (rm_no + lm_yes) / 2;
}
Satisfies::No => {
rm_no = next;
next = (rm_no + lm_yes) / 2;
}
Satisfies::Unknown => {
let mut left = next;
Expand All @@ -70,19 +104,32 @@ where
right += 1;
}
unknown_ranges.push((left + 1, right - 1));
next = left;
}
}
}
}

/// Governs the way a midpoint element is selected during bisection.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MidpointSelection {
    /// Midpoint is simply `(start + end) / 2`.
    ///
    /// Shall achieve the bisection in the least steps possible.
    Naive,
    /// Midpoint aims to be reused between different bisections,
    /// regardless of the initial bounds selection.
    ///
    /// `start_offset` is the offset of the first element of the slice
    /// in a (hypothetical) "overall" array of "all the elements possible".
    Stabilized { start_offset: usize },
}

#[cfg(test)]
mod tests {
use super::Satisfies::{No, Unknown, Yes};
use super::{least_satisfying, Satisfies};
use super::{midpoint_stable, MidpointSelection};
use quickcheck::{QuickCheck, TestResult};

fn prop(xs: Vec<Option<bool>>) -> TestResult {
fn prop(midpoint_sel: Option<usize>, xs: Vec<Option<bool>>) -> TestResult {
let mut satisfies_v = xs
.into_iter()
.map(std::convert::Into::into)
Expand All @@ -98,48 +145,87 @@ mod tests {
_ => {}
}
}
if midpoint_sel.unwrap_or(0) > usize::MAX / 2 {
// not interested in testing usize overflows
return TestResult::discard();
}

let res = least_satisfying(&satisfies_v, |i, _, _| *i);
let midpoint = match midpoint_sel {
None => MidpointSelection::Naive,
Some(x) => MidpointSelection::Stabilized { start_offset: x },
};

let res = least_satisfying(&satisfies_v, midpoint, |i, _, _| *i);
let exp = first_yes.unwrap();
TestResult::from_bool(res == exp)
}

#[test]
fn least_satisfying_1() {
assert_eq!(
least_satisfying(&[No, Unknown, Unknown, No, Yes], |i, _, _| *i),
least_satisfying(
&[No, Unknown, Unknown, No, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
4
);
}

#[test]
fn least_satisfying_2() {
assert_eq!(
least_satisfying(&[No, Unknown, Yes, Unknown, Yes], |i, _, _| *i),
least_satisfying(
&[No, Unknown, Yes, Unknown, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
2
);
}

#[test]
fn least_satisfying_3() {
assert_eq!(least_satisfying(&[No, No, No, No, Yes], |i, _, _| *i), 4);
assert_eq!(
least_satisfying(
&[No, No, No, No, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
4
);
}

#[test]
fn least_satisfying_4() {
assert_eq!(least_satisfying(&[No, No, Yes, Yes, Yes], |i, _, _| *i), 2);
assert_eq!(
least_satisfying(
&[No, No, Yes, Yes, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
2
);
}

#[test]
fn least_satisfying_5() {
assert_eq!(least_satisfying(&[No, Yes, Yes, Yes, Yes], |i, _, _| *i), 1);
assert_eq!(
least_satisfying(
&[No, Yes, Yes, Yes, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
1
);
}

#[test]
fn least_satisfying_6() {
assert_eq!(
least_satisfying(
&[No, Yes, Yes, Unknown, Unknown, Yes, Unknown, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
1
Expand All @@ -148,21 +234,142 @@ mod tests {

#[test]
fn least_satisfying_7() {
assert_eq!(least_satisfying(&[No, Yes, Unknown, Yes], |i, _, _| *i), 1);
assert_eq!(
least_satisfying(
&[No, Yes, Unknown, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
1
);
}

#[test]
fn least_satisfying_8() {
assert_eq!(
least_satisfying(&[No, Unknown, No, No, Unknown, Yes, Yes], |i, _, _| *i),
least_satisfying(
&[No, Unknown, No, No, Unknown, Yes, Yes],
MidpointSelection::Naive,
|i, _, _| *i
),
5
);
}

#[test]
fn qc_prop() {
QuickCheck::new().quickcheck(prop as fn(_) -> _);
fn least_satisfying_9() {
assert_eq!(
least_satisfying(&[No, Unknown, Yes], MidpointSelection::Naive, |i, _, _| *i),
2
);
}

#[test]
fn qc_prop_least_satisfying() {
QuickCheck::new().quickcheck(prop as fn(_, _) -> _);
}

#[test]
fn midpoint_test() {
    // Each entry is (left, right, expected midpoint); both bounds are exclusive.
    let cases: [(usize, usize, usize); 11] = [
        (1, 3, 2),
        (3, 6, 4),
        (1, 5, 4),
        (2, 5, 4),
        (4, 7, 6),
        (8, 13, 12),
        (8, 16, 12),
        (25, 27, 26),
        (25, 28, 26),
        (25, 29, 28),
        (33, 65, 64),
    ];
    for (left, right, expected) in cases {
        assert_eq!(
            midpoint_stable(left, right),
            expected,
            "midpoint_stable({left}, {right})"
        );
    }
}

#[test]
fn qc_prop_midpoint_stable() {
    // Property: the chosen midpoint lies strictly between the bounds, and no index
    // strictly between the bounds has more trailing zeros than the midpoint does.
    fn prop_midpoint(left: usize, right: usize) -> TestResult {
        // Inputs outside the interesting domain are discarded rather than failed:
        // values that risk usize overflow, a zero left bound, and ranges that
        // contain no candidate at all. The overflow check comes first so that
        // `left + 1` below cannot overflow.
        let overflow_prone = left > usize::MAX / 2 || right > usize::MAX / 2;
        if overflow_prone || left == 0 || left + 1 >= right {
            return TestResult::discard();
        }

        let mid = midpoint_stable(left, right);
        // The midpoint has to be strictly inside the range.
        if !(left < mid && mid < right) {
            return TestResult::failed();
        }

        // Every index with more trailing zeros than `mid` is a multiple of `step`.
        // Walk those multiples, starting from the one at or below `left`; finding one
        // strictly inside the range means `mid` was not the best available choice.
        let step: usize = 1 << (mid.trailing_zeros() + 1);
        let first_probe = left & !(step - 1);
        let better_candidate_exists = (first_probe..right)
            .step_by(step)
            .any(|probe| probe > left);

        TestResult::from_bool(!better_candidate_exists)
    }
    QuickCheck::new().quickcheck(prop_midpoint as fn(_, _) -> _);
}
}

/// Stabilized midpoint for a slice that starts `start_offset` elements into the
/// overall sequence; see the documentation of `midpoint_stable` for the selection rule.
///
/// `left` and `right` are indices into the slice, and the returned midpoint is
/// expressed in the same slice-relative coordinates.
fn midpoint_stable_offset(start_offset: usize, left: usize, right: usize) -> usize {
    // `midpoint_stable` treats indices as nodes of a binary tree numbered from one,
    // not zero (i.e. both 1 and 3 are children of 2, and 0 is not in the tree at all).
    // That requirement is counterintuitive and hard to explain, so rather than
    // bubbling it up the stack we fold the extra one into the shift right here.
    let shift = start_offset + 1;
    let tree_left = left + shift;
    let tree_right = right + shift;
    midpoint_stable(tree_left, tree_right) - shift
}
/// Returns a "stabilized midpoint" between the two slice indices (endpoints excluded).
///
/// That is, returns such an index that is likely to be reused by future bisector invocations.
/// In practice, this reinterprets the slice as a "complete" (i.e. left-heavy) binary tree,
/// and finds the lowest-depth node between the two indices. This ensures that low-depth
/// nodes are more likely to be tried first (and thus reused) regardless of the initial search
/// boundaries, while still keeping the "binary" in "binary search" and completing the task
/// in O(log_2(n)) steps.
///
/// # Panics
///
/// Panics if there is no index strictly between `left` and `right`, i.e. unless
/// `right - left > 1`. Reversed bounds (`right < left`) are rejected by the same check
/// instead of underflowing the subtraction.
fn midpoint_stable(left: usize, right: usize) -> usize {
    // `left < right` is tested first so that `right - left` can never underflow: without it,
    // reversed bounds would wrap around in builds without overflow checks and slip past
    // the assertion.
    assert!(
        left < right && right - left > 1,
        "midpoint_stable called with consecutive values. Can't handle this, there's no midpoint. {:?} vs {:?}",
        left,
        right
    );
    // If we only have a single candidate - return it
    if left + 1 == right - 1 {
        return left + 1;
    }

    // If left and right have the same binary digits up to nth place,
    // left = 0bxxx0yyyy;
    // right = 0bxxx1zzzz;
    // then we have a number of the form
    // mid = 0bxxx10000;
    // which has the least possible depth (as indicated by the amount of trailing zeroes)
    // of all the numbers between left (exclusive) and right (inclusive).
    // The following code constructs said number (with the exception that it excludes the
    // right bound, which is why `right - 1` is used below).
    let diff = isolate_most_significant_one(left ^ (right - 1));
    assert!(left & diff == 0);
    assert!((right - 1) & diff > 0);
    // Grab the high bits from `left`, force a 1 at the first differing bit,
    // and zero out everything below it.
    let mask = !(diff - 1);
    (mask & left) | diff
}

// Behaves like std nightly `feature(isolate_most_significant_one)`
// https://github.com/rust-lang/rust/pull/136910
/// Returns `x` with every bit cleared except its most significant set bit;
/// returns `0` when `x` is `0`.
const fn isolate_most_significant_one(x: usize) -> usize {
    if x == 0 {
        return 0;
    }
    // Position of the highest set bit, counted from the least significant end.
    let top_bit = usize::BITS - 1 - x.leading_zeros();
    1 << top_bit
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
Expand Down
12 changes: 10 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ mod toolchains;

use crate::bounds::{Bound, Bounds};
use crate::github::get_commit;
use crate::least_satisfying::{least_satisfying, Satisfies};
use crate::least_satisfying::{least_satisfying, MidpointSelection, Satisfies};
use crate::repo_access::{AccessViaGithub, AccessViaLocalGit, RustRepositoryAccessor};
use crate::toolchains::{
parse_to_naive_date, DownloadError, DownloadParams, InstallError, TestOutcome, Toolchain,
Expand Down Expand Up @@ -60,6 +60,8 @@ pub struct Author {
/// artifacts of this commit itself is no longer available, so this may not be entirely useful;
/// however, it does limit the amount of commits somewhat.
const EPOCH_COMMIT: &str = "927c55d86b0be44337f37cf5b0a76fb8ba86e06c";
/// The earliest known date with an available nightly (2015-01-03).
const EPOCH_DATE: chrono::NaiveDate = NaiveDate::from_ymd_opt(2015, 1, 3).unwrap();

const REPORT_HEADER: &str = "\
==================================================================================
Expand Down Expand Up @@ -816,7 +818,13 @@ impl Config {
}

fn bisect_to_regression(&self, toolchains: &[Toolchain], dl_spec: &DownloadParams) -> usize {
least_satisfying(toolchains, |t, remaining, estimate| {
let midpoint = match &toolchains[0].spec {
ToolchainSpec::Ci { .. } => MidpointSelection::Naive,
ToolchainSpec::Nightly { date } => MidpointSelection::Stabilized {
start_offset: (*date - EPOCH_DATE).num_days() as usize,
},
};
least_satisfying(toolchains, midpoint, |t, remaining, estimate| {
eprintln!(
"{remaining} versions remaining to test after this (roughly {estimate} steps)"
);
Expand Down
Loading