Skip to content

Commit a00ccef

Browse files
committed
Clarify significance threshold
1 parent fefad06 commit a00ccef

File tree

1 file changed

+37
-29
lines changed

1 file changed

+37
-29
lines changed

site/src/comparison.rs

+37-29
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,7 @@ pub async fn handle_compare(
103103
benchmark: comparison.benchmark.to_string(),
104104
profile: comparison.profile.to_string(),
105105
scenario: comparison.scenario.to_string(),
106-
is_dodgy: comparison
107-
.variance
108-
.as_ref()
109-
.map(|v| v.is_dodgy())
110-
.unwrap_or(false),
106+
is_dodgy: comparison.is_dodgy(),
111107
is_significant: comparison.is_significant(),
112108
historical_statistics: comparison.variance.map(|v| v.data),
113109
statistics: comparison.results,
@@ -135,8 +131,8 @@ async fn populate_report(comparison: &Comparison, report: &mut HashMap<Direction
135131
}
136132

137133
pub struct ComparisonSummary {
138-
hi: Option<BenchmarkComparison>,
139-
lo: Option<BenchmarkComparison>,
134+
hi: Option<TestResultComparison>,
135+
lo: Option<TestResultComparison>,
140136
}
141137

142138
impl ComparisonSummary {
@@ -147,7 +143,7 @@ impl ComparisonSummary {
147143
return None;
148144
}
149145

150-
let cmp = |b1: &BenchmarkComparison, b2: &BenchmarkComparison| {
146+
let cmp = |b1: &TestResultComparison, b2: &TestResultComparison| {
151147
b1.log_change()
152148
.partial_cmp(&b2.log_change())
153149
.unwrap_or(std::cmp::Ordering::Equal)
@@ -183,7 +179,7 @@ impl ComparisonSummary {
183179
}
184180

185181
/// The changes ordered by their signficance (most significant first)
186-
pub fn ordered_changes(&self) -> Vec<&BenchmarkComparison> {
182+
pub fn ordered_changes(&self) -> Vec<&TestResultComparison> {
187183
match (&self.hi, &self.lo) {
188184
(None, None) => Vec::new(),
189185
(Some(b), None) => vec![b],
@@ -276,7 +272,7 @@ async fn compare_given_commits(
276272
.filter_map(|(test_case, a)| {
277273
statistics_for_b
278274
.get(&test_case)
279-
.map(|&b| BenchmarkComparison {
275+
.map(|&b| TestResultComparison {
280276
benchmark: test_case.0,
281277
profile: test_case.1,
282278
scenario: test_case.2,
@@ -423,7 +419,7 @@ pub struct Comparison {
423419
pub a: ArtifactDescription,
424420
pub b: ArtifactDescription,
425421
/// Statistics based on test case
426-
pub statistics: HashSet<BenchmarkComparison>,
422+
pub statistics: HashSet<TestResultComparison>,
427423
}
428424

429425
impl Comparison {
@@ -455,7 +451,7 @@ impl Comparison {
455451
next_commit(&self.b.artifact, master_commits).map(|c| c.sha.clone())
456452
}
457453

458-
fn get_benchmarks(&self) -> impl Iterator<Item = &BenchmarkComparison> {
454+
fn get_benchmarks(&self) -> impl Iterator<Item = &TestResultComparison> {
459455
self.statistics.iter().filter(|b| b.profile != Profile::Doc)
460456
}
461457
}
@@ -631,18 +627,25 @@ pub fn next_commit<'a>(
631627
}
632628
}
633629

634-
// A single comparison based on benchmark and cache state
630+
// A single comparison between two test results
635631
#[derive(Debug, Clone)]
636-
pub struct BenchmarkComparison {
632+
pub struct TestResultComparison {
637633
benchmark: Benchmark,
638634
profile: Profile,
639635
scenario: Scenario,
640636
variance: Option<BenchmarkVariance>,
641637
results: (f64, f64),
642638
}
643639

644-
const SIGNIFICANCE_THRESHOLD: f64 = 0.001;
645-
impl BenchmarkComparison {
640+
impl TestResultComparison {
641+
/// The amount of relative change considered significant when
642+
/// the test case is not dodgy
643+
const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD: f64 = 0.01;
644+
645+
/// The amount of relative change considered significant when
646+
/// the test case is dodgy
647+
const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY: f64 = 1.0;
648+
646649
fn log_change(&self) -> f64 {
647650
let (a, b) = self.results;
648651
(b / a).ln()
@@ -654,15 +657,20 @@ impl BenchmarkComparison {
654657
}
655658

656659
fn is_significant(&self) -> bool {
657-
// This particular test case frequently varies
658-
if &self.benchmark == "coercions"
659-
&& self.profile == Profile::Debug
660-
&& matches!(self.scenario, Scenario::IncrementalPatch(p) if &p == "println")
661-
{
662-
self.relative_change().abs() > 2.0
660+
let threshold = if self.is_dodgy() {
661+
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY
663662
} else {
664-
self.log_change().abs() > SIGNIFICANCE_THRESHOLD
665-
}
663+
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD
664+
};
665+
666+
self.relative_change().abs() > threshold
667+
}
668+
669+
fn is_dodgy(&self) -> bool {
670+
self.variance
671+
.as_ref()
672+
.map(|v| v.is_dodgy())
673+
.unwrap_or(false)
666674
}
667675

668676
fn relative_change(&self) -> f64 {
@@ -671,7 +679,7 @@ impl BenchmarkComparison {
671679
}
672680

673681
fn direction(&self) -> Direction {
674-
if self.log_change() > 0.0 {
682+
if self.relative_change() > 0.0 {
675683
Direction::Regression
676684
} else {
677685
Direction::Improvement
@@ -680,7 +688,7 @@ impl BenchmarkComparison {
680688

681689
pub fn summary_line(&self, summary: &mut String, link: Option<&str>) {
682690
use std::fmt::Write;
683-
let magnitude = self.log_change().abs();
691+
let magnitude = self.relative_change().abs();
684692
let size = if magnitude > 0.10 {
685693
"Very large"
686694
} else if magnitude > 0.05 {
@@ -713,16 +721,16 @@ impl BenchmarkComparison {
713721
.unwrap();
714722
}
715723
}
716-
impl std::cmp::PartialEq for BenchmarkComparison {
724+
impl std::cmp::PartialEq for TestResultComparison {
717725
fn eq(&self, other: &Self) -> bool {
718726
self.benchmark == other.benchmark
719727
&& self.profile == other.profile
720728
&& self.scenario == other.scenario
721729
}
722730
}
723-
impl std::cmp::Eq for BenchmarkComparison {}
731+
impl std::cmp::Eq for TestResultComparison {}
724732

725-
impl std::hash::Hash for BenchmarkComparison {
733+
impl std::hash::Hash for TestResultComparison {
726734
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
727735
self.benchmark.hash(state);
728736
self.profile.hash(state);

0 commit comments

Comments
 (0)