diff --git a/collector/src/lib.rs b/collector/src/lib.rs index 26cf6e5e5..2e14d98d5 100644 --- a/collector/src/lib.rs +++ b/collector/src/lib.rs @@ -14,6 +14,7 @@ use std::hash; use std::str::FromStr; use std::path::{Path, PathBuf}; use std::process::{self, Stdio}; +use std::borrow::Cow; use chrono::{DateTime, Datelike, Duration, TimeZone, Utc}; use chrono::naive::NaiveDate; @@ -61,13 +62,27 @@ impl Ord for Commit { } } -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct Patch { index: usize, pub name: String, path: PathBuf, } +impl PartialEq for Patch { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl Eq for Patch {} + +impl hash::Hash for Patch { + fn hash(&self, h: &mut H) { + self.name.hash(h); + } +} + impl Patch { pub fn new(path: PathBuf) -> Self { assert!(path.is_file()); @@ -138,14 +153,14 @@ impl BenchmarkState { } } - pub fn name(&self) -> String { + pub fn name(&self) -> Cow<'static, str> { match *self { - BenchmarkState::Clean => format!("clean"), - BenchmarkState::Nll => format!("nll"), - BenchmarkState::IncrementalStart => format!("baseline incremental"), - BenchmarkState::IncrementalClean => format!("clean incremental"), + BenchmarkState::Clean => "clean".into(), + BenchmarkState::Nll => "nll".into(), + BenchmarkState::IncrementalStart => "baseline incremental".into(), + BenchmarkState::IncrementalClean => "clean incremental".into(), BenchmarkState::IncrementalPatched(ref patch) => { - format!("patched incremental: {}", patch.name) + format!("patched incremental: {}", patch.name).into() } } } @@ -176,7 +191,7 @@ pub struct Stat { pub cnt: f64, } -#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct Run { pub stats: Vec, #[serde(default)] @@ -185,6 +200,48 @@ pub struct Run { pub state: BenchmarkState, } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct RunId { + check: bool, + release: bool, + state: BenchmarkState, +} + +impl RunId { + pub fn name(&self) -> String { + self.to_string() + } +} + +impl fmt::Display for RunId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let opt = if self.release { + "-opt" + } else if self.check { + "-check" + } else { + "" + }; + write!(f, "{}{}", self.state.name(), opt) + } +} + +impl PartialEq for Run { + fn eq(&self, other: &Self) -> bool { + self.release == other.release && + self.check == other.check && + self.state == other.state + } +} + +impl PartialEq for Run { + fn eq(&self, other: &RunId) -> bool { + self.release == other.release && + self.check == other.check && + self.state == other.state + } +} + impl Run { pub fn is_clean(&self) -> bool { self.state == BenchmarkState::Clean @@ -209,15 +266,18 @@ impl Run { false } + pub fn id(&self) -> RunId { + let state = self.state.clone(); + let state = state.erase_path(); + RunId { + check: self.check, + release: self.release, + state: state, + } + } + pub fn name(&self) -> String { - let opt = if self.release { - "-opt" - } else if self.check { - "-check" - } else { - "" - }; - self.state.name() + opt + self.id().name() } pub fn get_stat(&self, stat: &str) -> Option { @@ -243,7 +303,7 @@ pub struct CommitData { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct DeltaTime(#[serde(with = "round_float")] pub f64); -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Hash, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Date(pub DateTime); #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/site/src/api.rs b/site/src/api.rs index 7e7bce574..95388dd1d 100644 --- a/site/src/api.rs +++ b/site/src/api.rs @@ -91,7 +91,7 @@ pub mod data { use server::DateData; use collector::Bound; - #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Request { pub start: Bound, pub end: Bound, @@ -101,7 +101,7 @@ pub mod data { } /// List of DateData's from oldest to newest - #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Response(pub Vec); } @@ -126,6 +126,7 @@ pub mod graph { pub percent: f32, pub y: f32, pub x: u64, + pub color: String, } #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] @@ -148,7 +149,7 @@ pub mod days { pub stat: String, } - #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Response { pub a: DateData, pub b: DateData, diff --git a/site/src/load.rs b/site/src/load.rs index 9f712d8dc..4e29e4a14 100644 --- a/site/src/load.rs +++ b/site/src/load.rs @@ -23,10 +23,37 @@ use chrono::{Duration, Utc}; use toml; use util; +use util::Interpolate; use git; use collector::Date; -pub use collector::{Commit, CommitData, ArtifactData, Patch, Run, Stat}; +pub use collector::{RunId, Benchmark, CommitData, Commit, ArtifactData, Patch, Run, Stat}; +use collector; + +#[derive(Debug, Serialize, Deserialize)] +pub enum InterpolationSource { + /// We interpolated the first commit in the data set from the commit + /// here + First(Commit), + + /// We interpolated the last commit in the data set from the commit + /// here + Last(Commit), + + /// We interpolated a commit in the middle from the two commits + /// around it (but not necessarily directly adjacent -- generally + /// ranges of commits don't have the data). + /// + /// Data is interpolated linearly between these two commits. + Middle(Commit, Commit), +} + +#[derive(Debug)] +pub struct Interpolation { + pub benchmark: String, + pub run: Option, + pub from: InterpolationSource, +} #[derive(Clone, Deserialize, Serialize, Debug)] pub struct CurrentState { @@ -89,7 +116,19 @@ pub struct InputData { /// timezone is not important, it isn't stored, hence the Naive variant. pub last_date: Date, - pub data: BTreeMap, + /// `data_real` is as-is, `data` has been interpolated. + data_real: BTreeMap, + data: BTreeMap, + + /// The benchmarks we interpolated for a given commit. + /// + /// Not all commits are in this map. + pub interpolated: HashMap>, + + /// The list of commits in the `data` map. + pub data_commits: Vec, + /// A map from commit to index in the `commits` vector. + pub commit_map: HashMap, pub artifact_data: BTreeMap, @@ -101,6 +140,13 @@ pub struct InputData { } impl InputData { + pub fn data(&self, interpolate: Interpolate) -> &BTreeMap { + match interpolate { + Interpolate::Yes => &self.data, + Interpolate::No => &self.data_real, + } + } + /// Initialize `InputData from the file system. pub fn from_fs(repo_loc: &str) -> Result { let repo_loc = PathBuf::from(repo_loc); @@ -229,10 +275,107 @@ impl InputData { let commits = rust_sysroot::get_commits(rust_sysroot::EPOCH_COMMIT, "master").map_err(SyncFailure::new)?; println!("Update of rust.git complete"); + let data_commits = data.keys().cloned().collect::>(); + + let mut commit_map = HashMap::with_capacity(data_commits.len()); + for (idx, commit) in data_commits.iter().enumerate() { + commit_map.insert(commit.clone(), idx); + } + + eprintln!("Starting interpolation..."); + let start = ::std::time::Instant::now(); + let data_real = data.clone(); + let mut interpolated = HashMap::new(); + let mut data_next = data; + + let current_benchmarks = data_real.iter().rev().take(20) + .flat_map(|(_, cd)| { + cd.benchmarks.keys().cloned() + }) + .collect::>() + .into_iter() + .collect::>(); + + let mut known_runs: HashMap> = HashMap::new(); + for (_, cd) in data_real.iter().rev().take(20) { + for (name, benchmark) in &cd.benchmarks { + if let Ok(benchmark) = benchmark { + let mut entry = known_runs.entry(name.clone()) + .or_insert_with(HashSet::new); + for run in &benchmark.runs { + entry.insert(run.id()); + } + } + } + } + trace!("computed current benchmarks and runs, in {:?}", start.elapsed()); + + // The data holds this tree: + // [commit] -> [benchmark] -> [run] -> [stat] + + for (commit, cd) in &mut data_next { + for benchmark_name in ¤t_benchmarks { + // We do not interpolate try commits today + // because we don't track their parents so it's + // difficult to add that data in. + if commit.is_try() { + continue; + } + + let mut assoc = AssociatedData { + commit: &commit, + data: &data_real, + commits: &data_commits, + commit_map: &commit_map, + interpolated: &mut interpolated, + }; + + let entry = cd.benchmarks.entry(benchmark_name.to_owned()) + .or_insert_with(|| Err(String::from("dummy bench"))); + + // benchmark did not run successfully at this commit + // or benchmark did not attempt to run at this commit + if entry.is_err() { + let runs = fill_benchmark_data(benchmark_name, &mut assoc); + // If we couldn't do this then do nothing + if let Some(runs) = runs { + *entry = Ok(Benchmark { + name: benchmark_name.to_owned(), + runs: runs, + }); + } + } + + // benchmark exists, but might have runs missing + if let Ok(benchmark) = entry { + let missing_runs = known_runs[benchmark_name] + .iter() + .filter(|rname| !benchmark.runs.iter().any(|r| *r == **rname)) + .collect::>(); + if !missing_runs.is_empty() { + let before = benchmark.runs.len(); + fill_benchmark_runs(benchmark, missing_runs, &mut assoc); + assert_ne!(before, benchmark.runs.len(), "made progress"); + } + } + } + } + + let interpolated = interpolated.into_iter() + .filter(|(_, v)| !v.is_empty()) + .collect::>(); + + eprintln!("Interpolation of {} commits complete in {:?}", interpolated.len(), start.elapsed()); + let data = data_next; + Ok(InputData { crate_list: crate_list, stats_list: stats_list, + data_commits: data_commits, + commit_map: commit_map, + interpolated, last_date: last_date, + data_real: data_real, data: data, artifact_data, commits, @@ -276,3 +419,198 @@ impl InputData { /// One decimal place rounded percent #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub struct Percent(#[serde(with = "util::round_float")] pub f64); + +struct AssociatedData<'a> { + commit: &'a Commit, + data: &'a BTreeMap, + commits: &'a [Commit], + commit_map: &'a HashMap, + interpolated: &'a mut HashMap>, +} + +// This function can assume that the benchmark exists and is restricted to filling in runs within +// the benchmark. +fn fill_benchmark_runs(benchmark: &mut Benchmark, missing_runs: Vec<&RunId>, data: &mut AssociatedData) { + fn find_run<'a, I>(benchmark: &str, needle_run: &RunId, commits: I, data: &AssociatedData) -> Option<(usize, Run)> + where I: Iterator, + { + for (idx, needle) in commits.enumerate() { + let bench = data.data[needle].benchmarks.get(benchmark); + if let Some(Ok(bench)) = bench { + if let Some(run) = bench.runs.iter().find(|run| **run == *needle_run) { + return Some((idx, run.clone())); + } + } + } + None + } + + let commit_idx = data.commit_map[data.commit]; + for missing_run in missing_runs { + let start = find_run(&benchmark.name, &missing_run, data.commits[..commit_idx].iter().rev(), &*data); + let end = find_run(&benchmark.name, &missing_run, data.commits[commit_idx + 1..].iter(), &*data); + let start_commit = start.as_ref().map(|(idx, _)| data.commits[commit_idx - 1 - idx].clone()); + let end_commit = end.as_ref().map(|(idx, _)| data.commits[commit_idx + 1 + idx].clone()); + + assert_ne!(start_commit.as_ref(), Some(data.commit)); + assert_ne!(end_commit.as_ref(), Some(data.commit)); + + let mut interpolations = data.interpolated.entry(data.commit.sha.clone()).or_insert_with(Vec::new); + let run = match (start, end) { + (Some(srun), Some(erun)) => { + let distance = srun.0 + erun.0; + let from_start = srun.0; + let interpolated_stats = interpolate_stats(&srun.1, &erun.1, distance, from_start); + let mut interpolated_run = srun.1; + interpolated_run.stats = interpolated_stats; + interpolations.push(Interpolation { + benchmark: benchmark.name.clone(), + run: Some(missing_run.clone()), + from: InterpolationSource::Middle( + start_commit.unwrap(), + end_commit.unwrap(), + ), + }); + interpolated_run + } + (Some(srun), None) => { + interpolations.push(Interpolation { + benchmark: benchmark.name.clone(), + run: Some(missing_run.clone()), + from: InterpolationSource::First( + start_commit.unwrap(), + ), + }); + srun.1 + } + (None, Some(erun)) => { + interpolations.push(Interpolation { + benchmark: benchmark.name.clone(), + run: Some(missing_run.clone()), + from: InterpolationSource::Last( + end_commit.unwrap(), + ), + }); + erun.1 + } + (None, None) => { + unreachable!("{} run in benchmark {} has no entries, but it's missing!", + missing_run, benchmark.name) + } + }; + benchmark.runs.push(run); + } +} + +fn fill_benchmark_data(benchmark_name: &str, data: &mut AssociatedData) -> Option> { + let commit_idx = data.commit_map[data.commit]; + let interpolation_entry = + data.interpolated.entry(data.commit.sha.clone()).or_insert_with(Vec::new); + + let mut start = None; + let mut end = None; + for needle_commit in data.commits[..commit_idx].iter().rev() { + let bench = data.data[needle_commit].benchmarks.get(benchmark_name); + if let Some(Ok(bench)) = bench { + start = Some((needle_commit.clone(), bench.clone())); + break; + } + } + for needle_commit in data.commits[commit_idx + 1..].iter() { + let bench = data.data[needle_commit].benchmarks.get(benchmark_name); + if let Some(Ok(bench)) = bench { + end = Some((needle_commit.clone(), bench.clone())); + break; + } + } + + match (start, end) { + // This hole is bounded on both left and + // right, so we want to linearly interpolate + // each run between these two data points. + // + // This code ignores the case where a run is + // absent in start or end. This is handled later. + (Some(start), Some(end)) => { + let distance = data.commit_map[&end.0] - data.commit_map[&start.0]; + let from_start = commit_idx - data.commit_map[&start.0]; + let start_runs = &start.1.runs; + let end_runs = &end.1.runs; + + let mut interpolated_runs = Vec::with_capacity(start_runs.len()); + + for srun in start_runs { + for erun in end_runs { + // Found pair + if srun == erun { + let interpolated_stats = interpolate_stats(&srun, &erun, distance, from_start); + let mut interpolated_run = srun.clone(); + interpolated_run.stats = interpolated_stats; + interpolated_runs.push(interpolated_run); + } + } + } + + interpolation_entry.push( + Interpolation { + benchmark: benchmark_name.to_owned(), + run: None, + from: InterpolationSource::Middle(start.0, end.0), + }); + return Some(interpolated_runs); + } + + // This hole is unbounded to the right, so + // fill in directly with data from the + // left. + (Some(start), None) => { + interpolation_entry.push( + Interpolation { + benchmark: benchmark_name.to_owned(), + run: None, + from: InterpolationSource::Last(start.0), + }); + return Some(start.1.runs); + } + + // This hole is unbounded to the left, so + // fill in directly with data from the + // right. + (None, Some(end)) => { + interpolation_entry.push( + Interpolation { + benchmark: benchmark_name.to_owned(), + run: None, + from: InterpolationSource::First(end.0), + }); + return Some(end.1.runs); + } + + // No data for this benchmark was found to + // either side. No data exists for this + // benchmark. Bail out and return the + // original (missing) data. + (None, None) => { + warn!("giving up on finding {} data for commit {:?}", + benchmark_name, data.commit); + return None; + } + } + + // we never reach here +} + +fn interpolate_stats(srun: &Run, erun: &Run, distance: usize, from_start: usize) -> Vec { + let mut interpolated_stats = Vec::with_capacity(srun.stats.len()); + for sstat in &srun.stats { + if let Some(estat) = erun.get_stat(&sstat.name) { + let slope = (estat - sstat.cnt) / (distance as f64); + let interpolated = slope * (from_start as f64) + sstat.cnt; + interpolated_stats.push(collector::Stat { + name: sstat.name.clone(), + cnt: interpolated, + }); + } + } + interpolated_stats +} diff --git a/site/src/server.rs b/site/src/server.rs index a55a93d61..fb0b18a01 100644 --- a/site/src/server.rs +++ b/site/src/server.rs @@ -16,6 +16,7 @@ use std::path::Path; use std::net::SocketAddr; use std::sync::atomic::{AtomicBool, Ordering as AtomicOrdering}; use std::cmp::Ordering; +use std::borrow::Cow; use serde::Serialize; use serde::de::DeserializeOwned; @@ -40,7 +41,7 @@ use regex::Regex; use reqwest; use git; -use util::{self, get_repo_path}; +use util::{self, get_repo_path, Interpolate}; pub use api::{self, github, status, nll_dashboard, dashboard, data, days, graph, info, CommitResponse, ServerResult}; use collector::{Date, Run, version_supports_incremental}; use collector::api::collected; @@ -50,8 +51,10 @@ use load::CurrentState; header! { (HubSignature, "X-Hub-Signature") => [String] } +static INTERPOLATED_COLOR: &str = "#fcb0f1"; + /// Data associated with a specific date -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct DateData { pub date: Date, pub commit: String, @@ -105,7 +108,7 @@ pub fn handle_nll_dashboard( body: nll_dashboard::Request, data: &InputData ) -> ServerResult { - let commit = util::find_commit(data, &body.commit, false)?.1; + let commit = util::find_commit(data, &body.commit, false, Interpolate::No)?.1; let mut points = commit.benchmarks.iter() .filter_map(|b| b.1.as_ref().ok()) .map(|bench| { @@ -163,7 +166,7 @@ pub fn handle_dashboard(data: &InputData) -> dashboard::Response { } }); - versions.push(format!("master: {}", &data.data.keys().last().unwrap().sha[0..8])); + versions.push(format!("master: {}", &data.data(Interpolate::Yes).keys().last().unwrap().sha[0..8])); let mut check_clean_average = Vec::new(); let mut check_base_incr_average = Vec::new(); @@ -196,7 +199,7 @@ pub fn handle_dashboard(data: &InputData) -> dashboard::Response { let mut opt_println_incr_points = Vec::new(); let mut benches = if version.starts_with("master") { - let data = data.data.values().last().unwrap(); + let data = data.data(Interpolate::Yes).values().last().unwrap(); let benches = data.benchmarks.iter() .filter(|(name, _)| benchmark_names.contains(name)).collect::>(); assert_eq!(benches.len(), benchmark_names.len()); @@ -273,7 +276,7 @@ pub fn handle_dashboard(data: &InputData) -> dashboard::Response { } pub fn handle_status_page(data: &InputData) -> status::Response { - let last_commit = data.data.iter().last().unwrap(); + let last_commit = data.data(Interpolate::No).iter().last().unwrap(); let mut benchmark_state = last_commit.1.benchmarks.iter() .map(|(name, res)| { @@ -324,7 +327,7 @@ pub fn handle_graph(body: graph::Request, data: &InputData) -> ServerResult, _>> = HashMap::with_capacity(data.crate_list.len() * 3); let elements = out.len(); let mut last_commit = None; let mut initial_debug_base_compile = None; @@ -334,6 +337,7 @@ pub fn handle_graph(body: graph::Request, data: &InputData) -> ServerResult ServerResult::with_capacity(elements)); let first = entry.first().map(|d| d.absolute as f32); let percent = first.map_or(0.0, |f| (value - f) / f * 100.0); entry.push(graph::GraphData { - benchmark: run.state.name(), + benchmark: run.state.name().into(), commit: commit.clone(), prev_commit: last_commit.clone(), absolute: value, percent: percent, y: if body.absolute { value } else { percent }, x: date_data.date.0.timestamp() as u64 * 1000, // all dates are since 1970 + color: { + data.interpolated.get(&commit) + .map(|c| c.iter().any(|interpolation| { + if !bench_name.starts_with(&interpolation.benchmark) { + return false; + } + if let Some(run_name) = &interpolation.run { + run == *run_name + } else { + true + } + })) + .map(|b| if b { String::from(INTERPOLATED_COLOR) } else { String::new() }) + .unwrap_or(String::new()) + } }); } if base_compile && is_println_incr { @@ -396,8 +415,8 @@ pub fn handle_graph(body: graph::Request, data: &InputData) -> ServerResult, _> = result + .entry((String::from("Summary") + appendix).into()) .or_insert_with(HashMap::new); let entry = summary.entry(state.name()).or_insert_with(Vec::new); let value = (values.iter().sum::() as f32) / (values.len() as f32); @@ -411,13 +430,25 @@ pub fn handle_graph(body: graph::Request, data: &InputData) -> ServerResult ServerResult ServerResult { +fn handle_data(body: data::Request, data: &InputData) -> ServerResult { debug!( "handle_data: start = {:?}, end = {:?}", body.start, body.end ); - let range = util::data_range(&data, &body.start, &body.end)?; + let range = util::data_range(&data, &body.start, &body.end, Interpolate::Yes)?; let mut result = range .into_iter() .map(|(_, day)| day) @@ -476,8 +511,8 @@ pub fn handle_data(body: data::Request, data: &InputData) -> ServerResult ServerResult { - let a = util::find_commit(data, &body.start, true)?; - let b = util::find_commit(data, &body.end, false)?; + let a = util::find_commit(data, &body.start, true, Interpolate::No)?; + let b = util::find_commit(data, &body.end, false, Interpolate::No)?; Ok(days::Response { a: DateData::for_day(a.1, &body.stat), b: DateData::for_day(b.1, &body.stat), @@ -896,7 +931,6 @@ impl Service for Server { match req.path() { "/perf/info" => self.handle_get(&req, handle_info), "/perf/dashboard" => self.handle_get(&req, handle_dashboard), - "/perf/data" => self.handle_post(req, handle_data), "/perf/graph" => self.handle_post(req, handle_graph), "/perf/get" => self.handle_post(req, handle_days), "/perf/nll_dashboard" => self.handle_post(req, handle_nll_dashboard), diff --git a/site/src/util.rs b/site/src/util.rs index 19a4cdb00..4cef6f1b0 100644 --- a/site/src/util.rs +++ b/site/src/util.rs @@ -15,13 +15,20 @@ use failure::Error; use chrono::Duration; +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum Interpolate { + Yes, + No, +} + pub fn find_commit<'a>( data: &'a InputData, idx: &Bound, left: bool, + interpolate: Interpolate, ) -> Result<(&'a Commit, &'a CommitData), String> { let last_month = data.last_date.0.naive_utc().date() - Duration::days(30); - for (commit, cd) in &data.data { + for (commit, cd) in data.data(interpolate) { let found = match *idx { Bound::Commit(ref sha) => commit.sha == *sha, Bound::Date(ref date) => { @@ -48,7 +55,7 @@ pub fn find_commit<'a>( } if !left && *idx == Bound::None { - return data.data + return data.data(interpolate) .iter() .last() .ok_or_else(|| format!("at least one commit")); @@ -64,12 +71,13 @@ pub fn data_range<'a>( data: &'a InputData, a: &Bound, b: &Bound, + interpolate: Interpolate, ) -> Result, String> { let mut ret = Vec::new(); let mut in_range = false; - let left_bound = find_commit(data, a, true)?.0; - let right_bound = find_commit(data, b, false)?.0; - for (commit, cd) in &data.data { + let left_bound = find_commit(data, a, true, interpolate)?.0; + let right_bound = find_commit(data, b, false, interpolate)?.0; + for (commit, cd) in data.data(interpolate) { if commit.sha == left_bound.sha { in_range = true; }