Skip to content

Commit 27d3e3d

Browse files
committed
Auto merge of #13818 - osiewicz:clean-package-perf-improvements, r=weihanglo
Clean package perf improvements ### What does this PR try to resolve? I've noticed that `cargo clean -p` execution time scales poorly with size of target directory; in my case (~250GB target directory on M1 Mac) running `cargo clean -p` takes circa 35 seconds. Notably, it's the file listing that takes that time, not deleting the package itself. That is, when running `cargo clean -p SOME_PACKAGE` twice in a row, both executions take roughly the same time. I've tracked it down to the fact that we seem quite happy to use `glob::glob` function, which iterates over contents of target dir. It also was a bit sub-optimal when it came to doing that, for which I've already filled a PR in rust-lang/glob#144 - that PR alone takes down cleaning time down to ~14 seconds. While it is a good improvement for a relatively straightforward change, this PR tries to take it even further. With glob PR applied + changes from this PR, my test case goes down to ~6 seconds. I'm pretty sure that we could squeeze this further, but I'd rather do so in a follow-up PR. Notably, this PR doesn't help with *just* super-large target directories. `cargo clean -p serde` on cargo repo (with ~7Gb target directory size) went down from ~380ms to ~100ms for me. Not too shabby. ### How should we test and review this PR? I've mostly tested it manually, running `cargo clean` against multiple different repos. ### Additional information TODO: - [x] [c770700](c770700) is not quite correct; we need to consider that it changes how progress reporting works; as is, we're gonna report all progress relatively quickly and stall at the end (when we're actually iterating over directories, globbing, removing files, that kind of jazz). I'll address that.
2 parents e420c7b + 9c1139e commit 27d3e3d

File tree

1 file changed

+59
-16
lines changed

1 file changed

+59
-16
lines changed

src/cargo/ops/cargo_clean.rs

+59-16
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@ use crate::util::interning::InternedString;
88
use crate::util::{human_readable_bytes, GlobalContext, Progress, ProgressStyle};
99
use anyhow::bail;
1010
use cargo_util::paths;
11+
use std::collections::{HashMap, HashSet};
1112
use std::fs;
1213
use std::path::{Path, PathBuf};
14+
use std::rc::Rc;
1315

1416
pub struct CleanOptions<'gctx> {
1517
pub gctx: &'gctx GlobalContext,
@@ -169,6 +171,9 @@ fn clean_specs(
169171

170172
clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len()));
171173

174+
// Try to reduce the amount of times we iterate over the same target directory by storing away
175+
// the directories we've iterated over (and cleaned for a given package).
176+
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
172177
for pkg in packages {
173178
let pkg_dir = format!("{}-*", pkg.name());
174179
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
@@ -192,7 +197,9 @@ fn clean_specs(
192197
}
193198
continue;
194199
}
195-
let crate_name = target.crate_name();
200+
let crate_name: Rc<str> = target.crate_name().into();
201+
let path_dot: &str = &format!("{crate_name}.");
202+
let path_dash: &str = &format!("{crate_name}-");
196203
for &mode in &[
197204
CompileMode::Build,
198205
CompileMode::Test,
@@ -212,28 +219,15 @@ fn clean_specs(
212219
TargetKind::Test | TargetKind::Bench => (layout.deps(), None),
213220
_ => (layout.deps(), Some(layout.dest())),
214221
};
222+
let mut dir_glob_str = escape_glob_path(dir)?;
223+
let dir_glob = Path::new(&dir_glob_str);
215224
for file_type in file_types {
216225
// Some files include a hash in the filename, some don't.
217226
let hashed_name = file_type.output_filename(target, Some("*"));
218227
let unhashed_name = file_type.output_filename(target, None);
219-
let dir_glob = escape_glob_path(dir)?;
220-
let dir_glob = Path::new(&dir_glob);
221228

222229
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
223230
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
224-
// Remove dep-info file generated by rustc. It is not tracked in
225-
// file_types. It does not have a prefix.
226-
let hashed_dep_info = dir_glob.join(format!("{}-*.d", crate_name));
227-
clean_ctx.rm_rf_glob(&hashed_dep_info)?;
228-
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
229-
clean_ctx.rm_rf(&unhashed_dep_info)?;
230-
// Remove split-debuginfo files generated by rustc.
231-
let split_debuginfo_obj = dir_glob.join(format!("{}.*.o", crate_name));
232-
clean_ctx.rm_rf_glob(&split_debuginfo_obj)?;
233-
let split_debuginfo_dwo = dir_glob.join(format!("{}.*.dwo", crate_name));
234-
clean_ctx.rm_rf_glob(&split_debuginfo_dwo)?;
235-
let split_debuginfo_dwp = dir_glob.join(format!("{}.*.dwp", crate_name));
236-
clean_ctx.rm_rf_glob(&split_debuginfo_dwp)?;
237231

238232
// Remove the uplifted copy.
239233
if let Some(uplift_dir) = uplift_dir {
@@ -244,6 +238,31 @@ fn clean_specs(
244238
clean_ctx.rm_rf(&dep_info)?;
245239
}
246240
}
241+
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
242+
clean_ctx.rm_rf(&unhashed_dep_info)?;
243+
244+
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
245+
dir_glob_str.push(std::path::MAIN_SEPARATOR);
246+
}
247+
dir_glob_str.push('*');
248+
let dir_glob_str: Rc<str> = dir_glob_str.into();
249+
if cleaned_packages
250+
.entry(dir_glob_str.clone())
251+
.or_default()
252+
.insert(crate_name.clone())
253+
{
254+
let paths = [
255+
// Remove dep-info file generated by rustc. It is not tracked in
256+
// file_types. It does not have a prefix.
257+
(path_dash, ".d"),
258+
// Remove split-debuginfo files generated by rustc.
259+
(path_dot, ".o"),
260+
(path_dot, ".dwo"),
261+
(path_dot, ".dwp"),
262+
];
263+
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
264+
}
265+
247266
// TODO: what to do about build_script_build?
248267
let dir = escape_glob_path(layout.incremental())?;
249268
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
@@ -322,6 +341,30 @@ impl<'gctx> CleanContext<'gctx> {
322341
Ok(())
323342
}
324343

344+
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
345+
///
346+
/// This function iterates over files matching a glob (`pattern`) and removes those whose
347+
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
348+
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
349+
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
350+
fn rm_rf_prefix_list(
351+
&mut self,
352+
pattern: &str,
353+
path_matchers: &[(&str, &str)],
354+
) -> CargoResult<()> {
355+
for path in glob::glob(pattern)? {
356+
let path = path?;
357+
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
358+
if path_matchers
359+
.iter()
360+
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
361+
{
362+
self.rm_rf(&path)?;
363+
}
364+
}
365+
Ok(())
366+
}
367+
325368
pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> {
326369
let meta = match fs::symlink_metadata(path) {
327370
Ok(meta) => meta,

0 commit comments

Comments
 (0)