From ca2a535c34e3badbfa9258971ffc062c2cfee18c Mon Sep 17 00:00:00 2001 From: Giorgio Pogliani Date: Tue, 24 Dec 2024 23:53:35 +0100 Subject: [PATCH 1/3] improve walking directories --- Cargo.lock | 42 +++++++++++++++ Cargo.toml | 1 + src/source.rs | 142 +++++++++++++++++--------------------------------- 3 files changed, 91 insertions(+), 94 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63a3bbd1..d21223df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -519,6 +519,28 @@ dependencies = [ "itertools", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -538,6 +560,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1345,6 +1376,16 @@ dependencies = [ "serde", ] +[[package]] +name = "jwalk" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2735847566356cd2179a2a38264839308f7079fa96e6bd5a42d740460e003c56" +dependencies = [ + "crossbeam", + "rayon", +] + [[package]] name = "lasso" version = "0.7.3" @@ -1418,6 +1459,7 @@ dependencies = [ "diffy", "futures", "glob-match", + "jwalk", "mago-ast", "mago-feedback", "mago-fixer", diff --git a/Cargo.toml b/Cargo.toml index 53fe6c18..298f137e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -138,6 +138,7 @@ config = { workspace = true } toml = { workspace = true } num_cpus = { workspace = true } diffy = { workspace = true } +jwalk = "0.8.1" [target.'cfg(target_os = "linux")'.dependencies] openssl = { workspace = true } diff --git a/src/source.rs b/src/source.rs index 2ec1f94d..50514a14 100644 --- a/src/source.rs +++ b/src/source.rs @@ -1,13 +1,8 @@ -use std::path::Path; -use std::path::PathBuf; - use ahash::HashSet; -use async_walkdir::Filtering; -use async_walkdir::WalkDir; -use futures::StreamExt; - use mago_interner::ThreadedInterner; use mago_source::SourceManager; +use std::path::Path; +use tracing::debug; use crate::config::source::SourceConfiguration; use crate::consts::PHP_STUBS; @@ -16,7 +11,7 @@ use crate::error::Error; /// Load the source manager by scanning and processing the sources /// as per the given configuration. /// -/// # Arguments +/// #_Arguments /// /// * `interner` - The interner to use for string interning. /// * `configuration` - The configuration to use for loading the sources. @@ -33,85 +28,62 @@ pub async fn load( ) -> Result { let SourceConfiguration { root, paths, includes, excludes, extensions } = configuration; - let mut starting_paths = Vec::new(); - - if paths.is_empty() { - starting_paths.push((root.clone(), true)); - } else { - for source in paths { - starting_paths.push((source.clone(), true)); - } - } - - for include in includes { - starting_paths.push((include.clone(), false)); - } - - if paths.is_empty() && includes.is_empty() { - starting_paths.push((root.clone(), true)); - } - - let excludes_set: HashSet = excludes - .iter() - .map(|exclude| { - // if it contains a wildcard, treat it as a pattern - if exclude.contains('*') { - Exclusion::Pattern(exclude.clone()) - } else { - let path = Path::new(exclude); - - if path.is_absolute() { - Exclusion::Path(path.to_path_buf()) - } else { - Exclusion::Path(root.join(path)) + let manager = SourceManager::new(interner.clone()); + let extensions: HashSet<&String> = extensions.iter().collect(); + let has_paths = !paths.is_empty(); + let has_includes = !includes.is_empty(); + let has_excludes = !excludes.is_empty(); + + let entries = jwalk::WalkDir::new(root.clone()).process_read_dir(|_, _, _, children| { + children.iter_mut().for_each(|dir_entry_result| { + if let Ok(dir_entry) = dir_entry_result { + if dir_entry.path().starts_with(".") || dir_entry.file_name.eq_ignore_ascii_case("node_modules") { + dir_entry.read_children_path = None; } } - }) - .collect(); + }); + }); - let extensions: HashSet<&String> = extensions.iter().collect(); + for entry in entries { + if let Err(_) = entry { + continue; + } - let manager = SourceManager::new(interner.clone()); - for (path, user_defined) in starting_paths.into_iter() { - let mut entries = WalkDir::new(path) - // filter out .git directories - .filter(|entry| async move { - if entry.path().starts_with(".") { - Filtering::IgnoreDir - } else { - Filtering::Continue - } - }); + let path = entry.unwrap().path(); - // Check for errors after processing all entries in the current path - while let Some(entry) = entries.next().await { - let path = entry?.path(); - if !path.is_file() { - continue; - } + if !path.is_file() { + continue; + } - // Skip user-defined sources if they are included in the `includes` list. - if user_defined && includes.iter().any(|include| path.starts_with(include)) { - continue; - } + if !is_accepted_file(&path, &extensions) { + continue; + } - // Skip excluded files and directories. - if is_excluded(&path, &excludes_set) { - continue; - } + let name = match path.strip_prefix(root.clone()) { + Ok(rel_path) => rel_path.display().to_string(), + Err(_) => path.display().to_string(), + }; - // Skip files that do not have an accepted extension. - if !is_accepted_file(&path, &extensions) { - continue; - } + if has_excludes + && excludes.iter().any(|p| { + name.starts_with(p) + || glob_match::glob_match(p, name.as_str()) + || glob_match::glob_match(p, path.to_string_lossy().as_ref()) + }) + { + mago_feedback::debug!("Skipping: {:?}", name); + continue; + } - let name = match path.strip_prefix(root) { - Ok(rel_path) => rel_path.display().to_string(), - Err(_) => path.display().to_string(), - }; + let is_path = has_paths && paths.iter().any(|p| path.starts_with(p)); - manager.insert_path(name, path.clone(), user_defined); + let is_include = has_includes && includes.iter().any(|p| path.starts_with(p)); + + if !is_path && !is_include { + continue; } + + manager.insert_path(name, path.clone(), if is_include { false } else { true }); } if include_stubs { @@ -123,18 +95,6 @@ pub async fn load( Ok(manager) } -fn is_excluded(path: &Path, excludes: &HashSet) -> bool { - for exclusion in excludes { - return match exclusion { - Exclusion::Path(p) if path.starts_with(p) => true, - Exclusion::Pattern(p) if glob_match::glob_match(p, path.to_string_lossy().as_ref()) => true, - _ => continue, - }; - } - - false -} - fn is_accepted_file(path: &Path, extensions: &HashSet<&String>) -> bool { if extensions.is_empty() { path.extension().and_then(|s| s.to_str()).map(|ext| ext.eq_ignore_ascii_case("php")).unwrap_or(false) @@ -142,9 +102,3 @@ fn is_accepted_file(path: &Path, extensions: &HashSet<&String>) -> bool { path.extension().and_then(|s| s.to_str()).map(|ext| extensions.contains(&ext.to_string())).unwrap_or(false) } } - -#[derive(Debug, Hash, Eq, PartialEq)] -enum Exclusion { - Path(PathBuf), - Pattern(String), -} From 5d89a6575f9ff065992c453c778428e085b77557 Mon Sep 17 00:00:00 2001 From: Giorgio Pogliani Date: Tue, 24 Dec 2024 23:55:23 +0100 Subject: [PATCH 2/3] remove unused import --- src/source.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/source.rs b/src/source.rs index 50514a14..04da6715 100644 --- a/src/source.rs +++ b/src/source.rs @@ -2,7 +2,6 @@ use ahash::HashSet; use mago_interner::ThreadedInterner; use mago_source::SourceManager; use std::path::Path; -use tracing::debug; use crate::config::source::SourceConfiguration; use crate::consts::PHP_STUBS; From 1685667343e06fe6089ffe28b8888fd8b89ede7e Mon Sep 17 00:00:00 2001 From: Giorgio Pogliani Date: Thu, 26 Dec 2024 23:01:36 +0100 Subject: [PATCH 3/3] Update source.rs Co-authored-by: Saif Eddin Gmati <29315886+azjezz@users.noreply.github.com> --- src/source.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/source.rs b/src/source.rs index 04da6715..e02d3539 100644 --- a/src/source.rs +++ b/src/source.rs @@ -10,7 +10,7 @@ use crate::error::Error; /// Load the source manager by scanning and processing the sources /// as per the given configuration. /// -/// #_Arguments +/// # Arguments /// /// * `interner` - The interner to use for string interning. /// * `configuration` - The configuration to use for loading the sources.