From ebd6a8073493fb034c2a79dabc1e92a06afebac6 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Wed, 15 Jan 2025 12:25:35 -0800 Subject: [PATCH] progress --- src/exe_search.rs | 50 ++++++-------------------- src/path_shared.rs | 1 - src/scan_fs.rs | 10 +++--- src/util.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 101 insertions(+), 49 deletions(-) diff --git a/src/exe_search.rs b/src/exe_search.rs index cc20a33..712e41c 100644 --- a/src/exe_search.rs +++ b/src/exe_search.rs @@ -1,14 +1,14 @@ use std::collections::HashSet; use std::env; use std::fs; -use std::os::unix::fs::PermissionsExt; use std::path::Path; use std::path::PathBuf; -use std::process::Command; use rayon::prelude::*; use crate::util::path_home; +use crate::util::is_python_exe; +use crate::util::get_absolute_path_from_exe; //------------------------------------------------------------------------------ // Provide absolute paths for directories that should be excluded from executable search. @@ -79,26 +79,6 @@ fn get_search_origins() -> HashSet<(PathBuf, bool)> { paths } -// Return True if the path points to a python executable. We assume this has already been proven to exist. -fn is_exe(path: &Path) -> bool { - match path.file_name().and_then(|f| f.to_str()) { - Some(file_name) if file_name.starts_with("python") => { - let suffix = &file_name[6..]; - // NOTE: this will not work for windows .exe - if suffix.is_empty() || suffix.chars().all(|c| c.is_ascii_digit() || c == '.') - { - match fs::metadata(path) { - Ok(md) => md.permissions().mode() & 0o111 != 0, - Err(_) => false, - } - } else { - false - } - } - _ => false, - } -} - fn is_symlink(path: &Path) -> bool { match fs::symlink_metadata(path) { Ok(metadata) => metadata.file_type().is_symlink(), @@ -106,19 +86,8 @@ fn is_symlink(path: &Path) -> bool { } } -const PY_SYS_EXE: &str = "import sys;print(sys.executable)"; -// Use the default Python to get its executable path. -fn get_exe_default() -> Option { - match Command::new("python3").arg("-c").arg(PY_SYS_EXE).output() { - Ok(output) => match std::str::from_utf8(&output.stdout) { - Ok(s) => Some(PathBuf::from(s.trim())), - Err(_) => None, - }, - Err(_) => None, - } -} -/// Try to find all Python executables given a starting directory. This will recursively search all directories that are not symlinks. +/// Try to find all Python executables given a starting directory. This will recursively search all directories that are not symlinks. All exe should be returned as absolute paths. fn find_exe_inner( path: &Path, exclude_paths: &HashSet, @@ -135,7 +104,7 @@ fn find_exe_inner( let path_cfg = path.to_path_buf().join("pyvenv.cfg"); if path_cfg.exists() { let path_exe = path.to_path_buf().join("bin/python3"); - if path_exe.exists() && is_exe(&path_exe) { + if path_exe.exists() && is_python_exe(&path_exe) { paths.push(path_exe) } } else { @@ -147,7 +116,7 @@ fn find_exe_inner( // recurse // println!("recursing: {:?}", path); paths.extend(find_exe_inner(&path, exclude_paths, recurse)); - } else if is_exe(&path) { + } else if is_python_exe(&path) { paths.push(path); } } @@ -171,7 +140,7 @@ pub(crate) fn find_exe() -> HashSet { .par_iter() .flat_map(|(path, recurse)| find_exe_inner(path, &exclude, *recurse)) .collect(); - if let Some(exe_def) = get_exe_default() { + if let Some(exe_def) = get_absolute_path_from_exe("python3") { paths.insert(exe_def); } paths @@ -185,6 +154,7 @@ mod tests { use std::fs::File; use std::os::unix::fs::symlink; use tempfile::tempdir; + use std::os::unix::fs::PermissionsExt; #[test] fn test_get_search_exclude_paths_a() { @@ -206,7 +176,7 @@ mod tests { let mut perms = fs::metadata(fp.clone()).unwrap().permissions(); perms.set_mode(0o755); // rwxr-xr-x (755) for an executable script fs::set_permissions(fp.clone(), perms).unwrap(); - assert_eq!(is_exe(&fp), false); + assert_eq!(is_python_exe(&fp), false); } #[test] @@ -217,7 +187,7 @@ mod tests { let mut perms = fs::metadata(fp.clone()).unwrap().permissions(); perms.set_mode(0o755); // rwxr-xr-x (755) for an executable script fs::set_permissions(fp.clone(), perms).unwrap(); - assert_eq!(is_exe(&fp), true); + assert_eq!(is_python_exe(&fp), true); } #[test] @@ -228,7 +198,7 @@ mod tests { let mut perms = fs::metadata(fp.clone()).unwrap().permissions(); perms.set_mode(0o755); // rwxr-xr-x (755) for an executable script fs::set_permissions(fp.clone(), perms).unwrap(); - assert_eq!(is_exe(&fp), true); + assert_eq!(is_python_exe(&fp), true); } #[test] diff --git a/src/path_shared.rs b/src/path_shared.rs index 57ed63e..6a08c19 100644 --- a/src/path_shared.rs +++ b/src/path_shared.rs @@ -5,7 +5,6 @@ use std::path::PathBuf; use std::path::MAIN_SEPARATOR; use std::sync::Arc; -use serde::de::{self, Visitor}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::util::path_home; diff --git a/src/scan_fs.rs b/src/scan_fs.rs index e0317b3..570ddd8 100644 --- a/src/scan_fs.rs +++ b/src/scan_fs.rs @@ -21,7 +21,7 @@ use crate::path_shared::PathShared; use crate::scan_report::ScanReport; use crate::unpack_report::UnpackReport; use crate::ureq_client::UreqClientLive; -use crate::util::path_normalize; +use crate::util::exe_path_normalize; use crate::util::ResultDynError; use crate::validation_report::ValidationFlags; use crate::validation_report::ValidationRecord; @@ -141,8 +141,7 @@ impl<'de> Deserialize<'de> for ScanFS { } impl ScanFS { - - /// Main entry point for creatin a ScanFS + /// Main entry point for creating a ScanFS. All public creation should go through this interface. fn from_exe_to_sites( exe_to_sites: HashMap>, ) -> ResultDynError { @@ -180,7 +179,7 @@ impl ScanFS { .into_par_iter() .map(|exe| { // if normalization fails, just copy the pre-norm - let exe_norm = path_normalize(&exe).unwrap_or_else(|_| exe.clone()); + let exe_norm = exe_path_normalize(&exe).unwrap_or_else(|_| exe.clone()); let dirs = get_site_package_dirs(&exe_norm, force_usite); (exe_norm, dirs) }) @@ -775,7 +774,6 @@ mod tests { assert_eq!(matched, vec![packages[1].clone()]); } - //-------------------------------------------------------------------------- #[test] @@ -795,4 +793,4 @@ mod tests { assert_eq!(sfsd.exe_to_sites.len(), 1); assert_eq!(sfsd.package_to_sites.len(), 3); } -} \ No newline at end of file +} diff --git a/src/util.rs b/src/util.rs index f38993d..faee306 100644 --- a/src/util.rs +++ b/src/util.rs @@ -4,6 +4,9 @@ use std::path::Path; use std::path::PathBuf; use std::time::Duration; use std::time::SystemTime; +use std::process::Command; + +use std::os::unix::fs::PermissionsExt; //------------------------------------------------------------------------------ @@ -40,8 +43,46 @@ pub(crate) fn url_strip_user(url: &String) -> String { url.to_string() } +const PY_SYS_EXE: &str = "import sys;print(sys.executable)"; + +// Use the default Python to get absolute path to the exe. +pub(crate) fn get_absolute_path_from_exe(name: &str) -> Option { + match Command::new(name).arg("-c").arg(PY_SYS_EXE).output() { + Ok(output) => match std::str::from_utf8(&output.stdout) { + Ok(s) => Some(PathBuf::from(s.trim())), + Err(_) => None, + }, + Err(_) => None, + } +} + //------------------------------------------------------------------------------ +// Determine if the Path is an exe; must be an absolute path. +fn is_python_exe_file_name(path: &Path) -> bool { + match path.file_name().and_then(|f| f.to_str()) { + Some(name) if name.starts_with("python") => { + let suffix = &name[6..]; + // NOTE: this will not work for windows .exe + suffix.is_empty() || suffix.chars().all(|c| c.is_ascii_digit() || c == '.') + }, + _ => false, + } +} + +// Return True if the absolute path points to a python executable. We assume this has already been proven to exist. +pub(crate) fn is_python_exe(path: &Path) -> bool { + if is_python_exe_file_name(path) { + match fs::metadata(path) { + Ok(md) => md.permissions().mode() & 0o111 != 0, + Err(_) => false, + } + } + else { + false + } +} + pub(crate) fn path_home() -> Option { if env::consts::OS == "windows" { env::var_os("USERPROFILE").map(PathBuf::from) @@ -87,14 +128,36 @@ pub(crate) fn path_normalize(path: &Path) -> ResultDynError { println!("post conversion: {:?}", fp); } } - // Only expand relative paths if there is more than one component - if fp.is_relative() && fp.components().count() > 1 { + if fp.is_relative() { let cwd = env::current_dir().map_err(|e| e.to_string())?; fp = cwd.join(fp); } + if !fp.is_absolute() { + panic!("Could not derive absolute path {:?}", fp); + } Ok(fp) } + +pub(crate) fn exe_path_normalize(path: &Path) -> ResultDynError { + let mut fp = path.to_path_buf(); + // if given a single-component path that is a Python name, call it to get the full path to the exe + if is_python_exe_file_name(path) && path.components().count() == 1 { + match path.file_name().and_then(|f| f.to_str()) { + Some(name) => { + // TODO: do not unwrap() + fp = get_absolute_path_from_exe(name).unwrap(); + }, + _ => { + let msg = format!("cannot get absolute path from exe: {:?}", path); + return Err(msg.into()); + } + } + } + path_normalize(&fp) +} + + pub(crate) fn path_within_duration>( cache_path: P, max_dur: Duration, @@ -186,4 +249,26 @@ mod tests { assert!(path_within_duration(&fp, Duration::from_secs(60))); assert!(!path_within_duration(&fp, Duration::from_nanos(1))); } + + #[test] + fn test_is_python_exe_file_name_a() { + let temp_dir = tempdir().unwrap(); + let fp = temp_dir.path().join("python3"); + assert!(is_python_exe_file_name(&fp)); + } + + #[test] + fn test_is_python_exe_file_name_b() { + let temp_dir = tempdir().unwrap(); + let fp = temp_dir.path().join("python--"); + assert!(!is_python_exe_file_name(&fp)); + } + + #[test] + fn test_is_python_exe_file_name_c() { + let temp_dir = tempdir().unwrap(); + let fp = temp_dir.path().join("python3.12.1000"); + assert!(is_python_exe_file_name(&fp)); + } + }