From e0e83322c76e955809ec0065a447f1f1c702aa79 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 2 Jan 2025 11:14:53 -0500 Subject: [PATCH] add back sys module, move docs, reduce sys cloning, provide way to disable PATHEXT caching --- README.md | 28 +-------- src/finder.rs | 158 ++++++++++++++++++++++++++------------------------ src/helper.rs | 10 +++- src/lib.rs | 6 +- src/sys.rs | 61 +++++++++++++++++++ 5 files changed, 156 insertions(+), 107 deletions(-) diff --git a/README.md b/README.md index bbcbf0a..a72ea39 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ A Rust equivalent of Unix command "which". Locate installed executable in cross ### A note on WebAssembly -This project aims to support WebAssembly with the [wasi](https://wasi.dev/) extension. All `wasm32-wasi*` targets are officially supported. +This project aims to support WebAssembly with the [WASI](https://wasi.dev/) extension. All `wasm32-wasi*` targets are officially supported. If you need to add a conditional dependency on `which` please refer to [the relevant cargo documentation for platform specific dependencies.](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#platform-specific-dependencies) @@ -24,31 +24,7 @@ Here's an example of how to conditionally add `which`. 
You should tweak this to which = "7.0.0" ``` -### How to use in `wasm32-unknown-unknown` - -WebAssembly without wasi does not have a filesystem, but using this crate is possible in `wasm32-unknown-unknown` targets by disabling default features: - -```toml -which = { version = "...", default-features = false } -``` - -Then providing your own implementation of the `which::Sys` trait: - -```rs -use which::WhichConfig; - -struct WasmSys; - -impl which::Sys for WasmSys { - // it is up to you to implement this trait based on the - // environment you are running WebAssembly in -} - -let paths = WhichConfig::new_with_sys(WasmSys) - .all_results() - .unwrap() - .collect::>(); -``` +Note that you can disable the default features of this crate and provide a custom `which::sys::Sys` implementation to `which::WhichConfig` for use in Wasm environments without WASI. ## Examples diff --git a/src/finder.rs b/src/finder.rs index 191b92a..ae3a7ae 100644 --- a/src/finder.rs +++ b/src/finder.rs @@ -78,7 +78,6 @@ impl Finder { cwd.as_ref().map(|p| p.as_ref().display()) ); - let sys = self.sys.clone(); let binary_path_candidates = match cwd { Some(cwd) if path.has_separator() => { #[cfg(feature = "tracing")] @@ -87,22 +86,27 @@ impl Finder { path.display() ); // Search binary in cwd if the path have a path separator. - Either::Left(Self::cwd_search_candidates(sys.clone(), path, cwd)) + Either::Left(Self::cwd_search_candidates(&self.sys, path, cwd)) } _ => { #[cfg(feature = "tracing")] tracing::trace!("{} has no path seperators, so only paths in PATH environment variable will be searched.", path.display()); // Search binary in PATHs(defined in environment variable). 
let paths = paths.ok_or(Error::CannotGetCurrentDirAndPathListEmpty)?; - let paths = sys.env_split_paths(paths.as_ref()); + let paths = self.sys.env_split_paths(paths.as_ref()); if paths.is_empty() { return Err(Error::CannotGetCurrentDirAndPathListEmpty); } - Either::Right(Self::path_search_candidates(sys.clone(), path, paths)) + Either::Right(Self::path_search_candidates( + &self.sys, + path, + paths.into_iter(), + )) } }; - let ret = binary_path_candidates.into_iter().filter_map(move |p| { + let sys = self.sys.clone(); + let ret = binary_path_candidates.filter_map(move |p| { binary_checker .is_valid(&p, &mut nonfatal_error_handler) .then(|| correct_casing(&sys, p, &mut nonfatal_error_handler)) @@ -148,10 +152,10 @@ impl Finder { } fn cwd_search_candidates( - sys: TSys, + sys: &TSys, binary_name: PathBuf, cwd: C, - ) -> impl IntoIterator + ) -> impl Iterator where C: AsRef, { @@ -161,14 +165,14 @@ impl Finder { } fn path_search_candidates

( - sys: TSys, + sys: &TSys, binary_name: PathBuf, paths: P, - ) -> impl IntoIterator + ) -> impl Iterator where - P: IntoIterator, + P: Iterator, { - let new_paths = paths.into_iter().map({ + let new_paths = paths.map({ let sys = sys.clone(); move |p| tilde_expansion(&sys, &p).join(binary_name.clone()) }); @@ -176,79 +180,79 @@ impl Finder { Self::append_extension(sys, new_paths) } - fn append_extension

(sys: TSys, paths: P) -> impl IntoIterator + fn append_extension

(sys: &TSys, paths: P) -> impl Iterator where - P: IntoIterator, + P: Iterator, { - use std::sync::OnceLock; + struct PathsIter

+ where + P: Iterator, + { + paths: P, + current_path_with_index: Option<(PathBuf, usize)>, + path_extensions: Cow<'static, [String]>, + } - // Sample %PATHEXT%: .COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC - // PATH_EXTENSIONS is then [".COM", ".EXE", ".BAT", …]. - // (In one use of PATH_EXTENSIONS we skip the dot, but in the other we need it; - // hence its retention.) - static PATH_EXTENSIONS: OnceLock> = OnceLock::new(); + impl

Iterator for PathsIter

+ where + P: Iterator, + { + type Item = PathBuf; - paths - .into_iter() - .flat_map(move |p| -> Box> { - if !sys.is_windows() { - return Box::new(iter::once(p)); - } - - let sys = sys.clone(); - let path_extensions = PATH_EXTENSIONS.get_or_init(move || { - sys.env_var("PATHEXT") - .map(|pathext| { - pathext - .split(';') - .filter_map(|s| { - if s.as_bytes().first() == Some(&b'.') { - Some(s.to_owned()) - } else { - // Invalid segment; just ignore it. - None - } - }) - .collect() - }) - // PATHEXT not being set or not being a proper Unicode string is exceedingly - // improbable and would probably break Windows badly. Still, don't crash: - .unwrap_or_default() - }); - // Check if path already have executable extension - if has_executable_extension(&p, path_extensions) { + fn next(&mut self) -> Option { + if self.path_extensions.is_empty() { + self.paths.next() + } else if let Some((p, index)) = self.current_path_with_index.take() { + let next_index = index + 1; + if next_index < self.path_extensions.len() { + self.current_path_with_index = Some((p.clone(), next_index)); + } + // Append the extension. + let mut p = p.into_os_string(); + p.push(&self.path_extensions[index]); + let ret = PathBuf::from(p); #[cfg(feature = "tracing")] - tracing::trace!( - "{} already has an executable extension, not modifying it further", - p.display() - ); - Box::new(iter::once(p)) + tracing::trace!("possible extension: {}", ret.display()); + Some(ret) } else { - #[cfg(feature = "tracing")] - tracing::trace!( - "{} has no extension, using PATHEXT environment variable to infer one", - p.display() - ); - // Appended paths with windows executable extensions. - // e.g. path `c:/windows/bin[.ext]` will expand to: - // [c:/windows/bin.ext] - // c:/windows/bin[.ext].COM - // c:/windows/bin[.ext].EXE - // c:/windows/bin[.ext].CMD - // ... - Box::new( - iter::once(p.clone()).chain(path_extensions.iter().map(move |e| { - // Append the extension. 
- let mut p = p.clone().into_os_string(); - p.push(e); - let ret = PathBuf::from(p); - #[cfg(feature = "tracing")] - tracing::trace!("possible extension: {}", ret.display()); - ret - })), - ) + let p = self.paths.next()?; + if has_executable_extension(&p, &self.path_extensions) { + #[cfg(feature = "tracing")] + tracing::trace!( + "{} already has an executable extension, not modifying it further", + p.display() + ); + } else { + #[cfg(feature = "tracing")] + tracing::trace!( + "{} has no extension, using PATHEXT environment variable to infer one", + p.display() + ); + // Appended paths with windows executable extensions. + // e.g. path `c:/windows/bin[.ext]` will expand to: + // [c:/windows/bin.ext] + // c:/windows/bin[.ext].COM + // c:/windows/bin[.ext].EXE + // c:/windows/bin[.ext].CMD + // ... + self.current_path_with_index = Some((p.clone(), 0)); + } + Some(p) } - }) + } + } + + let path_extensions = if sys.is_windows() { + sys.env_windows_path_ext() + } else { + Cow::Borrowed(Default::default()) + }; + + PathsIter { + paths, + current_path_with_index: None, + path_extensions, + } } } diff --git a/src/helper.rs b/src/helper.rs index eb96891..ad2a9db 100644 --- a/src/helper.rs +++ b/src/helper.rs @@ -6,7 +6,7 @@ pub fn has_executable_extension, S: AsRef>(path: T, pathext: match ext { Some(ext) => pathext .iter() - .any(|e| ext.eq_ignore_ascii_case(&e.as_ref()[1..])), + .any(|e| !e.as_ref().is_empty() && ext.eq_ignore_ascii_case(&e.as_ref()[1..])), _ => false, } } @@ -37,4 +37,12 @@ mod test { &[".COM", ".EXE", ".CMD"] )); } + + #[test] + fn test_invalid_exts() { + assert!(!has_executable_extension( + PathBuf::from("foo.bar"), + &["", "."] + )); + } } diff --git a/src/lib.rs b/src/lib.rs index 4e92226..355ca12 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ mod checker; mod error; mod finder; mod helper; -mod sys; +pub mod sys; use std::fmt; use std::path; @@ -30,7 +30,7 @@ use std::ffi::{OsStr, OsString}; use crate::checker::CompositeChecker; pub use 
crate::error::*; use crate::finder::Finder; -pub use sys::*; +use crate::sys::Sys; /// Find an executable binary's path by name. /// @@ -304,7 +304,7 @@ impl WhichConfig { } impl WhichConfig { - /// Creates a new `WhichConfig` with the given system. + /// Creates a new `WhichConfig` with the given `sys::Sys`. /// /// This is useful for providing all the system related /// functionality to this crate. diff --git a/src/sys.rs b/src/sys.rs index 10d683e..6f7350d 100644 --- a/src/sys.rs +++ b/src/sys.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::env::VarError; use std::ffi::OsStr; use std::ffi::OsString; @@ -19,6 +20,34 @@ pub trait SysMetadata { fn is_file(&self) -> bool; } +/// Represents the system that `which` interacts with to get information +/// about the environment and file system. +/// +/// ### How to use in Wasm without WASI +/// +/// WebAssembly without WASI does not have a filesystem, but using this crate is possible in `wasm32-unknown-unknown` targets by disabling default features: +/// +/// ```toml +/// which = { version = "...", default-features = false } +/// ``` +/// +/// Then providing your own implementation of the `which::sys::Sys` trait: +/// +/// ```rs +/// use which::WhichConfig; +/// +/// struct WasmSys; +/// +/// impl which::sys::Sys for WasmSys { +/// // it is up to you to implement this trait based on the +/// // environment you are running WebAssembly in +/// } +/// +/// let paths = WhichConfig::new_with_sys(WasmSys) +/// .all_results() +/// .unwrap() +/// .collect::<Vec<_>>(); +/// ``` pub trait Sys: Clone { type ReadDirEntry: SysReadDirEntry; type Metadata: SysMetadata; @@ -42,6 +71,38 @@ pub trait Sys: Clone { None => Err(VarError::NotPresent), } } + /// Gets and parses the PATHEXT environment variable on Windows. + /// + /// Override this to disable globally caching the parsed PATHEXT. 
+ fn env_windows_path_ext(&self) -> Cow<'static, [String]> { + use std::sync::OnceLock; + + // Sample %PATHEXT%: .COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC + // PATH_EXTENSIONS is then [".COM", ".EXE", ".BAT", …]. + // (In one use of PATH_EXTENSIONS we skip the dot, but in the other we need it; + // hence its retention.) + static PATH_EXTENSIONS: OnceLock<Vec<String>> = OnceLock::new(); + let path_extensions = PATH_EXTENSIONS.get_or_init(|| { + self.env_var("PATHEXT") + .map(|pathext| { + pathext + .split(';') + .filter_map(|s| { + if s.as_bytes().first() == Some(&b'.') { + Some(s.to_owned()) + } else { + // Invalid segment; just ignore it. + None + } + }) + .collect() + }) + // PATHEXT not being set or not being a proper Unicode string is exceedingly + // improbable and would probably break Windows badly. Still, don't crash: + .unwrap_or_default() + }); + Cow::Borrowed(path_extensions) + } /// Gets the metadata of the provided path, following symlinks. fn metadata(&self, path: &Path) -> io::Result<Self::Metadata>; /// Gets the metadata of the provided path, not following symlinks.