From 4fc6cceba416e9df3ceb84308c67d11e2ebb022c Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 8 Jan 2024 19:43:31 -0500 Subject: [PATCH] Reuse a temp dir per device Try to use the system tempdir first, then make a tempdir per device the root directories reside on. This has several benefits: - Most of the time, we'll use the common system tempdir entirely - We will now exclude the tempdir we use for each root: it used to be possible for the temp files to be picked up by the reader thread: this is now only possible if we cross into a different device than all root dirs. --- crates/applesauce/src/lib.rs | 1 + crates/applesauce/src/scan.rs | 15 ++++-- crates/applesauce/src/threads/mod.rs | 22 ++++++-- crates/applesauce/src/threads/writer.rs | 18 +++---- crates/applesauce/src/tmpdir_paths.rs | 71 +++++++++++++++++++++++++ 5 files changed, 109 insertions(+), 18 deletions(-) create mode 100644 crates/applesauce/src/tmpdir_paths.rs diff --git a/crates/applesauce/src/lib.rs b/crates/applesauce/src/lib.rs index 5aef797..4406e48 100644 --- a/crates/applesauce/src/lib.rs +++ b/crates/applesauce/src/lib.rs @@ -19,6 +19,7 @@ mod rfork_storage; mod scan; mod seq_queue; mod threads; +mod tmpdir_paths; mod xattr; use libc::c_char; diff --git a/crates/applesauce/src/scan.rs b/crates/applesauce/src/scan.rs index fff906d..ca10962 100644 --- a/crates/applesauce/src/scan.rs +++ b/crates/applesauce/src/scan.rs @@ -1,10 +1,12 @@ use crate::progress::Progress; +use crate::tmpdir_paths::TmpdirPaths; use ignore::WalkState; +use std::collections::HashSet; use std::fs::FileType; use std::path::{Path, PathBuf}; pub struct Walker<'a, P> { - paths: ignore::WalkParallel, + paths: ignore::WalkBuilder, progress: &'a P, } @@ -21,13 +23,18 @@ impl<'a, P: Progress + Send + Sync> Walker<'a, P> { }); Self { - paths: builder.build_parallel(), + paths: builder, progress, } } - pub fn run(self, f: impl Fn(FileType, PathBuf) + Send + Sync) { - self.paths.run(|| { + pub fn run(self, tmpdirs: &TmpdirPaths, f: impl Fn(FileType, PathBuf) + Send + Sync) { + let ignored_dirs: HashSet = tmpdirs.paths().map(PathBuf::from).collect(); + let mut paths = self.paths; + let walker = paths + .filter_entry(move |entry| !ignored_dirs.contains(entry.path())) + .build_parallel(); + walker.run(|| { Box::new(|entry| { handle_entry(entry, self.progress, &f); WalkState::Continue diff --git a/crates/applesauce/src/threads/mod.rs b/crates/applesauce/src/threads/mod.rs index b40a299..51de56e 100644 --- a/crates/applesauce/src/threads/mod.rs +++ b/crates/applesauce/src/threads/mod.rs @@ -1,12 +1,13 @@ use crate::info::FileCompressionState; use crate::progress::{self, Progress, SkipReason}; +use crate::tmpdir_paths::TmpdirPaths; use crate::{info, scan, Stats}; use applesauce_core::compressor; use std::num::NonZeroUsize; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::thread::{self, JoinHandle}; -use std::{fmt, mem}; +use std::{fmt, io, mem}; pub mod compressing; pub mod reader; @@ -41,6 +42,7 @@ pub struct OperationContext { mode: Mode, stats: Stats, finished_stats: crossbeam_channel::Sender, + tempdirs: TmpdirPaths, verify: bool, } @@ -50,9 +52,15 @@ impl OperationContext { mode, stats: Stats::default(), finished_stats, + tempdirs: TmpdirPaths::new(), verify, } } + + fn add_dst(&mut self, dst: &Path) -> io::Result<()> { + let metadata = dst.metadata()?; + self.tempdirs.add_dst(dst, &metadata) + } } impl Drop for OperationContext { @@ -131,12 +139,18 @@ impl BackgroundThreads { P::Task: Send + Sync + 'static, { let (finished_stats, finished_stats_rx) = crossbeam_channel::bounded(1); - let operation = Arc::new(OperationContext::new(mode, finished_stats, verify)); + let mut operation = OperationContext::new(mode, finished_stats, verify); + let walker = scan::Walker::new( + paths + .into_iter() + .inspect(|path| operation.add_dst(path).unwrap()), + progress, + ); + let operation = Arc::new(operation); let stats = &operation.stats; let chan = self.reader.chan(); - let walker = scan::Walker::new(paths, progress); - walker.run(|file_type, path| { + walker.run(&operation.tempdirs, |file_type, path| { // We really only want to deal with files, not symlinks to files, or fifos, etc. #[allow(clippy::filetype_is_file)] if !file_type.is_file() { diff --git a/crates/applesauce/src/threads/writer.rs b/crates/applesauce/src/threads/writer.rs index 4e3c7b8..73e6b97 100644 --- a/crates/applesauce/src/threads/writer.rs +++ b/crates/applesauce/src/threads/writer.rs @@ -7,7 +7,6 @@ use std::fs::{File, Metadata}; use std::io::{BufRead, BufReader, BufWriter, Seek, Write}; use std::os::fd::AsRawFd; use std::os::macos::fs::MetadataExt; -use std::path::Path; use std::sync::Arc; use std::{cmp, io, ptr}; use tempfile::NamedTempFile; @@ -104,7 +103,7 @@ impl Handler { ) -> io::Result<()> { let uncompressed_file_size = item.metadata.len(); - let mut tmp_file = tmp_file_for(&item.context.path)?; + let mut tmp_file = tmp_file_for(&item)?; copy_xattrs(&item.file, tmp_file.as_file())?; let mut writer = @@ -165,7 +164,7 @@ impl Handler { } fn write_uncompressed_file(&mut self, item: WorkItem) -> io::Result<()> { - let mut tmp_file = tmp_file_for(&item.context.path)?; + let mut tmp_file = tmp_file_for(&item)?; copy_xattrs(&item.file, tmp_file.as_file())?; let chunks = @@ -212,13 +211,12 @@ impl WorkHandler for Handler { } } -#[tracing::instrument(level="debug", skip_all, err, fields(path=%path.display()))] -fn tmp_file_for(path: &Path) -> io::Result { - let mut builder = tempfile::Builder::new(); - if let Some(name) = path.file_name() { - builder.prefix(name); - } - builder.tempfile_in(path.parent().ok_or(io::ErrorKind::InvalidInput)?) +#[tracing::instrument(level="debug", skip_all, err, fields(path=%item.context.path.display()))] +fn tmp_file_for(item: &WorkItem) -> io::Result { + item.context + .operation + .tempdirs + .tempfile_for(&item.context.path, &item.metadata) } #[tracing::instrument(level = "debug", skip_all, err)] diff --git a/crates/applesauce/src/tmpdir_paths.rs b/crates/applesauce/src/tmpdir_paths.rs new file mode 100644 index 0000000..86cde74 --- /dev/null +++ b/crates/applesauce/src/tmpdir_paths.rs @@ -0,0 +1,71 @@ +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::fs::Metadata; +use std::io; +use std::os::macos::fs::MetadataExt; +use std::path::Path; +use tempfile::{NamedTempFile, TempDir}; + +const TEMPDIR_PREFIX: &str = "applesauce_tmp"; +const TEMPFILE_PREFIX: &str = "applesauce_tmp"; + +#[derive(Debug)] +pub struct TmpdirPaths { + /// Map from device to temp dir + dirs: HashMap, +} + +impl TmpdirPaths { + pub fn new() -> Self { + let mut dirs = HashMap::new(); + let system = TempDir::with_prefix(TEMPDIR_PREFIX); + match system { + Ok(system) => match system.path().metadata() { + Ok(system_metadata) => { + dirs.insert(system_metadata.st_dev(), system); + } + Err(e) => { + tracing::warn!("failed to get metadata for system temp dir: {e}"); + } + }, + Err(e) => { + tracing::warn!("failed to create temp dir in system temp dir: {e}"); + } + } + + Self { dirs } + } + + pub fn paths(&self) -> impl Iterator + '_ { + self.dirs.values().map(|dir| dir.path()) + } + + pub fn add_dst(&mut self, dst: &Path, metadata: &Metadata) -> io::Result<()> { + let device = metadata.st_dev(); + match self.dirs.entry(device) { + Entry::Occupied(_) => {} + Entry::Vacant(entry) => { + let dir = TempDir::with_prefix_in(TEMPDIR_PREFIX, dst)?; + entry.insert(dir); + } + } + Ok(()) + } + + pub fn tempfile_for(&self, path: &Path, metadata: &Metadata) -> io::Result { + let device = metadata.st_dev(); + match self.dirs.get(&device) { + Some(dir) => tempfile::Builder::new() + .prefix(TEMPFILE_PREFIX) + .tempfile_in(dir.path()), + None => { + let parent = path + .parent() + .ok_or_else(|| io::Error::other("expected path to have a parent"))?; + tempfile::Builder::new() + .prefix(TEMPFILE_PREFIX) + .tempfile_in(parent) + } + } + } +}