diff --git a/DESIGN.md b/DESIGN.md
index 486be75..5c4eafd 100644
--- a/DESIGN.md
+++ b/DESIGN.md
@@ -138,7 +138,8 @@ completed **directories** is read.
 ### The cache files
 
 Row maintains the state of the workflow in several files:
-* `values.json`
+* `directories.json`
+  * Last time the workspace was modified.
   * Cached copies of the user-provided static value file.
 * `completed.postcard`
   * Completion status for each **action**.
diff --git a/doc/src/guide/concepts/cache.md b/doc/src/guide/concepts/cache.md
index 40c2633..1fdbc04 100644
--- a/doc/src/guide/concepts/cache.md
+++ b/doc/src/guide/concepts/cache.md
@@ -14,7 +14,7 @@ invalid when:
 > To recover from such a change, execute:
 > ```bash
-> row clean --value
+> row clean --directory
 > ```
 
 ## Submitted jobs
diff --git a/doc/src/row/clean.md b/doc/src/row/clean.md
index be2fcfb..83744bc 100644
--- a/doc/src/row/clean.md
+++ b/doc/src/row/clean.md
@@ -2,7 +2,7 @@
 
 Usage
 ```bash
-row clean [OPTIONS] <--value|--submitted|--completed|--all>
+row clean [OPTIONS] <--directory|--submitted|--completed|--all>
 ```
 
 `row clean` safely removes cache files generated by **row**. The
@@ -19,13 +19,13 @@ Remove all caches.
 
 Remove the cache of completed actions on directories.
 
-### `--submitted`
+### `--directory`
 
-Remove the cache of submitted jobs.
+Remove the directory value cache.
 
-### `--value`
+### `--submitted`
 
-Remove the directory value cache.
+Remove the cache of submitted jobs.
 
 ### `--force`
diff --git a/src/cli/clean.rs b/src/cli/clean.rs
index ef10132..ff52d34 100644
--- a/src/cli/clean.rs
+++ b/src/cli/clean.rs
@@ -7,8 +7,8 @@ use crate::cli::GlobalOptions;
 use row::project::Project;
 use row::MultiProgressContainer;
 use row::{
-    COMPLETED_CACHE_FILE_NAME, DATA_DIRECTORY_NAME, SUBMITTED_CACHE_FILE_NAME,
-    VALUE_CACHE_FILE_NAME,
+    COMPLETED_CACHE_FILE_NAME, DATA_DIRECTORY_NAME, DIRECTORY_CACHE_FILE_NAME,
+    SUBMITTED_CACHE_FILE_NAME,
 };
 
 #[derive(Args, Debug)]
@@ -25,9 +25,9 @@ pub struct Arguments {
 #[group(required = true, multiple = false)]
 #[allow(clippy::struct_excessive_bools)]
 pub struct Selection {
-    /// Remove the value cache.
+    /// Remove the directory cache.
     #[arg(long, display_order = 0)]
-    value: bool,
+    directory: bool,
 
     /// Remove the submitted cache.
     #[arg(long, display_order = 0)]
@@ -97,8 +97,8 @@ pub fn clean(
             }
         }
     }
-    if selection.value || selection.all {
-        let path = data_directory.join(VALUE_CACHE_FILE_NAME);
+    if selection.directory || selection.all {
+        let path = data_directory.join(DIRECTORY_CACHE_FILE_NAME);
         info!("Removing '{}'.", path.display());
         if let Err(error) = fs::remove_file(&path) {
             match error.kind() {
diff --git a/src/lib.rs b/src/lib.rs
index ebaaa35..0cb2987 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -27,7 +27,7 @@ pub const DATA_DIRECTORY_NAME: &str = ".row";
 pub const COMPLETED_DIRECTORY_NAME: &str = "completed";
 pub const MIN_PROGRESS_BAR_SIZE: usize = 1;
 
-pub const VALUE_CACHE_FILE_NAME: &str = "values.json";
+pub const DIRECTORY_CACHE_FILE_NAME: &str = "directories.json";
 pub const COMPLETED_CACHE_FILE_NAME: &str = "completed.postcard";
 pub const SUBMITTED_CACHE_FILE_NAME: &str = "submitted.postcard";
diff --git a/src/state.rs b/src/state.rs
index 00ddcf2..7d2f351 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -6,17 +6,31 @@
 use std::collections::{HashMap, HashSet};
 use std::fs::{self, File};
 use std::io;
 use std::io::prelude::*;
+use std::os::unix::fs::MetadataExt;
 use std::path::PathBuf;
 
 use crate::workflow::Workflow;
 use crate::{
     progress_styles, workspace, Error, MultiProgressContainer, COMPLETED_CACHE_FILE_NAME,
-    COMPLETED_DIRECTORY_NAME, DATA_DIRECTORY_NAME, MIN_PROGRESS_BAR_SIZE,
-    SUBMITTED_CACHE_FILE_NAME, VALUE_CACHE_FILE_NAME,
+    COMPLETED_DIRECTORY_NAME, DATA_DIRECTORY_NAME, DIRECTORY_CACHE_FILE_NAME,
+    MIN_PROGRESS_BAR_SIZE, SUBMITTED_CACHE_FILE_NAME,
 };
 
 type SubmittedJobs = HashMap<String, HashMap<PathBuf, (String, u32)>>;
 
+/// Directory cache
+///
+/// Cache the directory values and store the last modified time.
+///
+#[derive(Debug, Default, Deserialize, PartialEq, Serialize)]
+pub struct DirectoryCache {
+    /// File system modification time of the workspace.
+    modified_time: (i64, i64),
+
+    /// Directory values.
+    values: HashMap<PathBuf, Value>,
+}
+
 /// The state of the project.
 ///
 /// `State` collects the following information on the workspace and manages cache files
@@ -30,8 +44,8 @@ type SubmittedJobs = HashMap<String, HashMap<PathBuf, (String, u32)>>;
 ///
 #[derive(Debug, Default, Deserialize, PartialEq, Serialize)]
 pub struct State {
-    /// The cached value of each directory.
-    values: HashMap<PathBuf, Value>,
+    /// The directory cache.
+    directory_cache: DirectoryCache,
 
     /// Completed directories for each action.
     completed: HashMap<String, HashSet<PathBuf>>,
@@ -43,7 +57,7 @@ type SubmittedJobs = HashMap<String, HashMap<PathBuf, (String, u32)>>;
     completed_file_names: Vec<String>,
 
     /// Set to true when `values` is modified from the on-disk cache.
-    values_modified: bool,
+    directories_modified: bool,
 
     /// Set to true when `completed` is modified from the on-disk cache.
     completed_modified: bool,
@@ -55,7 +69,7 @@ impl State {
     /// Get the directory values.
     pub fn values(&self) -> &HashMap<PathBuf, Value> {
-        &self.values
+        &self.directory_cache.values
     }
 
     /// Get the set of directories completed for a given action.
@@ -140,8 +154,8 @@ impl State {
     /// List all directories in the state.
     pub fn list_directories(&self) -> Vec<PathBuf> {
         trace!("Listing all directories in project.");
-        let mut result = Vec::with_capacity(self.values.len());
-        result.extend(self.values.keys().cloned());
+        let mut result = Vec::with_capacity(self.values().len());
+        result.extend(self.values().keys().cloned());
 
         result
     }
@@ -152,11 +166,11 @@
     ///
     pub fn from_cache(workflow: &Workflow) -> Result<Self, Error> {
         let mut state = State {
-            values: Self::read_value_cache(workflow)?,
+            directory_cache: Self::read_directory_cache(workflow)?,
             completed: Self::read_completed_cache(workflow)?,
             submitted: Self::read_submitted_cache(workflow)?,
             completed_file_names: Vec::new(),
-            values_modified: false,
+            directories_modified: false,
             completed_modified: false,
             submitted_modified: false,
         };
@@ -171,17 +185,17 @@
         Ok(state)
     }
 
-    /// Read the value cache from disk.
-    fn read_value_cache(workflow: &Workflow) -> Result<HashMap<PathBuf, Value>, Error> {
+    /// Read the directory cache from disk.
+    fn read_directory_cache(workflow: &Workflow) -> Result<DirectoryCache, Error> {
         let data_directory = workflow.root.join(DATA_DIRECTORY_NAME);
-        let value_file = data_directory.join(VALUE_CACHE_FILE_NAME);
+        let directory_file = data_directory.join(DIRECTORY_CACHE_FILE_NAME);
 
-        match fs::read(&value_file) {
+        match fs::read(&directory_file) {
             Ok(bytes) => {
-                debug!("Reading cache '{}'.", value_file.display().to_string());
+                debug!("Reading cache '{}'.", directory_file.display().to_string());
 
-                let result =
-                    serde_json::from_slice(&bytes).map_err(|e| Error::JSONParse(value_file, e))?;
+                let result = serde_json::from_slice(&bytes)
+                    .map_err(|e| Error::JSONParse(directory_file, e))?;
 
                 Ok(result)
             }
@@ -189,12 +203,15 @@
                 io::ErrorKind::NotFound => {
                     trace!(
                         "'{}' not found, initializing default values.",
-                        value_file.display().to_string()
+                        directory_file.display().to_string()
                     );
-                    Ok(HashMap::new())
+                    Ok(DirectoryCache {
+                        modified_time: (0, 0),
+                        values: HashMap::new(),
+                    })
                 }
-                _ => Err(Error::FileRead(value_file, error)),
+                _ => Err(Error::FileRead(directory_file, error)),
             },
         }
     }
@@ -265,9 +282,9 @@
         workflow: &Workflow,
         multi_progress: &mut MultiProgressContainer,
     ) -> Result<(), Error> {
-        if self.values_modified {
-            self.save_value_cache(workflow)?;
-            self.values_modified = false;
+        if self.directories_modified {
+            self.save_directory_cache(workflow)?;
+            self.directories_modified = false;
         }
 
         if self.completed_modified {
@@ -283,22 +300,23 @@
         Ok(())
     }
 
-    /// Save the value cache to the filesystem.
-    fn save_value_cache(&self, workflow: &Workflow) -> Result<(), Error> {
+    /// Save the directory cache to the filesystem.
+    fn save_directory_cache(&self, workflow: &Workflow) -> Result<(), Error> {
         let data_directory = workflow.root.join(DATA_DIRECTORY_NAME);
-        let value_file = data_directory.join(VALUE_CACHE_FILE_NAME);
+        let directory_cache_file = data_directory.join(DIRECTORY_CACHE_FILE_NAME);
 
         debug!(
-            "Saving value cache: '{}'.",
-            value_file.display().to_string()
+            "Saving directory cache: '{}'.",
+            directory_cache_file.display().to_string()
         );
 
-        let out_bytes: Vec<u8> = serde_json::to_vec(&self.values)
-            .map_err(|e| Error::JSONSerialize(value_file.clone(), e))?;
+        let out_bytes: Vec<u8> = serde_json::to_vec(&self.directory_cache)
+            .map_err(|e| Error::JSONSerialize(directory_cache_file.clone(), e))?;
 
         fs::create_dir_all(&data_directory)
             .map_err(|e| Error::DirectoryCreate(data_directory, e))?;
 
-        fs::write(&value_file, out_bytes).map_err(|e| Error::FileWrite(value_file.clone(), e))?;
+        fs::write(&directory_cache_file, out_bytes)
+            .map_err(|e| Error::FileWrite(directory_cache_file.clone(), e))?;
 
         Ok(())
     }
@@ -398,48 +416,60 @@
 
         debug!("Synchronizing workspace '{}'.", workspace_path.display());
 
-        // TODO: get workspace metadata. Store mtime in the cache. Then call `list_directories`
-        // only when the current mtime is different from the value in the cache.
-        let filesystem_directories: HashSet<PathBuf> =
-            HashSet::from_iter(workspace::list_directories(workflow, multi_progress)?);
-
-        ////////////////////////////////////////////////
-        // First, synchronize the values.
-        // Make a copy of the directories to remove.
-        let directories_to_remove: Vec<PathBuf> = self
-            .values
-            .keys()
-            .filter(|&x| !filesystem_directories.contains(x))
-            .cloned()
-            .collect();
+        let mut directories_to_add = Vec::new();
 
-        if directories_to_remove.is_empty() {
-            trace!("No directories to remove from the value cache.");
+        // Check if the workspace directory has been modified since we last updated the cache.
+        let metadata = fs::metadata(workspace_path.clone())
+            .map_err(|e| Error::DirectoryRead(workspace_path.clone(), e))?;
+        let current_modified_time = (metadata.mtime(), metadata.mtime_nsec());
+        if current_modified_time == self.directory_cache.modified_time {
+            trace!("The workspace has not been modified.");
         } else {
-            self.values_modified = true;
-        }
+            trace!("The workspace has been modified, updating the cache.");
+            self.directories_modified = true;
+            self.directory_cache.modified_time = current_modified_time;
+
+            let filesystem_directories: HashSet<PathBuf> =
+                HashSet::from_iter(workspace::list_directories(workflow, multi_progress)?);
+
+            ////////////////////////////////////////////////
+            // First, synchronize the values.
+            // Make a copy of the directories to remove.
+            let directories_to_remove: Vec<PathBuf> = self
+                .directory_cache
+                .values
+                .keys()
+                .filter(|&x| !filesystem_directories.contains(x))
+                .cloned()
+                .collect();
 
-        // Then remove them.
-        for directory in directories_to_remove {
-            trace!("Removing '{}' from the value cache", directory.display());
-            self.values.remove(&directory);
-        }
+            if directories_to_remove.is_empty() {
+                trace!("No directories to remove from the directory cache.");
+            }
 
+            // Then remove them.
+            for directory in directories_to_remove {
+                trace!(
+                    "Removing '{}' from the directory cache",
+                    directory.display()
+                );
+                self.directory_cache.values.remove(&directory);
+            }
 
-        // Make a copy of the directories to be added.
-        let directories_to_add: Vec<PathBuf> = filesystem_directories
-            .iter()
-            .filter(|&x| !self.values.contains_key(x))
-            .cloned()
-            .collect();
+            // Make a copy of the directories to be added.
+            directories_to_add = filesystem_directories
+                .iter()
+                .filter(|&x| !self.directory_cache.values.contains_key(x))
+                .cloned()
+                .collect();
 
-        if directories_to_add.is_empty() {
-            trace!("No directories to add to the value cache.");
-        } else {
-            trace!(
-                "Adding {} directories to the workspace.",
-                directories_to_add.len()
-            );
-            self.values_modified = true;
+            if directories_to_add.is_empty() {
+                trace!("No directories to add to the directory cache.");
+            } else {
+                trace!(
+                    "Adding {} directories to the directory cache.",
+                    directories_to_add.len()
+                );
+            }
         }
 
         // Read value files from the directories.
@@ -465,7 +495,7 @@
 
         ///////////////////////////////////////////
         // Wait for launched threads to finish and merge results.
-        self.values.extend(directory_values.get()?);
+        self.directory_cache.values.extend(directory_values.get()?);
 
         let new_complete = new_complete.get()?;
         if !new_complete.is_empty() {
@@ -512,7 +542,7 @@
         for directories in self.completed.values_mut() {
             let directories_to_remove: Vec<PathBuf> = directories
                 .iter()
-                .filter(|d| !self.values.contains_key(*d))
+                .filter(|d| !self.directory_cache.values.contains_key(*d))
                 .cloned()
                 .collect();
 
@@ -545,7 +575,7 @@
         for directory_map in self.submitted.values_mut() {
             let directories_to_remove: Vec<PathBuf> = directory_map
                 .keys()
-                .filter(|d| !self.values.contains_key(*d))
+                .filter(|d| !self.directory_cache.values.contains_key(*d))
                 .cloned()
                 .collect();
 
@@ -698,7 +728,7 @@ mod tests {
         let mut state = State::default();
         let result = state.synchronize_workspace(&workflow, 2, &mut multi_progress);
         assert!(result.is_ok());
-        assert_eq!(state.values.len(), 0);
+        assert_eq!(state.values().len(), 0);
     }
 
     #[test]
@@ -723,15 +753,18 @@ mod tests {
         let workflow = Workflow::open_str(temp.path(), workflow).unwrap();
 
         let mut state = State::default();
-        state.values.insert(PathBuf::from("dir4"), Value::Null);
+        state
+            .directory_cache
+            .values
+            .insert(PathBuf::from("dir4"), Value::Null);
 
         let result = state.synchronize_workspace(&workflow, 2, &mut multi_progress);
         assert!(result.is_ok());
-        assert_eq!(state.values.len(), 3);
-        assert!(state.values.contains_key(&PathBuf::from("dir1")));
-        assert!(state.values.contains_key(&PathBuf::from("dir2")));
-        assert!(state.values.contains_key(&PathBuf::from("dir3")));
+        assert_eq!(state.values().len(), 3);
+        assert!(state.values().contains_key(&PathBuf::from("dir1")));
+        assert!(state.values().contains_key(&PathBuf::from("dir2")));
+        assert!(state.values().contains_key(&PathBuf::from("dir3")));
     }
 
     #[test]
@@ -754,9 +787,9 @@ mod tests {
 
         let result = state.synchronize_workspace(&workflow, 2, &mut multi_progress);
         assert!(result.is_ok());
-        assert_eq!(state.values.len(), 1);
-        assert!(state.values.contains_key(&PathBuf::from("dir1")));
-        assert_eq!(state.values[&PathBuf::from("dir1")].as_i64(), Some(10));
+        assert_eq!(state.values().len(), 1);
+        assert!(state.values().contains_key(&PathBuf::from("dir1")));
+        assert_eq!(state.values()[&PathBuf::from("dir1")].as_i64(), Some(10));
     }
 
     fn setup_completion_directories(temp: &TempDir, n: usize) -> String {
@@ -804,13 +837,13 @@ products = ["g"]
 
         let result = state.synchronize_workspace(&workflow, 2, &mut multi_progress);
         assert!(result.is_ok());
-        assert_eq!(state.values.len(), n);
+        assert_eq!(state.values().len(), n);
         assert!(state.completed.contains_key("b"));
         assert!(state.completed.contains_key("e"));
         for i in 0..n {
             let directory = PathBuf::from(format!("dir{i}"));
             #[allow(clippy::cast_sign_loss)]
-            let value = state.values[&directory].as_i64().unwrap() as usize;
+            let value = state.values()[&directory].as_i64().unwrap() as usize;
             assert_eq!(value, i);
 
             if i < n / 2 {
@@ -841,6 +874,7 @@ products = ["g"]
         let mut state = State::default();
         for i in 0..n {
            state
+                .directory_cache
                .values
                .insert(PathBuf::from(format!("dir{i}")), Value::Null);
         }
@@ -850,7 +884,7 @@ products = ["g"]
 
         let result = state.synchronize_workspace(&workflow, 2, &mut multi_progress);
         assert!(result.is_ok());
-        assert_eq!(state.values.len(), n);
+        assert_eq!(state.values().len(), n);
         assert!(!state.completed.contains_key("b"));
         assert!(!state.completed.contains_key("e"));
     }
@@ -883,7 +917,7 @@ products = ["g"]
 
         let result = state.synchronize_workspace(&workflow, 2, &mut multi_progress);
         assert!(result.is_ok());
-        assert_eq!(state.values.len(), n);
+        assert_eq!(state.values().len(), n);
         assert!(state.completed.contains_key("b"));
         assert!(state.completed.contains_key("e"));
         assert!(!state.completed.contains_key("z"));
@@ -894,7 +928,7 @@ products = ["g"]
         for i in 0..n {
             let directory = PathBuf::from(format!("dir{i}"));
             #[allow(clippy::cast_sign_loss)]
-            let value = state.values[&directory].as_i64().unwrap() as usize;
+            let value = state.values()[&directory].as_i64().unwrap() as usize;
             assert_eq!(value, i);
 
             if i < n / 2 {
diff --git a/src/workflow.rs b/src/workflow.rs
index 6b4ea65..2ca714d 100644
--- a/src/workflow.rs
+++ b/src/workflow.rs
@@ -33,6 +33,7 @@ pub struct Workflow {
     /// The submission options
     #[serde(default)]
     pub submit_options: HashMap<String, SubmitOptions>,
+    // TODO: refactor handling of submit options into more general action defaults.
 
     /// The actions.
     #[serde(default)]
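
The heart of this patch is the mtime guard in `State::synchronize_workspace`: the potentially expensive `workspace::list_directories` walk now runs only when the workspace directory's `(mtime, mtime_nsec)` differs from the pair cached in `directories.json`. The sketch below is a minimal standalone illustration of that technique, not row's actual API; `list_if_modified` is a hypothetical helper, and it assumes a Unix filesystem so that `std::os::unix::fs::MetadataExt` is available (the same assumption the patch itself makes).

```rust
use std::fs;
use std::io;
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};

/// Re-list `workspace` only when its modification time has changed.
/// Returns `Ok(None)` when the cached listing is still valid.
fn list_if_modified(
    workspace: &Path,
    cached_mtime: &mut (i64, i64),
) -> io::Result<Option<Vec<PathBuf>>> {
    let metadata = fs::metadata(workspace)?;
    let current = (metadata.mtime(), metadata.mtime_nsec());
    if current == *cached_mtime {
        // No entries were added or removed; the cached listing remains valid.
        return Ok(None);
    }
    *cached_mtime = current;

    // The directory changed: rescan its immediate subdirectories.
    let mut directories = Vec::new();
    for entry in fs::read_dir(workspace)? {
        let entry = entry?;
        if entry.file_type()?.is_dir() {
            directories.push(PathBuf::from(entry.file_name()));
        }
    }
    Ok(Some(directories))
}
```

A directory's modification time changes when entries are created, renamed, or deleted directly inside it, not when files deeper in the tree are edited, so this guard only detects added or removed directories; `row clean --directory` remains the documented recovery path when the cache is otherwise invalidated.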
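The on-disk format changes shape as well: `directories.json` stores a whole `DirectoryCache` object instead of the bare map that `values.json` held, which is presumably why the cache file is renamed rather than reused. A round-trip sketch, assuming the struct fields shown in the patch (the sample values are illustrative):

```rust
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::path::PathBuf;

#[derive(Debug, Default, Deserialize, PartialEq, Serialize)]
struct DirectoryCache {
    modified_time: (i64, i64),
    values: HashMap<PathBuf, Value>,
}

fn main() -> serde_json::Result<()> {
    let mut cache = DirectoryCache::default();
    cache.modified_time = (1_700_000_000, 0);
    cache
        .values
        .insert(PathBuf::from("dir1"), json!({"pressure": 10}));

    // Serializes to: {"modified_time":[1700000000,0],"values":{"dir1":{"pressure":10}}}
    let bytes = serde_json::to_vec(&cache)?;

    // Note: a bare-map `values.json` written by an older version would not
    // deserialize into this struct.
    let restored: DirectoryCache = serde_json::from_slice(&bytes)?;
    assert_eq!(cache, restored);
    Ok(())
}
```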