diff --git a/doc/src/clusters/cluster.md b/doc/src/clusters/cluster.md index 1496d31..939ae44 100644 --- a/doc/src/clusters/cluster.md +++ b/doc/src/clusters/cluster.md @@ -51,6 +51,11 @@ be one of: * `"slurm"` * `"bash"` +## submit_options + +`cluster.submit_options`: **array** of **strings** - Scheduler submission options that +are passed to every job on this cluster. + ## partition `cluster.partition`: **array** of **tables** - Define the scheduler partitions that diff --git a/doc/src/release-notes.md b/doc/src/release-notes.md index a12612b..763f23e 100644 --- a/doc/src/release-notes.md +++ b/doc/src/release-notes.md @@ -7,6 +7,7 @@ * Edit links to documentation pages. * New arguments to `show status` display actions that are in the requested states: `--completed`, `--eligible`, `--submitted`, and `--waiting`. +* `cluster.submit_options` configuration option in `clusters.toml`. *Changed:* @@ -15,6 +16,8 @@ * `show status` hides actions with 0 directories by default. Pass `--all` to show all actions. * `clean` now cleans all caches by default. +* Submit jobs with `--constraint="scratch"` by default on Delta. +* Submit jobs with `--constraint="nvme"` by default on Frontier. 
*Fixed:* diff --git a/src/builtin.rs b/src/builtin.rs index 999052a..ca62a30 100644 --- a/src/builtin.rs +++ b/src/builtin.rs @@ -72,6 +72,7 @@ fn andes() -> Cluster { name: "andes".into(), identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "andes".into()), scheduler: SchedulerType::Slurm, + submit_options: Vec::new(), partition: vec![ // Auto-detected partitions: batch Partition { @@ -92,6 +93,7 @@ fn anvil() -> Cluster { name: "anvil".into(), identify: IdentificationMethod::ByEnvironment("RCAC_CLUSTER".into(), "anvil".into()), scheduler: SchedulerType::Slurm, + submit_options: Vec::new(), partition: vec![ // Auto-detected partitions: shared | wholenode | gpu Partition { @@ -149,6 +151,7 @@ fn delta() -> Cluster { name: "delta".into(), identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "Delta".into()), scheduler: SchedulerType::Slurm, + submit_options: vec!["--constraint=\"scratch\"".to_string()], partition: vec![ // Auto-detected partitions: cpu | gpuA100x4 Partition { @@ -206,6 +209,7 @@ fn frontier() -> Cluster { name: "frontier".into(), identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "frontier".into()), scheduler: SchedulerType::Slurm, + submit_options: vec!["--constraint=\"nvme\"".to_string()], partition: vec![ // Auto-detected partitions: batch Partition { @@ -225,6 +229,7 @@ fn greatlakes() -> Cluster { name: "greatlakes".into(), identify: IdentificationMethod::ByEnvironment("CLUSTER_NAME".into(), "greatlakes".into()), scheduler: SchedulerType::Slurm, + submit_options: Vec::new(), partition: vec![ // Auto-detected partitions: standard | gpu_mig40,gpu | gpu. 
Partition { @@ -295,6 +300,7 @@ fn none() -> Cluster { name: "none".into(), identify: IdentificationMethod::Always(true), scheduler: SchedulerType::Bash, + submit_options: Vec::new(), partition: vec![Partition { name: "none".into(), ..Partition::default() diff --git a/src/cluster.rs b/src/cluster.rs index 0ceb529..d245eca 100644 --- a/src/cluster.rs +++ b/src/cluster.rs @@ -46,6 +46,10 @@ pub struct Cluster { /// The partitions in the cluster's queue. pub partition: Vec<Partition>, + + /// Submit options to include in every job submitted to this cluster. + #[serde(default)] + pub submit_options: Vec<String>, } /// Methods to identify clusters. @@ -400,30 +404,35 @@ mod tests { identify: IdentificationMethod::Always(false), scheduler: SchedulerType::Bash, partition: Vec::new(), + submit_options: Vec::new(), }, Cluster { name: "cluster1".into(), identify: IdentificationMethod::ByEnvironment("_row_select".into(), "a".into()), scheduler: SchedulerType::Bash, partition: Vec::new(), + submit_options: Vec::new(), }, Cluster { name: "cluster2".into(), identify: IdentificationMethod::ByEnvironment("_row_select".into(), "b".into()), scheduler: SchedulerType::Bash, partition: Vec::new(), + submit_options: Vec::new(), }, Cluster { name: "cluster3".into(), identify: IdentificationMethod::Always(true), scheduler: SchedulerType::Bash, partition: Vec::new(), + submit_options: Vec::new(), }, Cluster { name: "cluster4".into(), identify: IdentificationMethod::ByEnvironment("_row_Select".into(), "b".into()), scheduler: SchedulerType::Bash, partition: Vec::new(), + submit_options: Vec::new(), }, ]; let cluster_configuration = Configuration { cluster: clusters }; @@ -591,6 +600,7 @@ mod tests { identify: IdentificationMethod::Always(true), scheduler: SchedulerType::Bash, partition: partitions, + submit_options: Vec::new(), }; let cpu_resources = Resources { @@ -728,6 +738,7 @@ name = "b" assert_eq!(cluster.name, "a"); assert_eq!(cluster.identify, IdentificationMethod::Always(true)); 
assert_eq!(cluster.scheduler, SchedulerType::Bash); + assert!(cluster.submit_options.is_empty()); assert_eq!( cluster.partition, vec![Partition { @@ -748,6 +759,7 @@ name = "b" name = "a" identify.by_environment = ["b", "c"] scheduler = "slurm" +submit_options = ["option1", "option2"] [[cluster.partition]] name = "d" @@ -777,6 +789,7 @@ account_suffix = "-gpu" IdentificationMethod::ByEnvironment("b".into(), "c".into()) ); assert_eq!(cluster.scheduler, SchedulerType::Slurm); + assert_eq!(cluster.submit_options, vec!["option1", "option2"]); assert_eq!( cluster.partition, vec![Partition { diff --git a/src/scheduler/bash.rs b/src/scheduler/bash.rs index 2c903aa..0fb3fd9 100644 --- a/src/scheduler/bash.rs +++ b/src/scheduler/bash.rs @@ -539,6 +539,7 @@ mod tests { scheduler: SchedulerType::Bash, identify: IdentificationMethod::Always(false), partition: Vec::new(), + submit_options: Vec::new(), }; let script = Bash::new(cluster, launchers) .make_script(&action, &directories) diff --git a/src/scheduler/slurm.rs b/src/scheduler/slurm.rs index 231d714..859e9a5 100644 --- a/src/scheduler/slurm.rs +++ b/src/scheduler/slurm.rs @@ -115,6 +115,11 @@ impl Scheduler for Slurm { let minutes = (total + 59) / 60; let _ = writeln!(preamble, "#SBATCH --time={minutes}"); + // Add global cluster submit options first so that users can override them. 
+ for option in &self.cluster.submit_options { + let _ = writeln!(preamble, "#SBATCH {option}"); + } + // Use provided submission options if let Some(submit_options) = action.submit_options.get(&self.cluster.name) { if let Some(ref account) = submit_options.account { @@ -299,6 +304,7 @@ mod tests { identify: IdentificationMethod::Always(false), scheduler: SchedulerType::Slurm, partition: vec![Partition::default()], + submit_options: Vec::new(), }; let slurm = Slurm::new(cluster, launchers.by_cluster("cluster")); @@ -323,6 +329,27 @@ mod tests { assert!(script.contains("#SBATCH --time=180")); } + #[test] + #[parallel] + fn cluster_submit_options() { + let (action, directories, mut slurm) = setup(); + slurm.cluster.submit_options = vec!["--option=value".to_string()]; + + let script = slurm + .make_script(&action, &directories) + .expect("valid script"); + println!("{script}"); + + assert!(script.contains("#SBATCH --job-name=action")); + assert!(script.contains("#SBATCH --ntasks=1")); + assert!(!script.contains("#SBATCH --account")); + assert!(script.contains("#SBATCH --partition=partition")); + assert!(!script.contains("#SBATCH --cpus-per-task")); + assert!(!script.contains("#SBATCH --gpus-per-task")); + assert!(script.contains("#SBATCH --time=180")); + assert!(script.contains("#SBATCH --option=value")); + } + #[test] #[parallel] fn ntasks() { @@ -421,6 +448,7 @@ mod tests { name: "cluster".into(), identify: IdentificationMethod::Always(false), scheduler: SchedulerType::Slurm, + submit_options: Vec::new(), partition: vec![Partition { memory_per_cpu: Some("a".into()), ..Partition::default() @@ -447,6 +475,7 @@ mod tests { name: "cluster".into(), identify: IdentificationMethod::Always(false), scheduler: SchedulerType::Slurm, + submit_options: Vec::new(), partition: vec![Partition { memory_per_gpu: Some("b".into()), ..Partition::default() @@ -475,6 +504,7 @@ mod tests { name: "cluster".into(), identify: IdentificationMethod::Always(false), scheduler: 
SchedulerType::Slurm, + submit_options: Vec::new(), partition: vec![Partition { cpus_per_node: Some(10), ..Partition::default() @@ -503,6 +533,7 @@ mod tests { name: "cluster".into(), identify: IdentificationMethod::Always(false), scheduler: SchedulerType::Slurm, + submit_options: Vec::new(), partition: vec![Partition { gpus_per_node: Some(5), ..Partition::default()