Skip to content

Commit

Permalink
Merge pull request #550 from ACCESS-NRI/545-fix-job-yaml-qstat
Browse files Browse the repository at this point in the history
Fix PBS scheduler qstat call so pbs info is logged to `job.yaml`
  • Loading branch information
jo-basevi authored Jan 20, 2025
2 parents 20a8e76 + 807176f commit 7ecb962
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 35 deletions.
4 changes: 2 additions & 2 deletions payu/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import payu.envmod as envmod
from payu.fsops import is_conda
from payu.models import index as supported_models
from payu.schedulers import index as scheduler_index
from payu.schedulers import index as scheduler_index, DEFAULT_SCHEDULER_CONFIG
import payu.subcommands

# Default configuration
Expand Down Expand Up @@ -164,7 +164,7 @@ def submit_job(script, config, vars=None):
"""Submit a userscript to the scheduler."""

# TODO: Temporary stub to replicate the old approach
sched_name = config.get('scheduler', 'pbs')
sched_name = config.get('scheduler', DEFAULT_SCHEDULER_CONFIG)
sched_type = scheduler_index[sched_name]
sched = sched_type()
cmd = sched.submit(script, config, vars)
Expand Down
13 changes: 8 additions & 5 deletions payu/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from payu.fsops import list_archive_dirs
from payu.fsops import run_script_command
from payu.fsops import needs_subprocess_shell
from payu.schedulers.pbs import get_job_info, pbs_env_init, get_job_id
from payu.schedulers import index as scheduler_index, DEFAULT_SCHEDULER_CONFIG
from payu.models import index as model_index
import payu.profilers
from payu.runlog import Runlog
Expand Down Expand Up @@ -672,7 +672,10 @@ def run(self, *user_flags):

self.finish_time = datetime.datetime.now()

info = get_job_info()
scheduler_name = self.config.get('scheduler', DEFAULT_SCHEDULER_CONFIG)
scheduler = scheduler_index[scheduler_name]()

info = scheduler.get_job_info()

if info is None:
# Not being run under PBS, reverse engineer environment
Expand Down Expand Up @@ -718,10 +721,10 @@ def run(self, *user_flags):
error_log_dir = os.path.join(self.archive_path, 'error_logs')
mkdir_p(error_log_dir)

# NOTE: This is PBS-specific
job_id = get_job_id(short=False)
# NOTE: This is only implemented for the PBS scheduler
job_id = scheduler.get_job_id(short=False)

if job_id == '':
if job_id == '' or job_id is None:
job_id = str(self.run_id)[:6]

for fname in self.output_fnames:
Expand Down
2 changes: 2 additions & 0 deletions payu/schedulers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
'pbs': PBS,
'slurm': Slurm,
}

DEFAULT_SCHEDULER_CONFIG = 'pbs'
66 changes: 38 additions & 28 deletions payu/schedulers/pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import shlex
import subprocess
from typing import Any, Dict, Optional

import payu.envmod as envmod
from payu.fsops import check_exe_path
Expand Down Expand Up @@ -137,43 +138,53 @@ def submit(self, pbs_script, pbs_config, pbs_vars=None, python_exe=None):

return cmd

def get_job_id(self, short: bool = True) -> Optional[str]:
"""Get PBS job ID
# TODO: These support functions can probably be integrated into the class
Parameters
----------
short: bool, default True
Return shortened form of the job ID
def get_job_id(short=True):
"""
Return PBS job id
"""
Returns
----------
Optional[str]
Job id if defined, None otherwise
"""

jobid = os.environ.get('PBS_JOBID', '')
jobid = os.environ.get('PBS_JOBID', '')

if short:
# Strip off '.rman2'
jobid = jobid.split('.')[0]
if short:
# Strip off '.rman2'
jobid = jobid.split('.')[0]

return(jobid)
return(jobid)

def get_job_info(self) -> Optional[Dict[str, Any]]:
"""
Get information about the job from the PBS server
def get_job_info():
"""
Get information about the job from the PBS server
"""
jobid = get_job_id()
Returns
----------
Optional[Dict[str, Any]]
Dictionary of information extracted from qstat output
"""
jobid = self.get_job_id()

info = None
info = None

if not jobid == '':
info = get_qstat_info('-ft {0}'.format(jobid), 'Job Id:')
if not jobid == '':
info = get_qstat_info('-ft {0}'.format(jobid), 'Job Id:')

if info is not None:
# Select the dict for this job (there should only be one
# entry in any case)
info = info['Job Id: {}'.format(jobid)]
if info is not None:
# Select the dict for this job (there should only be one
# entry in any case)
info = info['Job Id: {}'.format(jobid)]

# Add the jobid to the dict and then return
info['Job_ID'] = jobid
# Add the jobid to the dict and then return
info['Job_ID'] = jobid

return info
return info


def pbs_env_init():
Expand Down Expand Up @@ -202,9 +213,8 @@ def pbs_env_init():
# even if still fails return None
@retry(stop=stop_after_delay(10), retry_error_callback=lambda a: None)
def get_qstat_info(qflag, header, projects=None, users=None):

qstat = os.path.join(os.environ['PBS_EXEC'], 'bin', 'qstat')
cmd = '{} {}'.format(qstat, qflag)
# qstat command seems to be accessible from the path on PBS jobs
cmd = f'qstat {qflag}'

cmd = shlex.split(cmd)
output = subprocess.check_output(cmd)
Expand Down
28 changes: 28 additions & 0 deletions payu/schedulers/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
# expanded to provide greater functionality in the future.


from typing import Any, Dict, Optional


class Scheduler(object):
"""Abstract scheduler class."""

Expand All @@ -17,3 +20,28 @@ def __init__(self):

# Abstract hook: the base class has no submission logic and always raises
# NotImplementedError.
# NOTE(review): presumably each concrete scheduler overrides this -- confirm.
def submit(self, pbs_script, pbs_config, pbs_vars=None, python_exe=None):
raise NotImplementedError

def get_job_info(self) -> Optional[Dict[str, Any]]:
"""Get information about the currently running job.

The base implementation is a no-op stub, so it implicitly returns None.

Returns
-------
Optional[Dict[str, Any]]
Dictionary of information queried from the scheduler; None from this
base implementation.
"""
pass

def get_job_id(self, short: bool = True) -> Optional[str]:
"""Get scheduler-specific job ID.

The base implementation is a no-op stub: ``short`` is ignored and the
method implicitly returns None.

Parameters
----------
short: bool, default True
Return shortened form of the job ID

Returns
-------
Optional[str]
Job id if defined, None otherwise
"""
pass

0 comments on commit 7ecb962

Please sign in to comment.