From 074ae3ecc8669506c9e9374fb8e79d5f9717ba45 Mon Sep 17 00:00:00 2001 From: PalNilsson Date: Mon, 6 Aug 2018 11:38:17 +0200 Subject: [PATCH] Overwritten "Payload exceeded max allowed memory" fix --- ATLASExperiment.py | 7 +++++++ CHANGES | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/ATLASExperiment.py b/ATLASExperiment.py index 9542de6c..10ed2dea 100644 --- a/ATLASExperiment.py +++ b/ATLASExperiment.py @@ -1439,6 +1439,13 @@ def interpretPayloadStdout(self, job, res, getstatusoutput_was_interrupted, curr else: job.pilotErrorDiag = "Payload failed due to unknown reason (check payload stdout)" job.result[2] = error.ERR_UNKNOWN + + # Any errors due to signals can be ignored if the job was killed because of out of memory + if os.path.exists(os.path.join(job.workdir, "MEMORYEXCEEDED")): + tolog("Ignoring any previously detected errors (like signals) since MEMORYEXCEEDED file was found") + job.pilotErrorDiag = "Payload exceeded maximum allowed memory" + job.result[2] = error.ERR_PAYLOADEXCEEDMAXMEM + tolog("!!FAILED!!3000!! %s" % (job.pilotErrorDiag)) # set the trf diag error diff --git a/CHANGES b/CHANGES index 69199e1d..5404aff7 100644 --- a/CHANGES +++ b/CHANGES @@ -113,6 +113,10 @@ Log tailing (requested by R. Walker) list_replicas() - Specifying --pfn in rucio download, stageIn(), which will prevent list_replicas() from being used on server side (rucio_sitemover) +Overwritten "Payload exceeded max allowed memory" fix +- Now setting ERR_PAYLOADEXCEEDMAXMEM if MEMORYEXCEEDED file detected at the end of interpretPayloadStdout() to prevent + signal error from being set instead. Requested by R. Walker (ATLASExperiment) + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// TODO: