From 8c81f6cb72f0c7f91a3495c6b97da41464b42f78 Mon Sep 17 00:00:00 2001 From: Ward D Date: Wed, 22 Feb 2023 14:11:27 +0100 Subject: [PATCH] bugfixes (#127) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - sanitize é - fex uploads can be disabled in the config - mismatch changes in the demuxsheet are reflected in the email + mqc report as well. --- ChangeLog | 2 + dissectBCL.ini | 1 + docs/config.rst | 1 + src/dissectBCL/demux.py | 23 ++++++++++-- src/dissectBCL/fakeNews.py | 75 +++++++++++++++++++++----------------- src/dissectBCL/misc.py | 2 + tests/test_demux.py | 5 ++- 7 files changed, 71 insertions(+), 38 deletions(-) diff --git a/ChangeLog b/ChangeLog index d3a4a37..964e65d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,8 @@ CHANGES ======= +* changelog + v0.2.0 ------ diff --git a/dissectBCL.ini b/dissectBCL.ini index 6948ff0..29cd72d 100644 --- a/dissectBCL.ini +++ b/dissectBCL.ini @@ -10,6 +10,7 @@ tempDir=/path/to/tempDir [Internals] PIs=[pi1,pi2,pi3,pi4,pi5] seqDir=seqfolderstr +fex=False [parkour] pullURL=parkour.pull.url/api diff --git a/docs/config.rst b/docs/config.rst index 45c6991..6e04f8c 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -40,6 +40,7 @@ Inside this block there are two elements: #. PIs: a list of principal investigators. #. seqDir: the directory inside a PI's directory where the sequencing data can be deposited. +#. fex: Boolean that indicates if an external project (PI not in PIs list) should be packed as a tar and uploaded using fexsend. If a project is from an internal PI, it will be copied over into: diff --git a/src/dissectBCL/demux.py b/src/dissectBCL/demux.py index 0109d7e..6d1da08 100644 --- a/src/dissectBCL/demux.py +++ b/src/dissectBCL/demux.py @@ -318,12 +318,18 @@ def readDemuxSheet(demuxSheet): We check for: - 'mask' (overridecycles) - indices used. + - mismatches definition ''' with open(demuxSheet) as f: sampleStatus = False nesLis = [] + mmdic = {} for line in f: line = line.strip() + if line.startswith('BarcodeMismatchesIndex1'): + mmdic['BarcodeMismatchesIndex1'] = int(line.split(',')[1]) + if line.startswith('BarcodeMismatchesIndex2'): + mmdic['BarcodeMismatchesIndex2'] = int(line.split(',')[1]) if line.startswith('OverrideCycles'): mask = line.replace( 'OverrideCycles', '' @@ -353,8 +359,7 @@ def readDemuxSheet(demuxSheet): mask except NameError: mask = None - - return (mask, df, dualIx) + return (mask, df, dualIx, mmdic) def parseStats(outputFolder, ssdf): @@ -444,7 +449,19 @@ def demux(sampleSheet, flowcell, config): logging.warning( "demuxSheet for {} already exists!".format(outLane) ) - manual_mask, manual_df, manual_dualIx = readDemuxSheet(demuxOut) + manual_mask, manual_df, manual_dualIx, man_mmdic = readDemuxSheet( + demuxOut + ) + if ( + sampleSheet.ssDic[outLane]['mismatch'] != man_mmdic + ): + logging.info( + "mismatch dic is changed from {} into {}".format( + sampleSheet.ssDic[outLane]['mismatch'], + man_mmdic + ) + ) + sampleSheet.ssDic[outLane]['mismatch'] = man_mmdic # if mask is changed, update: # Mask if ( diff --git a/src/dissectBCL/fakeNews.py b/src/dissectBCL/fakeNews.py index 027d49c..37d0cae 100644 --- a/src/dissectBCL/fakeNews.py +++ b/src/dissectBCL/fakeNews.py @@ -203,7 +203,6 @@ def multiQC_yaml(config, flowcell, ssDic, project, laneFolder): ssdf = ssDic['sampleSheet'][ ssDic['sampleSheet']['Sample_Project'] == project ] - # data string genstats mqcData = "# format: 'tsv'\n" mqcData += "# plot_type: 'generalstats'\n" @@ -464,40 +463,48 @@ def shipFiles(outPath, config): getDiskSpace(enduserBase)[1] )] else: - shipDicStat = "Uploaded" - laneStr = fqcPath.split('/')[-2] - # If the same tarball is already present, replace it. - fexList = check_output( - [ - 'fexsend', - '-l', - config['communication']['fromAddress'] - ] - ).decode("utf-8").replace("\n", " ").split(' ') - logging.info("fexList: {}".format(fexList)) - tarBall = laneStr + '_' + project + '.tar' - if tarBall in fexList: - fexRm = [ - 'fexsend', - '-d', - tarBall, + if config['Internals']['fex']: + shipDic[project] = "Ignored( by config)" + logging.info( + "No fex upload for {} because of config".format(project) + ) + else: + shipDicStat = "Uploaded" + laneStr = fqcPath.split('/')[-2] + # If the same tarball is already present, replace it. + fexList = check_output( + [ + 'fexsend', + '-l', + config['communication']['fromAddress'] + ] + ).decode("utf-8").replace("\n", " ").split(' ') + logging.info("fexList: {}".format(fexList)) + tarBall = laneStr + '_' + project + '.tar' + if tarBall in fexList: + fexRm = [ + 'fexsend', + '-d', + tarBall, + config['communication']['fromAddress'] + ] + logging.info( + "Purging {} existing fex with:".format(project) + ) + logging.info("fexRm") + fexdel = Popen(fexRm) + fexdel.wait() + shipDicStat = "Replaced" + fexer = "tar cf - {} {} | fexsend -s {}.tar {}".format( + projectPath, + fqcPath, + laneStr + '_' + project, config['communication']['fromAddress'] - ] - logging.info("Purging {} existing fex with:".format(project)) - logging.info("fexRm") - fexdel = Popen(fexRm) - fexdel.wait() - shipDicStat = "Replaced" - fexer = "tar cf - {} {} | fexsend -s {}.tar {}".format( - projectPath, - fqcPath, - laneStr + '_' + project, - config['communication']['fromAddress'] - ) - logging.info("Pushing {} to fex with:".format(project)) - logging.info(fexer) - os.system(fexer) - shipDic[project] = shipDicStat + ) + logging.info("Pushing {} to fex with:".format(project)) + logging.info(fexer) + os.system(fexer) + shipDic[project] = shipDicStat # Ship multiQC reports. seqFacDir = os.path.join( config['Dirs']['seqFacDir'], diff --git a/src/dissectBCL/misc.py b/src/dissectBCL/misc.py index c4f0935..006d253 100644 --- a/src/dissectBCL/misc.py +++ b/src/dissectBCL/misc.py @@ -357,6 +357,7 @@ def umlautDestroyer(germanWord): ''' _u = 'ü'.encode() + _ec = 'é'.encode() _U = 'Ü'.encode() _a = 'ä'.encode() _A = 'Ä'.encode() @@ -366,6 +367,7 @@ def umlautDestroyer(germanWord): _string = germanWord.encode() _string = _string.replace(_u, b'u') + _string = _string.replace(_ec, b'e') _string = _string.replace(_U, b'U') _string = _string.replace(_a, b'a') _string = _string.replace(_A, b'A') diff --git a/tests/test_demux.py b/tests/test_demux.py index d8ad37d..cbcfa40 100644 --- a/tests/test_demux.py +++ b/tests/test_demux.py @@ -22,10 +22,13 @@ def test_hamming2Mismatch(self): class Test_demuxSheet_Files(): def test_readDemuxSheet(self): - mask, df, dualIx = readDemuxSheet(os.path.join( + mask, df, dualIx, manDic = readDemuxSheet(os.path.join( os.path.dirname(os.path.realpath(__file__)), 'test_demux', 'demuxSheet.csv' )) + assert manDic == { + 'BarcodeMismatchesIndex1':1, 'BarcodeMismatchesIndex2':1 + } assert mask == 'Y101;I8N2;I8N16;Y101' assert df.size == 75 assert all(df.columns == pd.Index(