Skip to content

Commit

Permalink
MiSeq (#146)
Browse files Browse the repository at this point in the history
  • Loading branch information
WardDeb authored May 31, 2023
2 parents a9d1b29 + b431b6b commit fcb70a3
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 189 deletions.
2 changes: 1 addition & 1 deletion contaminome.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ eukaryotes:
taxid: 10090
Drosophila melanogaster:
URL: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/215/GCF_000001215.4_Release_6_plus_ISO1_MT/GCF_000001215.4_Release_6_plus_ISO1_MT_genomic.fna.gz
vulgarname: drosophila
vulgarname: fly
accession: GCF_000001215.4
taxid: 7227
Aedes aegypti:
Expand Down
1 change: 1 addition & 0 deletions dissectBCL.ini
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ kraken2db=/path/to/kraken2_contaminome/contaminomedb

[misc]
mpiImg=/path/to/multiqc_headerimg.jpg
krakenExpl="<font size="2"> Kraken is used to classify the reads and to detect contamination. <br> For this we use a *custom* database, with a simplified taxonomical hierarchy (that no longer resembles any true taxonomical classification). <br> In brief, by default we screen for: <li><b>eukaryotes</b> (human, mouse, fly, mosquito, lamprey, medaka, c-elegans, yeast, zebrafish and the moss-piglet)</li> <li><b>prokaryotes</b> (Ecoli, pseudomonas, mycoplasma and haemophilus influenzae)</li> <li><b>viruses</b> (sars-cov2, influenza A,B & C, norwalk virus, rhinoviruses, drosophila C virus, phiX and lambda phage)</li> <li><b>custom databases</b> (ERCC spikes, univec core DB)</li> Note that for human, mouse, fly and mosquito we scan for mitochondrial and ribosomal contamination separately. <br> Only the top (most abundant) five hits and unclassified hits are shown, all other hits are grouped under an 'other' tag.</font>"

[communication]
deepSeq=email@seqfacility.de
Expand Down
22 changes: 22 additions & 0 deletions src/dissectBCL/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,30 @@ def parseRunInfo(self):
flowcellID = i.text
return seqRecipe, lanes, instrument, flowcellID

# Validate successful run.
def validateRunCompletion(self):
    """
    Return the run completion status of this flowcell.

    For MiSeq runs the status is read from the text of the
    ``CompletionStatus`` element in the XML file that
    ``self.runCompletionStatus`` points to. If several such elements
    exist, the last one wins. For every other sequencer no such file is
    read and the run is assumed to have completed successfully.

    Returns:
        The completion status string ('SuccessfullyCompleted' for
        non-MiSeq sequencers), or None when a MiSeq status file
        contains no ``CompletionStatus`` element.
    """
    logging.info("validateRunCompletion")
    # Compare case-insensitively: the sequencer is spelled 'MiSeq'
    # elsewhere in the code base, so an exact match on 'Miseq' would
    # never trigger and the XML would silently be skipped.
    if str(self.sequencer).lower() != 'miseq':
        # No RunCompletionStatus.xml for other sequencers, assume success.
        return 'SuccessfullyCompleted'
    status = None
    root = ET.parse(self.runCompletionStatus).getroot()
    for node in root.iter('CompletionStatus'):
        status = node.text
    if status is None:
        # Without a status the run cannot be confirmed as successful;
        # None compares unequal to 'SuccessfullyCompleted' downstream.
        logging.warning(
            "No CompletionStatus found in {}".format(
                self.runCompletionStatus
            )
        )
    return status

def __init__(
self,
name,
bclPath,
origSS,
runInfo,
runCompStat,
inBaseDir,
outBaseDir,
logFile,
Expand All @@ -101,12 +119,14 @@ def __init__(
self.bclPath = bclPath
self.origSS = origSS
self.runInfo = runInfo
self.runCompletionStatus = runCompStat
self.inBaseDir = inBaseDir
self.outBaseDir = outBaseDir
self.logFile = logFile
self.config = config
# Run filesChecks
self.filesExist()
self.succesfullrun = self.validateRunCompletion()
# populate runInfo vars.
self.seqRecipe, \
self.lanes, \
Expand All @@ -121,6 +141,8 @@ def asdict(self):
'bclPath': self.bclPath,
'original sampleSheet': self.origSS,
'runInfo': self.runInfo,
'runCompletionStatus': self.runCompletionStatus,
'sucessfulRun': self.succesfullrun,
'inBaseDir': self.inBaseDir,
'outBaseDir': self.outBaseDir,
'dissect logFile': self.logFile,
Expand Down
302 changes: 166 additions & 136 deletions src/dissectBCL/demux.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,150 +441,180 @@ def parseStats(outputFolder, ssdf):

def demux(sampleSheet, flowcell, config):
logging.warning("Demux module")
for outLane in sampleSheet.ssDic:
logging.info("Demuxing {}".format(outLane))
# Check outDir
outputFolder = os.path.join(flowcell.outBaseDir, outLane)
if not os.path.exists(outputFolder):
os.mkdir(outputFolder)
logging.info("{} created.".format(outputFolder))
else:
logging.info("{} already exists. Moving on.".format(outputFolder))
# Write the demuxSheet in the outputfolder
demuxOut = os.path.join(outputFolder, "demuxSheet.csv")
# Don't remake if demuxSheet exist
if not os.path.exists(demuxOut):
logging.info("Writing demuxSheet for {}".format(outLane))
writeDemuxSheet(
demuxOut,
sampleSheet.ssDic[outLane],
sampleSheet.laneSplitStatus
)
else:
logging.warning(
"demuxSheet for {} already exists!".format(outLane)
)
manual_mask, manual_df, manual_dualIx, man_mmdic = readDemuxSheet(
demuxOut
# Double check for run failure
if flowcell.succesfullrun != 'SuccessfullyCompleted':
for outLane in sampleSheet.ssDic:
outputFolder = os.path.join(
flowcell.outBaseDir, outLane
)
if (
sampleSheet.ssDic[outLane]['mismatch'] != man_mmdic
):
logging.info(
"mismatch dic is changed from {} into {}".format(
sampleSheet.ssDic[outLane]['mismatch'],
man_mmdic
)
)
sampleSheet.ssDic[outLane]['mismatch'] = man_mmdic
# if mask is changed, update:
# Mask
if (
'mask' in sampleSheet.ssDic[outLane]
and manual_mask != sampleSheet.ssDic[outLane]['mask']
):
logging.info(
"Mask is changed from {} into {}.".format(
sampleSheet.ssDic[outLane]['mask'],
manual_mask
)
)
sampleSheet.ssDic[outLane]['mask'] = manual_mask
# dualIx status
if (
'dualIx' in sampleSheet.ssDic[outLane]
and manual_dualIx != sampleSheet.ssDic[outLane]['dualIx']
):
if not os.path.exists(outputFolder):
os.mkdir(outputFolder)
Path(
os.path.join(outputFolder, 'run.failed')
).touch()
return ('sequencingfailed')
else:
for outLane in sampleSheet.ssDic:
logging.info("Demuxing {}".format(outLane))
# Check outDir
outputFolder = os.path.join(flowcell.outBaseDir, outLane)
if not os.path.exists(outputFolder):
os.mkdir(outputFolder)
logging.info("{} created.".format(outputFolder))
else:
logging.info(
"dualIx is changed from {} into {}.".format(
sampleSheet.ssDic[outLane]['dualIx'],
manual_dualIx
)
"{} already exists. Moving on.".format(outputFolder)
)
sampleSheet.ssDic[outLane]['dualIx'] = manual_dualIx

# sampleSheet
sampleSheet.ssDic[outLane]['sampleSheet'] = matchingSheets(
sampleSheet.ssDic[outLane]['sampleSheet'],
manual_df
)
# Check for 'bak file' existence.
if os.path.exists(demuxOut + '.bak'):
sampleSheet.ssDic[outLane]['P5RC'] = True
# Don't run bcl-convert if we have the touched flag.
if not os.path.exists(
os.path.join(outputFolder, 'bclconvert.done')
):
# Run bcl-convert
bclOpts = [
config['software']['bclconvert'],
'--output-directory', outputFolder,
'--force',
'--bcl-input-directory', flowcell.bclPath,
'--sample-sheet', demuxOut,
'--bcl-num-conversion-threads', "20",
'--bcl-num-compression-threads', "20",
"--bcl-sampleproject-subdirectories", "true",
]
if not sampleSheet.laneSplitStatus:
bclOpts.append('--no-lane-splitting')
bclOpts.append('true')
logging.info("Starting BCLConvert")
logging.info(" ".join(bclOpts))
bclRunner = Popen(
bclOpts,
stdout=PIPE
)
exitcode = bclRunner.wait()
if exitcode == 0:
logging.info("bclConvert exit {}".format(exitcode))
if flowcell.succesfullrun != 'SuccessfullyCompleted':
print("In failure if.")
Path(
os.path.join(outputFolder, 'bclconvert.done')
os.path.join(outputFolder, 'run.failed')
).touch()
if flowcell.sequencer == 'MiSeq':
if differentialDiagnosis(
outputFolder,
sampleSheet.ssDic[outLane]['dualIx'],
):
logging.info("P5 RC triggered.")
# Purge existing reports.
logging.info("Purge existing Reports folder")
shutil.rmtree(
os.path.join(outputFolder, 'Reports')
)
bclRunner = Popen(
bclOpts,
stdout=PIPE
mailHome(
"{} ignored".format(flowcell.name),
"RunCompletionStatus is not successfullycompleted.\n" +
"Marked for failure and ignored for the future.",
config,
toCore=True
)
break
# Write the demuxSheet in the outputfolder
demuxOut = os.path.join(outputFolder, "demuxSheet.csv")
# Don't remake if demuxSheet exist
if not os.path.exists(demuxOut):
logging.info("Writing demuxSheet for {}".format(outLane))
writeDemuxSheet(
demuxOut,
sampleSheet.ssDic[outLane],
sampleSheet.laneSplitStatus
)
else:
logging.warning(
"demuxSheet for {} already exists!".format(outLane)
)
man_mask, man_df, man_dualIx, man_mmdic = readDemuxSheet(
demuxOut
)
if (
sampleSheet.ssDic[outLane]['mismatch'] != man_mmdic
):
logging.info(
"mismatch dic is changed from {} into {}".format(
sampleSheet.ssDic[outLane]['mismatch'],
man_mmdic
)
exitcode = bclRunner.wait()
logging.info(
"bclConvert P5fix exit {}".format(exitcode)
)
sampleSheet.ssDic[outLane]['mismatch'] = man_mmdic
# if mask is changed, update:
# Mask
if (
'mask' in sampleSheet.ssDic[outLane]
and man_mask != sampleSheet.ssDic[outLane]['mask']
):
logging.info(
"Mask is changed from {} into {}.".format(
sampleSheet.ssDic[outLane]['mask'],
man_mask
)
# Update the sampleSheet with proper RC'ed indices.
sampleSheet.ssDic[outLane][
'sampleSheet'
] = matchingSheets(
sampleSheet.ssDic[outLane]['sampleSheet'],
readDemuxSheet(demuxOut, what='df')
)
sampleSheet.ssDic[outLane]['mask'] = man_mask
# dualIx status
if (
'dualIx' in sampleSheet.ssDic[outLane]
and man_dualIx != sampleSheet.ssDic[outLane]['dualIx']
):
logging.info(
"dualIx is changed from {} into {}.".format(
sampleSheet.ssDic[outLane]['dualIx'],
man_dualIx
)
sampleSheet.ssDic[outLane]['P5RC'] = True
)
sampleSheet.ssDic[outLane]['dualIx'] = man_dualIx

# sampleSheet
sampleSheet.ssDic[outLane]['sampleSheet'] = matchingSheets(
sampleSheet.ssDic[outLane]['sampleSheet'],
man_df
)
# Check for 'bak file' existence.
if os.path.exists(demuxOut + '.bak'):
sampleSheet.ssDic[outLane]['P5RC'] = True
else:
sampleSheet.ssDic[outLane]['P5RC'] = False
else:
logging.critical("bclConvert exit {}".format(exitcode))
mailHome(
outLane,
'BCL-convert exit {}. Investigate.'.format(
exitcode
),
config,
toCore=True
)
sys.exit(1)
# Don't run bcl-convert if we have the touched flag.
if not os.path.exists(
os.path.join(outputFolder, 'bclconvert.done')
):
# Run bcl-convert
bclOpts = [
config['software']['bclconvert'],
'--output-directory', outputFolder,
'--force',
'--bcl-input-directory', flowcell.bclPath,
'--sample-sheet', demuxOut,
'--bcl-num-conversion-threads', "20",
'--bcl-num-compression-threads', "20",
"--bcl-sampleproject-subdirectories", "true",
]
if not sampleSheet.laneSplitStatus:
bclOpts.append('--no-lane-splitting')
bclOpts.append('true')
logging.info("Starting BCLConvert")
logging.info(" ".join(bclOpts))
bclRunner = Popen(
bclOpts,
stdout=PIPE
)
exitcode = bclRunner.wait()
if exitcode == 0:
logging.info("bclConvert exit {}".format(exitcode))
Path(
os.path.join(outputFolder, 'bclconvert.done')
).touch()
if flowcell.sequencer == 'MiSeq':
if differentialDiagnosis(
outputFolder,
sampleSheet.ssDic[outLane]['dualIx'],
):
logging.info("P5 RC triggered.")
# Purge existing reports.
logging.info("Purge existing Reports folder")
shutil.rmtree(
os.path.join(outputFolder, 'Reports')
)
bclRunner = Popen(
bclOpts,
stdout=PIPE
)
exitcode = bclRunner.wait()
logging.info(
"bclConvert P5fix exit {}".format(exitcode)
)
# Update the sampleSheet with proper RC'ed indices.
sampleSheet.ssDic[outLane][
'sampleSheet'
] = matchingSheets(
sampleSheet.ssDic[outLane]['sampleSheet'],
readDemuxSheet(demuxOut, what='df')
)
sampleSheet.ssDic[outLane]['P5RC'] = True
else:
sampleSheet.ssDic[outLane]['P5RC'] = False
else:
logging.critical("bclConvert exit {}".format(exitcode))
mailHome(
outLane,
'BCL-convert exit {}. Investigate.'.format(
exitcode
),
config,
toCore=True
)
sys.exit(1)

logging.info("Parsing stats for {}".format(outLane))
sampleSheet.ssDic[outLane]['sampleSheet'] = parseStats(
outputFolder,
sampleSheet.ssDic[outLane]['sampleSheet']
)
return (0)
logging.info("Parsing stats for {}".format(outLane))
sampleSheet.ssDic[outLane]['sampleSheet'] = parseStats(
outputFolder,
sampleSheet.ssDic[outLane]['sampleSheet']
)
return (0)
Loading

0 comments on commit fcb70a3

Please sign in to comment.