-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* fix name * keep stderr open for common convention * add useful script * update how singletons are counted * fix the last missing bc * alphanumeric check
- Loading branch information
Showing
7 changed files
with
98 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#! /usr/bin/env python | ||
|
||
import os | ||
import re | ||
import sys | ||
import argparse | ||
import subprocess | ||
import pysam | ||
|
||
# Barcodes containing an "A00"/"B00"/"C00"/"D00" segment (any case) are treated
# as invalid and their records are dropped.
INVALID_BARCODE = re.compile(r'[AaBbCcDd]00')


def parse_arguments(argv=None):
    """Parse and validate the command-line arguments.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]`` when omitted.

    Returns:
        The populated ``argparse.Namespace`` with ``bx_tag``, ``singletons``,
        ``threads`` and ``input``.

    Raises:
        SystemExit: with status 1 (after printing help to stderr) when no
            arguments are given, or with status 2 (via ``parser.error``) when
            an argument fails validation.
    """
    parser = argparse.ArgumentParser(
        prog='separate_singletons',
        description='Isolate singleton and non-singleton linked-read BAM records into separate files.',
        usage = "separate_singletons -t threads -b barcode_tag -s singletons.bam input.bam > output.bam",
    )
    parser.add_argument("-b", dest = "bx_tag", metavar = "barcode_tag", type=str, default = "BX", help="The header tag with the barcode (default: %(default)s)")
    parser.add_argument("-s", dest = "singletons", metavar = "singletons_file", type=str, default = "singletons.bam", help="Name of output singleton file (default: %(default)s)")
    parser.add_argument("-t", dest = "threads", metavar="threads", type=int, default = 4, help="Number of threads to use (default: %(default)s)")
    parser.add_argument('input', type = str, help = "Input bam file")

    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args(argv)
    if args.threads < 1:
        parser.error(f"Threads supplied to -t ({args.threads}) must be positive (e.g. >=1)")
    if not os.path.exists(args.input):
        parser.error(f"{args.input} was not found")
    # reject tags that are the wrong length OR contain non-alphanumeric characters
    if len(args.bx_tag) != 2 or not args.bx_tag.isalnum():
        parser.error(f"The header tag supplied to -b ({args.bx_tag}) must be alphanumeric and exactly two characters long")
    return args


def _usable_barcode(record, bx_tag):
    """Return the record's barcode, or None if the tag is absent or invalid."""
    try:
        barcode = record.get_tag(bx_tag)
    except KeyError:
        return None
    if isinstance(barcode, int):
        # an int from an MI-type tag; there is no invalid pattern to check
        return barcode
    if INVALID_BARCODE.search(barcode):
        return None
    return barcode


def _fragment_increment(record):
    """Return 1 if the record adds to its barcode's read count, else 0."""
    if record.is_forward:
        # +1 for a forward read, whether it is paired or not
        return 1
    if record.is_reverse and not record.is_paired:
        # +1 for reverse only if it's unpaired, so the paired read doesn't count twice
        return 1
    return 0


def _flush_group(records, read_count, nonsingleton, singleton):
    """Write one barcode's records to the output matching its read count."""
    destination = nonsingleton if read_count > 1 else singleton
    for rec in records:
        destination.write(rec)


def separate_records(records, bx_tag, nonsingleton, singleton):
    """Split barcode-grouped records into non-singleton and singleton outputs.

    Records must arrive grouped by barcode (e.g. from a tag-sorted BAM).
    Records lacking the tag, or whose barcode matches INVALID_BARCODE, are
    skipped and written nowhere.

    Args:
        records: Iterable of alignment records exposing ``get_tag``,
            ``is_forward``, ``is_reverse`` and ``is_paired``.
        bx_tag: Two-character name of the tag holding the barcode.
        nonsingleton: Object with ``write(record)``; receives barcodes whose
            read count is greater than 1.
        singleton: Object with ``write(record)``; receives all other barcodes.
    """
    group = []
    read_count = 0
    last_barcode = None
    for record in records:
        barcode = _usable_barcode(record, bx_tag)
        if barcode is None:
            continue
        # Write the stored records when the barcode changes. Compare against
        # None explicitly: an integer barcode of 0 is falsy but still a barcode.
        if last_barcode is not None and barcode != last_barcode:
            _flush_group(group, read_count, nonsingleton, singleton)
            group = []
            read_count = 0

        group.append(record)
        read_count += _fragment_increment(record)
        last_barcode = barcode
    # the final barcode group has no following barcode to trigger its write
    if group:
        _flush_group(group, read_count, nonsingleton, singleton)


def main():
    """Sort the input by barcode tag, then split it into the two outputs.

    Non-singleton records go to stdout as BAM; singleton records go to the
    file named by ``-s``. The intermediate tag-sorted BAM is always removed.
    """
    args = parse_arguments()
    sorted_bam = f"{os.path.splitext(args.input)[0]}.bxsort.bam"
    try:
        # Pass the command as a list so paths containing spaces survive, and
        # stop immediately if samtools fails rather than reading a missing or
        # partial file.
        subprocess.run(
            ["samtools", "sort", "-@", str(args.threads), "-o", sorted_bam, "-t", args.bx_tag, args.input],
            stderr=sys.stderr,
            check=True,
        )
        with (
            pysam.AlignmentFile(sorted_bam, "rb", check_sq=False) as infile,
            pysam.AlignmentFile(sys.stdout, "wb", template=infile) as nonsingleton,
            pysam.AlignmentFile(args.singletons, "wb", template=infile) as singleton,
        ):
            separate_records(infile, args.bx_tag, nonsingleton, singleton)
    finally:
        # final housekeeping to remove the intermediate, even after a failure
        try:
            os.remove(sorted_bam)
        except FileNotFoundError:
            # samtools never created the file; nothing to clean up
            pass


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
#! /usr/bin/env bash

# Split a BAM file with BX:Z tags in two: records whose BX tag does not match
# the invalid-barcode pattern go to stdout; all other records go to the file
# named by the first argument.
#   $1  path that receives the records with invalid barcodes
#   $2  input BAM file

# Both arguments are required. With only one, the lone path would be handed to
# --unoutput and could be overwritten, so refuse to run.
# Usage text goes to stderr because stdout carries the alignment data.
if [[ $# -lt 2 ]]; then
    echo -e "\n Split a BAM file with BX:Z tags into 2 files, one with valid ACBD barcodes (stdout), one with invalid ACBD barcodes." >&2
    echo -e "\n [usage] separate_validbx invalid.bam input.bam > valid.bam" >&2
    exit 1
fi

# NOTE(review): no output-format flag is passed, so samtools picks the stdout
# format by its own default; confirm that matches the "valid.bam" in the usage.
samtools view -e '[BX]!~"[ABCD]0{2,4}"' --unoutput "$1" "$2"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters