-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
use python instead of awk to extract diamond reads
- Loading branch information
1 parent
0ada1f5
commit f8843f3
Showing
14 changed files
with
103 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
import subprocess | ||
import os | ||
|
||
def parse_args(args=None):
    """Parse the command-line options of the DIAMOND read extractor.

    Args:
        args: Optional list of argument strings. When ``None``, argparse
            reads the options from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``tsv``, ``taxid``,
        ``evalue``, ``prefix``, ``single_end`` and ``fastq``. No ``type=``
        converters are registered, so every value is a plain string except
        ``single_end`` (bool) and ``fastq`` (list of strings).
    """
    cli = argparse.ArgumentParser(
        description="Extract reads of specified taxonomic ID from the output of DIAMOND/blastx classification."
    )

    # Mandatory single-value options, registered in the order they should
    # appear in the generated help text.
    required_text_options = (
        ("--tsv", "Path to the DIAMOND output TSV file."),
        ("--taxid", "Taxonomic ID to extract the reads"),
        ("--evalue", "A e-value to filter the DIAMOND classification result."),
        ("--prefix", "Prefix for output files"),
    )
    for flag, help_text in required_text_options:
        cli.add_argument(flag, required=True, help=help_text)

    cli.add_argument("--single_end", action="store_true", help="Flag for single_end reads")
    # One or more FASTQ paths; the paired-end code path consumes the first two.
    cli.add_argument("--fastq", required=True, nargs="+", help="Paths to input FASTQ files.")

    return cli.parse_args(args)
|
||
def extract_reads_by_taxid(tsv_path, taxid, evalue, fastq_paths, single_end, prefix):
    """Extract the reads DIAMOND assigned to ``taxid`` from the FASTQ input.

    The TSV is scanned line by line; the code reads column 1 as the read ID,
    column 2 as the taxonomic ID and column 3 as the e-value. IDs of rows
    whose taxonomic ID equals ``taxid`` and whose e-value is strictly below
    ``evalue`` are written to a temporary ``{prefix}_readID.txt`` file, which
    is handed to ``seqkit grep -f`` to pull the matching records out of each
    FASTQ file. The temporary file is removed afterwards, also on failure.

    Args:
        tsv_path: Path to the tab-separated DIAMOND output.
        taxid: Taxonomic ID to select (compared as a string, exact match).
        evalue: Upper e-value bound (exclusive); a number or numeric string
            such as ``"1e-5"``.
        fastq_paths: Sequence of FASTQ paths; the first is used for
            single-end data, the first two for paired-end data.
        single_end: ``True`` to process one FASTQ file, ``False`` for a pair.
        prefix: Prefix for the temporary and output file names.

    Returns:
        List of output FASTQ paths: one entry for single-end input, two
        (read1, read2) for paired-end input.

    Raises:
        ValueError: If ``evalue`` or a selected row's e-value is not numeric,
            or if too few FASTQ paths are supplied.
        subprocess.CalledProcessError: If seqkit exits with a non-zero status.
        OSError: If the TSV cannot be read, the ID file cannot be written, or
            the ``seqkit`` executable cannot be started.
    """
    # E-values must be compared numerically: as strings, "2e-50" < "1e-5" is
    # False, which silently drops perfectly good hits.
    max_evalue = float(evalue)
    wanted_taxid = str(taxid)

    # Fail before any work is done rather than after the first seqkit run.
    needed = 1 if single_end else 2
    if len(fastq_paths) < needed:
        raise ValueError(
            f"Expected at least {needed} FASTQ path(s), got {len(fastq_paths)}"
        )

    read_id_file = f"{prefix}_readID.txt"
    try:
        # Step 1: filter the DIAMOND TSV and collect the matching read IDs.
        with open(tsv_path, "r") as tsv, open(read_id_file, "w") as out:
            for line_number, line in enumerate(tsv, start=1):
                # Strip only the line terminator so that an empty leading
                # column cannot shift the remaining fields.
                fields = line.rstrip("\r\n").split("\t")
                # Blank or truncated lines carry no usable hit.
                if len(fields) < 3:
                    continue
                # NOTE(review): exact string match; if the taxonomy column can
                # hold several IDs in one cell this will miss them - confirm
                # against the DIAMOND output format in use.
                if fields[1] != wanted_taxid:
                    continue
                try:
                    hit_evalue = float(fields[2])
                except ValueError as err:
                    raise ValueError(
                        f"{tsv_path}:{line_number}: invalid e-value {fields[2]!r}"
                    ) from err
                if hit_evalue < max_evalue:
                    out.write(fields[0] + "\n")

        # Step 2: extract the reads with seqkit, one run per FASTQ file.
        if single_end:
            jobs = [
                (fastq_paths[0], f"{prefix}_{taxid}.extracted_diamond_reads.fastq"),
            ]
        else:
            jobs = [
                (fastq_paths[0], f"{prefix}_{taxid}.extracted_diamond_read1.fastq"),
                (fastq_paths[1], f"{prefix}_{taxid}.extracted_diamond_read2.fastq"),
            ]

        output_files = []
        for fastq_path, output_file in jobs:
            subprocess.run(
                ["seqkit", "grep", "-f", read_id_file, fastq_path, "-o", output_file],
                check=True,
            )
            output_files.append(output_file)
    finally:
        # Clean up the temporary ID list even when parsing or seqkit failed.
        try:
            os.remove(read_id_file)
        except FileNotFoundError:
            # The TSV could not be opened, so the ID file was never created.
            pass

    return output_files
|
||
def main(args=None):
    """Command-line entry point: parse the options and run the extraction.

    Args:
        args: Optional list of argument strings forwarded to ``parse_args``;
            ``None`` lets the parser read the process arguments.

    Returns:
        None. The list of output files produced by the extraction is not
        propagated to the caller.
    """
    options = parse_args(args)
    # Map the parsed CLI attributes onto the extractor's keyword parameters.
    extraction_kwargs = {
        "tsv_path": options.tsv,
        "taxid": options.taxid,
        "evalue": options.evalue,
        "fastq_paths": options.fastq,
        "single_end": options.single_end,
        "prefix": options.prefix,
    }
    extract_reads_by_taxid(**extraction_kwargs)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Conda environment definition for the "extractdiamondreads" environment.
name: extractdiamondreads | ||
# Channels listed for package resolution.
channels: | ||
- conda-forge | ||
- bioconda | ||
# Exact version pins; each package is qualified with its source channel.
dependencies: | ||
# seqkit is the external tool the extraction script invokes (`seqkit grep`).
- bioconda::seqkit=2.9.0 | ||
# Python interpreter used to run the extraction script.
- conda-forge::python=3.13.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters