diff --git a/chew/common.py b/chew/common.py index 01d052a..07dd9fb 100644 --- a/chew/common.py +++ b/chew/common.py @@ -125,9 +125,11 @@ class Site: alt: str -def load_sites(genome_release: str) -> typing.List[Site]: +def load_sites(genome_release: str, sites_suffix: str) -> typing.List[Site]: logger.info("Loading sites .bed.gz for %s", genome_release) - path_gz = os.path.join(os.path.dirname(__file__), "data", f"{genome_release}_sites.bed.gz") + path_gz = os.path.join( + os.path.dirname(__file__), "data", f"{genome_release}_{sites_suffix}.bed.gz" + ) result = [] with gzip.open(path_gz, "rt") as inputf: for line in inputf: diff --git a/chew/data/GRCh37_sitesX.bed.gz b/chew/data/GRCh37_sitesX.bed.gz index 5320024..325c543 100644 Binary files a/chew/data/GRCh37_sitesX.bed.gz and b/chew/data/GRCh37_sitesX.bed.gz differ diff --git a/chew/data/GRCh38_sitesX.bed.gz b/chew/data/GRCh38_sitesX.bed.gz index 555372b..d4a5d44 100644 Binary files a/chew/data/GRCh38_sitesX.bed.gz and b/chew/data/GRCh38_sitesX.bed.gz differ diff --git a/chew/fingerprint.py b/chew/fingerprint.py index 53aca73..10644d6 100644 --- a/chew/fingerprint.py +++ b/chew/fingerprint.py @@ -178,7 +178,7 @@ def snps_step_call( logger.info("Reading sites BED (%s)...", bed_file) sites = { "%s%s:%s" % (chr_prefix, site.chrom, site.pos): (0, 0, float("nan")) - for site in load_sites(genome_release) + for site in load_sites(genome_release, sites_suffix) } logger.info("Converting VCF to fingerprint...") with vcfpy.Reader.from_path(path_calls) as vcf_reader: @@ -346,7 +346,7 @@ def run(config: Config): else: samtools_idxstats_out = None - if config.step_bcftools_roh: + if config.step_bcftools_roh and autosomal_fingerprint is not None: roh_txt_contents = bcftools_roh_step( sample=sample, release=genome_release, autosomal_fingerprint=autosomal_fingerprint ) diff --git a/chew/roh.py b/chew/roh.py index b253fad..c77ff4e 100644 --- a/chew/roh.py +++ b/chew/roh.py @@ -53,7 +53,7 @@ def create_vcf_header(sample: str, release: str) -> vcfpy.Header: def write_vcf(tmpdir: str, sample: str, release: str, autosomal_fingerprint) -> str: logger.info("Constructing VCF header...") vcf_header = create_vcf_header(sample, release) - sites = load_sites(release) + sites = load_sites(release, "sites") autosomal_mask = autosomal_fingerprint[0] autosomal_is_alt = autosomal_fingerprint[1] autosomal_hom_alt = autosomal_fingerprint[2] diff --git a/chew/stats.py b/chew/stats.py index b7eff8b..07c5208 100644 --- a/chew/stats.py +++ b/chew/stats.py @@ -109,9 +109,14 @@ def compute_sample_stats(container) -> SampleStats: header = extract_header(container) autosomal_fingerprint = container["autosomal_fingerprint"] - autosomal_mask = autosomal_fingerprint[0] - autosomal_is_alt = autosomal_fingerprint[1] - autosomal_hom_alt = autosomal_fingerprint[2] + if autosomal_fingerprint: + autosomal_mask = autosomal_fingerprint[0] + autosomal_is_alt = autosomal_fingerprint[1] + autosomal_hom_alt = autosomal_fingerprint[2] + else: + autosomal_mask = np.zeros(0, dtype=bool) + autosomal_is_alt = np.zeros(0, dtype=bool) + autosomal_hom_alt = np.zeros(0, dtype=bool) if "autosomal_aafs" in header.fields: var_het = compute_autosomal_aafs(container) diff --git a/requirements/base.txt b/requirements/base.txt index cc5e100..5de11ed 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,11 +1,12 @@ +attrs +cattrs +click logzero -tqdm numpy -vcfpy -pysam pandas plotly +pyarrow +pysam scipy -click -attrs -cattrs +tqdm +vcfpy diff --git a/tests/test_run_fingerprint.py b/tests/test_run_fingerprint.py index b51a0e2..9e8695b 100644 --- a/tests/test_run_fingerprint.py +++ b/tests/test_run_fingerprint.py @@ -25,7 +25,7 @@ def test_smoke_test_run_fingerprint(cli_runner, path_tests, tmpdir): "--input-bam", str(path_tests / "data" / "igsr.HG00102.TP73.bam"), "--output-fingerprint", - str(tmpdir / "out"), + str(tmpdir / "out.npz"), ], ) @@ -59,7 +59,7 @@ def test_smoke_test_run_fingerprint(cli_runner, path_tests, tmpdir): def test_fingerprint_bam_grch37( cli_runner: CliRunner, tmpdir: LocalPath, path_tests: Path, path_ref: str, path_bam: str ): - cli_runner.invoke( + result = cli_runner.invoke( cli, [ "fingerprint", @@ -68,9 +68,10 @@ def test_fingerprint_bam_grch37( "--input-bam", path_bam, "--output-fingerprint", - str(tmpdir / "out"), + str(tmpdir / "out.npz"), ], ) + assert result.exit_code == 0, result # Check that output path exists and is similar to all finger prints. output = tmpdir / "out.npz"