diff --git a/chew/common.py b/chew/common.py index 01d052a..07dd9fb 100644 --- a/chew/common.py +++ b/chew/common.py @@ -125,9 +125,11 @@ class Site: alt: str -def load_sites(genome_release: str) -> typing.List[Site]: +def load_sites(genome_release: str, sites_suffix: str) -> typing.List[Site]: logger.info("Loading sites .bed.gz for %s", genome_release) - path_gz = os.path.join(os.path.dirname(__file__), "data", f"{genome_release}_sites.bed.gz") + path_gz = os.path.join( + os.path.dirname(__file__), "data", f"{genome_release}_{sites_suffix}.bed.gz" + ) result = [] with gzip.open(path_gz, "rt") as inputf: for line in inputf: diff --git a/chew/data/GRCh37_sitesX.bed.gz b/chew/data/GRCh37_sitesX.bed.gz index 5320024..325c543 100644 Binary files a/chew/data/GRCh37_sitesX.bed.gz and b/chew/data/GRCh37_sitesX.bed.gz differ diff --git a/chew/data/GRCh38_sitesX.bed.gz b/chew/data/GRCh38_sitesX.bed.gz index 555372b..891e87c 100644 Binary files a/chew/data/GRCh38_sitesX.bed.gz and b/chew/data/GRCh38_sitesX.bed.gz differ diff --git a/chew/fingerprint.py b/chew/fingerprint.py index 53aca73..1339c0a 100644 --- a/chew/fingerprint.py +++ b/chew/fingerprint.py @@ -178,7 +178,7 @@ def snps_step_call( logger.info("Reading sites BED (%s)...", bed_file) sites = { "%s%s:%s" % (chr_prefix, site.chrom, site.pos): (0, 0, float("nan")) - for site in load_sites(genome_release) + for site in load_sites(genome_release, sites_suffix) } logger.info("Converting VCF to fingerprint...") with vcfpy.Reader.from_path(path_calls) as vcf_reader: @@ -346,7 +346,7 @@ def run(config: Config): else: samtools_idxstats_out = None - if config.step_bcftools_roh: + if config.step_bcftools_roh and autosomal_fingerprint: roh_txt_contents = bcftools_roh_step( sample=sample, release=genome_release, autosomal_fingerprint=autosomal_fingerprint ) diff --git a/chew/roh.py b/chew/roh.py index b253fad..c77ff4e 100644 --- a/chew/roh.py +++ b/chew/roh.py @@ -53,7 +53,7 @@ def create_vcf_header(sample: str, release: str) -> vcfpy.Header: def write_vcf(tmpdir: str, sample: str, release: str, autosomal_fingerprint) -> str: logger.info("Constructing VCF header...") vcf_header = create_vcf_header(sample, release) - sites = load_sites(release) + sites = load_sites(release, "sites") autosomal_mask = autosomal_fingerprint[0] autosomal_is_alt = autosomal_fingerprint[1] autosomal_hom_alt = autosomal_fingerprint[2] diff --git a/chew/stats.py b/chew/stats.py index b7eff8b..07c5208 100644 --- a/chew/stats.py +++ b/chew/stats.py @@ -109,9 +109,14 @@ def compute_sample_stats(container) -> SampleStats: header = extract_header(container) autosomal_fingerprint = container["autosomal_fingerprint"] - autosomal_mask = autosomal_fingerprint[0] - autosomal_is_alt = autosomal_fingerprint[1] - autosomal_hom_alt = autosomal_fingerprint[2] + if autosomal_fingerprint: + autosomal_mask = autosomal_fingerprint[0] + autosomal_is_alt = autosomal_fingerprint[1] + autosomal_hom_alt = autosomal_fingerprint[2] + else: + autosomal_mask = np.zeros(0, dtype=bool) + autosomal_is_alt = np.zeros(0, dtype=bool) + autosomal_hom_alt = np.zeros(0, dtype=bool) if "autosomal_aafs" in header.fields: var_het = compute_autosomal_aafs(container)