Skip to content

Commit

Permalink
fix: resolving chrX het/hom issue (#68) (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Feb 21, 2024
1 parent 62990a7 commit 15cec04
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 17 deletions.
6 changes: 4 additions & 2 deletions chew/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,11 @@ class Site:
alt: str


def load_sites(genome_release: str) -> typing.List[Site]:
def load_sites(genome_release: str, sites_suffix: str) -> typing.List[Site]:
logger.info("Loading sites .bed.gz for %s", genome_release)
path_gz = os.path.join(os.path.dirname(__file__), "data", f"{genome_release}_sites.bed.gz")
path_gz = os.path.join(
os.path.dirname(__file__), "data", f"{genome_release}_{sites_suffix}.bed.gz"
)
result = []
with gzip.open(path_gz, "rt") as inputf:
for line in inputf:
Expand Down
Binary file modified chew/data/GRCh37_sitesX.bed.gz
Binary file not shown.
Binary file modified chew/data/GRCh38_sitesX.bed.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions chew/fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def snps_step_call(
logger.info("Reading sites BED (%s)...", bed_file)
sites = {
"%s%s:%s" % (chr_prefix, site.chrom, site.pos): (0, 0, float("nan"))
for site in load_sites(genome_release)
for site in load_sites(genome_release, sites_suffix)
}
logger.info("Converting VCF to fingerprint...")
with vcfpy.Reader.from_path(path_calls) as vcf_reader:
Expand Down Expand Up @@ -346,7 +346,7 @@ def run(config: Config):
else:
samtools_idxstats_out = None

if config.step_bcftools_roh:
if config.step_bcftools_roh and autosomal_fingerprint is not None:
roh_txt_contents = bcftools_roh_step(
sample=sample, release=genome_release, autosomal_fingerprint=autosomal_fingerprint
)
Expand Down
2 changes: 1 addition & 1 deletion chew/roh.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def create_vcf_header(sample: str, release: str) -> vcfpy.Header:
def write_vcf(tmpdir: str, sample: str, release: str, autosomal_fingerprint) -> str:
logger.info("Constructing VCF header...")
vcf_header = create_vcf_header(sample, release)
sites = load_sites(release)
sites = load_sites(release, "sites")
autosomal_mask = autosomal_fingerprint[0]
autosomal_is_alt = autosomal_fingerprint[1]
autosomal_hom_alt = autosomal_fingerprint[2]
Expand Down
11 changes: 8 additions & 3 deletions chew/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,14 @@ def compute_sample_stats(container) -> SampleStats:
header = extract_header(container)

autosomal_fingerprint = container["autosomal_fingerprint"]
autosomal_mask = autosomal_fingerprint[0]
autosomal_is_alt = autosomal_fingerprint[1]
autosomal_hom_alt = autosomal_fingerprint[2]
if autosomal_fingerprint:
autosomal_mask = autosomal_fingerprint[0]
autosomal_is_alt = autosomal_fingerprint[1]
autosomal_hom_alt = autosomal_fingerprint[2]
else:
autosomal_mask = np.zeros(0, dtype=bool)
autosomal_is_alt = np.zeros(0, dtype=bool)
autosomal_hom_alt = np.zeros(0, dtype=bool)

if "autosomal_aafs" in header.fields:
var_het = compute_autosomal_aafs(container)
Expand Down
13 changes: 7 additions & 6 deletions requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
attrs
cattrs
click
logzero
tqdm
numpy
vcfpy
pysam
pandas
plotly
pyarrow
pysam
scipy
click
attrs
cattrs
tqdm
vcfpy
7 changes: 4 additions & 3 deletions tests/test_run_fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_smoke_test_run_fingerprint(cli_runner, path_tests, tmpdir):
"--input-bam",
str(path_tests / "data" / "igsr.HG00102.TP73.bam"),
"--output-fingerprint",
str(tmpdir / "out"),
str(tmpdir / "out.npz"),
],
)

Expand Down Expand Up @@ -59,7 +59,7 @@ def test_smoke_test_run_fingerprint(cli_runner, path_tests, tmpdir):
def test_fingerprint_bam_grch37(
cli_runner: CliRunner, tmpdir: LocalPath, path_tests: Path, path_ref: str, path_bam: str
):
cli_runner.invoke(
result = cli_runner.invoke(
cli,
[
"fingerprint",
Expand All @@ -68,9 +68,10 @@ def test_fingerprint_bam_grch37(
"--input-bam",
path_bam,
"--output-fingerprint",
str(tmpdir / "out"),
str(tmpdir / "out.npz"),
],
)
assert result.exit_code == 0, result

# Check that output path exists and is similar to all finger prints.
output = tmpdir / "out.npz"
Expand Down

0 comments on commit 15cec04

Please sign in to comment.