Skip to content

Commit

Permalink
Merge pull request #60 from monarch-initiative/59-allow-for-multiple-…
Browse files Browse the repository at this point in the history
…output-formats-from-exomiser

59 allow for multiple output formats from exomiser
  • Loading branch information
julesjacobsen authored Jun 10, 2024
2 parents ce16ddd + c4db66f commit 16adc74
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 6 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ tool_specific_configuration_options:
phenotype_data_version: 2302
cache_type:
cache_caffeine_spec:
output_formats: [JSON,HTML] # options include HTML, JSON, TSV_VARIANT, TSV_GENE, VCF
post_process:
score_name: combinedScore
sort_order: DESCENDING
Expand Down
1 change: 1 addition & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tool_specific_configuration_options:
# either none, simple, or caffeine
cache_type: none
cache_caffeine_spec:
output_formats: JSON # can be HTML, JSON, TSV_VARIANT, TSV_GENE, VCF
post_process:
# For Exomiser, valid ranking methods include combinedScore, priorityScore, variantScore or pValue
score_name: combinedScore
Expand Down
34 changes: 28 additions & 6 deletions src/pheval_exomiser/prepare/create_batch_commands.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/python
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from typing import List, Optional

import click
from phenopackets import Family, Phenopacket
Expand Down Expand Up @@ -30,9 +29,10 @@ class ExomiserCommandLineArguments:
raw_results_dir: Path or None = None
variant_analysis: bool or None = None
output_options_file: Optional[Path] = None
output_formats: List[str] or None = None


def get_all_files_from_output_opt_directory(output_options_dir: Path) -> list[Path] or None:
def get_all_files_from_output_opt_directory(
    output_options_dir: Optional[Path],
) -> Optional[List[Path]]:
    """Obtain all output options files if a directory is specified.

    Args:
        output_options_dir: Directory holding output options files, or None
            when no directory was configured.

    Returns:
        None when ``output_options_dir`` is None; otherwise the result of
        ``all_files(output_options_dir)``.
    """
    # `List[Path] or None` evaluates to plain `List[Path]` at runtime, so the
    # optional result is spelled with Optional[...] instead.
    if output_options_dir is None:
        return None
    return all_files(output_options_dir)

Expand All @@ -46,10 +46,11 @@ def __init__(
phenopacket_path: Path,
phenopacket: Phenopacket or Family,
variant_analysis: bool,
output_options_dir_files: list[Path] or None,
output_options_dir_files: List[Path] or None,
output_options_file: Path or None,
raw_results_dir: Path or None,
analysis_yaml: Path or None,
output_formats: List[str] or None,
):
self.environment = environment
self.phenopacket_path = phenopacket_path
Expand All @@ -59,6 +60,7 @@ def __init__(
self.output_options_file = output_options_file
self.results_dir = raw_results_dir
self.analysis_yaml = analysis_yaml
self.output_formats = output_formats

def assign_output_options_file(self) -> Path or None:
"""Return the path of a single output option yaml if specified,
Expand Down Expand Up @@ -86,13 +88,15 @@ def add_phenotype_only_arguments(self) -> ExomiserCommandLineArguments:
else None
),
raw_results_dir=RAW_RESULTS_TARGET_DIRECTORY_DOCKER,
output_formats=self.output_formats,
)
elif self.environment == "local":
return ExomiserCommandLineArguments(
sample=Path(self.phenopacket_path),
variant_analysis=self.variant_analysis,
output_options_file=output_options_file,
raw_results_dir=self.results_dir,
output_formats=self.output_formats,
)

def add_variant_analysis_arguments(self, vcf_dir: Path) -> ExomiserCommandLineArguments:
Expand All @@ -109,6 +113,7 @@ def add_variant_analysis_arguments(self, vcf_dir: Path) -> ExomiserCommandLineAr
variant_analysis=self.variant_analysis,
raw_results_dir=self.results_dir,
analysis_yaml=self.analysis_yaml,
output_formats=self.output_formats,
)
elif self.environment == "docker":
return ExomiserCommandLineArguments(
Expand Down Expand Up @@ -143,7 +148,8 @@ def create_command_arguments(
output_options_dir: Path or None = None,
output_options_file: Path or None = None,
analysis_yaml: Path or None = None,
) -> list[ExomiserCommandLineArguments]:
output_formats: List[str] or None = None,
) -> List[ExomiserCommandLineArguments]:
"""Return a list of Exomiser command line arguments for a directory of phenopackets."""
phenopacket_paths = files_with_suffix(phenopacket_dir, ".json")
commands = []
Expand All @@ -160,6 +166,7 @@ def create_command_arguments(
output_options_file,
results_dir,
analysis_yaml,
output_formats,
).add_command_line_arguments(vcf_dir)
)
return commands
Expand Down Expand Up @@ -212,10 +219,22 @@ def write_output_options(self, command_arguments: ExomiserCommandLineArguments)
except IOError:
print("Error writing ", self.file)

def write_output_format(self, command_arguments: ExomiserCommandLineArguments) -> None:
    """Write output formats for Exomiser raw result output.

    Appends `` --output-format <fmt>[,<fmt>...]`` to ``self.file``. Nothing is
    written when ``command_arguments.output_formats`` is None or empty, so the
    command never ends with a dangling ``--output-format`` flag.

    Args:
        command_arguments: Arguments for a single Exomiser run; only its
            ``output_formats`` attribute is read here.
    """
    output_formats = command_arguments.output_formats
    if not output_formats:
        return
    if isinstance(output_formats, str):
        # A lone format given as a bare string must not be joined character by
        # character (",".join("JSON") would produce "J,S,O,N").
        output_formats = [output_formats]
    try:
        self.file.write(" --output-format " + ",".join(output_formats))
    except IOError:
        # Report and continue, the same way write_output_options handles it.
        print("Error writing ", self.file)

def write_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
    """Write one complete analysis command line, terminated by a newline.

    The pieces are emitted in a fixed order: the basic analysis command, the
    results directory, the output options and finally the output formats.
    """
    pipeline = (
        self.write_basic_analysis_command,
        self.write_results_dir,
        self.write_output_options,
        self.write_output_format,
    )
    for emit in pipeline:
        emit(command_arguments)
    # End the command so the next one starts on its own line.
    self.file.write("\n")

def write_basic_phenotype_only_command(
Expand All @@ -239,6 +258,7 @@ def write_basic_phenotype_only_command(
def write_phenotype_only_command(self, command_arguments: ExomiserCommandLineArguments):
    """Write one complete phenotype-only command line, terminated by a newline.

    The pieces are emitted in a fixed order: the basic phenotype-only command,
    the output options and finally the output formats.
    """
    for emit in (
        self.write_basic_phenotype_only_command,
        self.write_output_options,
        self.write_output_format,
    ):
        emit(command_arguments)
    # End the command so the next one starts on its own line.
    self.file.write("\n")

def write_local_commands(self, command_arguments: ExomiserCommandLineArguments):
Expand All @@ -261,7 +281,7 @@ class BatchFileWriter:

def __init__(
self,
command_arguments_list: list[ExomiserCommandLineArguments],
command_arguments_list: List[ExomiserCommandLineArguments],
variant_analysis: bool,
output_dir: Path,
batch_prefix: str,
Expand Down Expand Up @@ -326,6 +346,7 @@ def create_batch_file(
results_dir: Path,
output_options_dir: Path = None,
output_options_file: Path = None,
output_formats: List[str] = None,
) -> None:
"""Create Exomiser batch files."""
command_arguments = create_command_arguments(
Expand All @@ -337,6 +358,7 @@ def create_batch_file(
output_options_dir,
output_options_file,
analysis,
output_formats,
)
(
BatchFileWriter(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List

from pydantic import BaseModel, Field

Expand Down Expand Up @@ -54,6 +55,7 @@ class ExomiserConfigurations(BaseModel):
analysis_configuration_file (Path): The file name of the analysis configuration file located in the input_dir
max_jobs (int): Maximum number of jobs to run in a batch
application_properties (ApplicationProperties): application.properties configurations
output_formats (List[str]): List of raw output formats.
post_process (PostProcessing): Post-processing configurations
"""

Expand All @@ -62,4 +64,5 @@ class ExomiserConfigurations(BaseModel):
analysis_configuration_file: Path = Field(...)
max_jobs: int = Field(...)
application_properties: ApplicationProperties = Field(...)
output_formats: List[str] = Field(None)
post_process: PostProcessing = Field(...)
6 changes: 6 additions & 0 deletions src/pheval_exomiser/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ def prepare_batch_files(
"""Prepare the exomiser batch files"""
print("...preparing batch files...")
vcf_dir_name = Path(testdata_dir).joinpath("vcf")
output_formats = (
config.output_formats + ["JSON"]
if config.output_formats and "JSON" not in config.output_formats
else config.output_formats
)
create_batch_file(
environment=config.environment,
analysis=input_dir.joinpath(config.analysis_configuration_file),
Expand All @@ -43,6 +48,7 @@ def prepare_batch_files(
output_options_dir=None,
results_dir=raw_results_dir,
variant_analysis=variant_analysis,
output_formats=output_formats,
)


Expand Down
11 changes: 11 additions & 0 deletions tests/test_create_batch_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def setUpClass(cls) -> None:
output_options_file=None,
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
)
cls.command_creator_output_options_file = CommandCreator(
environment="local",
Expand All @@ -152,6 +153,7 @@ def setUpClass(cls) -> None:
),
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
)
cls.command_creator_none = CommandCreator(
environment="local",
Expand All @@ -162,6 +164,7 @@ def setUpClass(cls) -> None:
output_options_file=None,
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
)
cls.command_creator_phenotype_only = CommandCreator(
environment="local",
Expand All @@ -172,6 +175,7 @@ def setUpClass(cls) -> None:
output_options_file=None,
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=None,
output_formats=["JSON", "HTML"],
)
cls.command_creator_phenotype_only_output_options = CommandCreator(
environment="local",
Expand All @@ -184,6 +188,7 @@ def setUpClass(cls) -> None:
),
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=None,
output_formats=["JSON"],
)

def test_assign_output_options_file_from_dir(self):
Expand All @@ -210,6 +215,7 @@ def test_add_phenotype_only_arguments(self):
vcf_assembly=None,
raw_results_dir=Path("/path/to/results_dir"),
variant_analysis=False,
output_formats=["JSON", "HTML"],
),
)

Expand All @@ -225,6 +231,7 @@ def test_add_phenotype_only_arguments_output_options(self):
output_options_file=Path(
"/full/path/to/some/alternate/output_options/phenopacket-output_options.yml"
),
output_formats=["JSON"],
),
)

Expand All @@ -243,6 +250,7 @@ def test_add_variant_analysis_arguments(self):
"/full/path/to/some/alternate/output_options/phenopacket-output_options.yml"
),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
),
)

Expand All @@ -256,6 +264,7 @@ def test_add_variant_analysis_arguments_none(self):
raw_results_dir=Path("/path/to/results_dir"),
variant_analysis=False,
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
),
)

Expand All @@ -274,6 +283,7 @@ def test_add_command_line_arguments(self):
"/full/path/to/some/alternate/output_options/phenopacket-output_options.yml"
),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
),
)

Expand All @@ -286,5 +296,6 @@ def test_add_command_line_arguments_phenotype_only(self):
vcf_assembly=None,
raw_results_dir=Path("/path/to/results_dir"),
variant_analysis=False,
output_formats=["JSON", "HTML"],
),
)
1 change: 1 addition & 0 deletions tests/test_write_application_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def setUp(cls) -> None:
hg19_whitelist_path="2302_hg19_clinvar_whitelist.tsv.gz",
hg38_whitelist_path="2302_hg38_clinvar_whitelist.tsv.gz",
),
output_formats=["JSON"],
post_process=PostProcessing(score_name="combinedScore", sort_order="descending"),
),
)
Expand Down

0 comments on commit 16adc74

Please sign in to comment.