Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added an option to the scramble phenopackets command to allow for a local cached ontology file #360

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/pheval/cli_pheval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,29 @@ def semsim_scramble_command(
default="noisy_phenopackets",
type=Path,
)
@click.option(
"--local-ontology-cache",
"-l",
metavar="PATH",
required=False,
help="Path to the local ontology cache, e.g., path to the hp.obo.",
default=None,
type=Path,
)
def scramble_phenopackets_command(
phenopacket_path: Path,
phenopacket_dir: Path,
scramble_factor: float,
output_dir: Path,
local_ontology_cache: Path,
):
"""Generate noisy phenopackets from existing ones."""
if phenopacket_path is None and phenopacket_dir is None:
raise InputError("Either a phenopacket or phenopacket directory must be specified")
else:
scramble_phenopackets(output_dir, phenopacket_path, phenopacket_dir, scramble_factor)
scramble_phenopackets(
output_dir, phenopacket_path, phenopacket_dir, scramble_factor, local_ontology_cache
)


@click.command("semsim-comparison")
Expand Down
38 changes: 27 additions & 11 deletions src/pheval/prepare/create_noisy_phenopackets.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,20 @@
)


def load_ontology():
def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
"""
Load the Human Phenotype Ontology (HPO).

Args:
local_cached_ontology (Path): Path to the local cached ontology.
Returns:
ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO.
"""
resource = OntologyResource(slug="hp.obo", local=False)
return ProntoImplementation(resource)
if local_cached_ontology is None:
resource = OntologyResource(slug="hp.obo", local=False)
return ProntoImplementation(resource)
else:
resource = OntologyResource(slug=local_cached_ontology, local=True)
return ProntoImplementation(resource)


class HpoRandomiser:
Expand Down Expand Up @@ -203,7 +208,7 @@ def add_noise_to_phenotypic_profile(


def create_scrambled_phenopacket(
output_dir: Path, phenopacket_path: Path, scramble_factor: float
output_dir: Path, phenopacket_path: Path, scramble_factor: float, local_cached_ontology: Path
) -> None:
"""
Create a scrambled version of a Phenopacket.
Expand All @@ -212,8 +217,9 @@ def create_scrambled_phenopacket(
output_dir (Path): The directory to store the output scrambled Phenopacket.
phenopacket_path (Path): The path to the original Phenopacket file.
scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
local_cached_ontology (Path): The path to the local cached ontology.
"""
ontology = load_ontology()
ontology = load_ontology(local_cached_ontology)
hpo_randomiser = HpoRandomiser(ontology, scramble_factor)
phenopacket = phenopacket_reader(phenopacket_path)
created_noisy_phenopacket = add_noise_to_phenotypic_profile(
Expand All @@ -227,7 +233,7 @@ def create_scrambled_phenopacket(


def create_scrambled_phenopackets(
output_dir: Path, phenopacket_dir: Path, scramble_factor: float
output_dir: Path, phenopacket_dir: Path, scramble_factor: float, local_cached_ontology: Path
) -> None:
"""
Create scrambled versions of Phenopackets within a directory.
Expand All @@ -236,8 +242,9 @@ def create_scrambled_phenopackets(
output_dir (Path): The directory to store the output scrambled Phenopackets.
phenopacket_dir (Path): The directory containing the original Phenopacket files.
scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
local_cached_ontology (Path): The path to the local cached ontology.
"""
ontology = load_ontology()
ontology = load_ontology(local_cached_ontology)
hpo_randomiser = HpoRandomiser(ontology, scramble_factor)
phenopacket_files = files_with_suffix(phenopacket_dir, ".json")
for phenopacket_path in phenopacket_files:
Expand All @@ -252,7 +259,11 @@ def create_scrambled_phenopackets(


def scramble_phenopackets(
output_dir: Path, phenopacket_path: Path, phenopacket_dir: Path, scramble_factor: float
output_dir: Path,
phenopacket_path: Path,
phenopacket_dir: Path,
scramble_factor: float,
local_cached_ontology: Path,
) -> None:
"""
Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets.
Expand All @@ -262,9 +273,14 @@ def scramble_phenopackets(
phenopacket_path (Path): The path to a single Phenopacket file (if applicable).
phenopacket_dir (Path): The directory containing multiple Phenopacket files (if applicable).
scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
local_cached_ontology (Path): The path to the local cached ontology.
"""
output_dir.mkdir(exist_ok=True)
if phenopacket_path is not None:
create_scrambled_phenopacket(output_dir, phenopacket_path, scramble_factor)
create_scrambled_phenopacket(
output_dir, phenopacket_path, scramble_factor, local_cached_ontology
)
elif phenopacket_dir is not None:
create_scrambled_phenopackets(output_dir, phenopacket_dir, scramble_factor)
create_scrambled_phenopackets(
output_dir, phenopacket_dir, scramble_factor, local_cached_ontology
)
Loading