From 854f093378b0eb4da4322d30d48a41ecf0cb268b Mon Sep 17 00:00:00 2001
From: Yasemin Bridges
Date: Thu, 9 May 2024 13:48:56 +0100
Subject: [PATCH] remove phenopackets with no observed phenotypes

---
 src/pheval/prepare/prepare_corpus.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/pheval/prepare/prepare_corpus.py b/src/pheval/prepare/prepare_corpus.py
index dc857ecd2..649585c54 100644
--- a/src/pheval/prepare/prepare_corpus.py
+++ b/src/pheval/prepare/prepare_corpus.py
@@ -39,6 +39,11 @@ def prepare_corpus(
     output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
     for phenopacket_path in all_files(phenopacket_dir):
         phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
+        if not phenopacket_util.observed_phenotypic_features():
+            info_log.warning(
+                f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
+            )
+            continue
         if variant_analysis:
             if phenopacket_util.check_incomplete_variant_record():
                 info_log.warning(