From f746c85ac04f6b53850688cb3a2b6a4d983aef1a Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Mon, 27 Nov 2023 10:52:39 +0100 Subject: [PATCH 1/4] rename attribute `_families` to `_mfs` --- .../metabolomics/gnps/gnps_molecular_family_loader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py index 125300c2..9c1fd689 100644 --- a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py @@ -32,7 +32,7 @@ def __init__(self, file: str | PathLike): >>> print(loader.families[0].spectra_ids) {'1', '3', '7', ...} """ - self._families: list[MolecularFamily | SingletonFamily] = [] + self._mfs: list[MolecularFamily | SingletonFamily] = [] self._file = file self._validate() @@ -46,7 +46,7 @@ def families(self) -> list[MolecularFamily]: list[MolecularFamily]: List of all molecular family objects with their spectra ids. """ - return self._families + return self._mfs def _validate(self): """Validate the GNPS molecular family file.""" @@ -93,8 +93,8 @@ def _load(self) -> None: for spectrum_id in spectra_ids: family = SingletonFamily() ## uuid as family id family.spectra_ids = set([spectrum_id]) - self._families.append(family) + self._mfs.append(family) else: family = MolecularFamily(family_id) family.spectra_ids = spectra_ids - self._families.append(family) + self._mfs.append(family) From 6e31783493f071b68e188bfa03c1f2fa28640d3d Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Wed, 22 Nov 2023 09:18:40 +0100 Subject: [PATCH 2/4] change property `families` to method `get_mfs` - change property to method - rename `families` to `get_mfs` --- src/nplinker/metabolomics/abc.py | 3 +-- src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py | 3 +-- tests/metabolomics/test_gnps_molecular_family_loader.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/nplinker/metabolomics/abc.py b/src/nplinker/metabolomics/abc.py index 97a83424..d27e5632 100644 --- a/src/nplinker/metabolomics/abc.py +++ b/src/nplinker/metabolomics/abc.py @@ -13,9 +13,8 @@ def spectra(self) -> Sequence[Spectrum]: class MolecularFamilyLoaderBase(ABC): - @property @abstractmethod - def families(self) -> Sequence[MolecularFamily]: + def get_mfs(self) -> Sequence[MolecularFamily]: ... diff --git a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py index 9c1fd689..00aa733f 100644 --- a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py @@ -38,8 +38,7 @@ def __init__(self, file: str | PathLike): self._validate() self._load() - @property - def families(self) -> list[MolecularFamily]: + def get_mfs(self) -> list[MolecularFamily]: """Get all molecular families. Returns: diff --git a/tests/metabolomics/test_gnps_molecular_family_loader.py b/tests/metabolomics/test_gnps_molecular_family_loader.py index 0285c70d..cd428782 100644 --- a/tests/metabolomics/test_gnps_molecular_family_loader.py +++ b/tests/metabolomics/test_gnps_molecular_family_loader.py @@ -9,7 +9,7 @@ ) def test_has_molecular_families(workflow, num_families, num_spectra, gnps_mf_files): loader = GNPSMolecularFamilyLoader(gnps_mf_files[workflow]) - actual = loader.families + actual = loader.get_mfs assert len(actual) == num_families # test molecular family with id "1" has correct number of spectra ids mf = [mf for mf in actual if mf.family_id == "1"][0] From 97f099e1acbaa50431a53a817a053777efb18a9e Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Mon, 27 Nov 2023 10:57:00 +0100 Subject: [PATCH 3/4] add parameter `keep_singleton` to method `get_mfs` --- src/nplinker/metabolomics/abc.py | 13 +++++++++++-- .../gnps/gnps_molecular_family_loader.py | 18 +++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/nplinker/metabolomics/abc.py b/src/nplinker/metabolomics/abc.py index d27e5632..c1a6fd76 100644 --- a/src/nplinker/metabolomics/abc.py +++ b/src/nplinker/metabolomics/abc.py @@ -14,8 +14,17 @@ def spectra(self) -> Sequence[Spectrum]: class MolecularFamilyLoaderBase(ABC): @abstractmethod - def get_mfs(self) -> Sequence[MolecularFamily]: - ... + def get_mfs(self, keep_singleton: bool) -> Sequence[MolecularFamily]: + """Get MolecularFamily objects. + + Args: + keep_singleton(bool): True to keep singleton molecular families. A + singleton molecular family is a molecular family that contains + only one spectrum. + + Returns: + Sequence[MolecularFamily]: a list of MolecularFamily objects. + """ class FileMappingLoaderBase(ABC): diff --git a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py index 00aa733f..e2083e68 100644 --- a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py @@ -38,14 +38,22 @@ def __init__(self, file: str | PathLike): self._validate() self._load() - def get_mfs(self) -> list[MolecularFamily]: - """Get all molecular families. + def get_mfs(self, keep_singleton: bool = False) -> list[MolecularFamily]: + """Get MolecularFamily objects. + + Args: + keep_singleton(bool): True to keep singleton molecular families. A + singleton molecular family is a molecular family that contains + only one spectrum. Returns: - list[MolecularFamily]: List of all molecular family objects with - their spectra ids. + list[MolecularFamily]: A list of MolecularFamily objects with their + spectra ids. """ - return self._mfs + mfs = self._mfs + if not keep_singleton: + mfs = [mf for mf in mfs if not mf.is_singleton()] + return mfs def _validate(self): """Validate the GNPS molecular family file.""" From efe2148ac61236bee0eaea3ff4acbdadf93380e6 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Mon, 27 Nov 2023 11:15:18 +0100 Subject: [PATCH 4/4] update unit tests for `GNPSMolecularFamilyLoader` --- .../test_gnps_molecular_family_loader.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/metabolomics/test_gnps_molecular_family_loader.py b/tests/metabolomics/test_gnps_molecular_family_loader.py index cd428782..9f3ad4f3 100644 --- a/tests/metabolomics/test_gnps_molecular_family_loader.py +++ b/tests/metabolomics/test_gnps_molecular_family_loader.py @@ -4,12 +4,22 @@ @pytest.mark.parametrize( - "workflow, num_families, num_spectra", - [(GNPSFormat.SNETS, 25769, 19), (GNPSFormat.SNETSV2, 6902, 10), (GNPSFormat.FBMN, 1105, 5)], + "workflow, num_families, num_spectra, keep_singleton", + [ + (GNPSFormat.SNETS, 25769, 19, True), + (GNPSFormat.SNETSV2, 6902, 10, True), + (GNPSFormat.FBMN, 1105, 5, True), + (GNPSFormat.SNETS, 29, 19, False), + (GNPSFormat.SNETSV2, 72, 10, False), + (GNPSFormat.FBMN, 60, 5, False), + ], ) -def test_has_molecular_families(workflow, num_families, num_spectra, gnps_mf_files): +def test_gnps_molecular_family_loader( + workflow, num_families, num_spectra, keep_singleton, gnps_mf_files +): + """Test GNPSMolecularFamilyLoader class.""" loader = GNPSMolecularFamilyLoader(gnps_mf_files[workflow]) - actual = loader.get_mfs + actual = loader.get_mfs(keep_singleton=keep_singleton) assert len(actual) == num_families # test molecular family with id "1" has correct number of spectra ids mf = [mf for mf in actual if mf.family_id == "1"][0]