From 7d4b8cfe0bbd1a29317f9577b7a839cbe44956e9 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 00:37:39 -0400 Subject: [PATCH 01/10] feat: start mol2 reader --- datamol/io.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/datamol/io.py b/datamol/io.py index 5457625e..3369e64c 100644 --- a/datamol/io.py +++ b/datamol/io.py @@ -274,6 +274,45 @@ def to_sdf( writer.write(mol) writer.close() +def read_mol2file( + urlpath: Union[str, os.PathLike, IO], + sanitize: bool = True, + cleanupSubstructures: bool = True, + remove_hs: bool = True, + fail_if_invalid: bool = False, +) -> List[Mol]: + + with fsspec.open(urlpath, compression="infer") as f: + fReadLines = f.readlines() + for line in fReadLines: + # with fsspec.open(urlpath, mode="w") as f: + mols = rdmolfiles.MolFromMol2File( + urlpath, + sanitize=sanitize, + removeHs=remove_hs, + cleanupSubstructures=cleanupSubstructures + ) + + return mols + +# def read_mol2block( +# mol2block: str, +# sanitize: bool = True, +# cleanupSubstructures: bool = True, +# remove_hs: bool = True, +# fail_if_invalid: bool = False, +# ) -> Optional[dm.Mol]: + +# mol = rdmolfiles.MolFromMol2Block( +# mol2block, +# sanitize=sanitize, +# removeHs=remove_hs, +# cleanupSubstructures=cleanupSubstructures, +# ) +# if mol is None and fail_if_invalid: +# raise ValueError(f"Invalid molecule: {mol2block}") + +# return mol def read_molblock( molblock: str, From 88bc15af67a7c045c6ea426ab3a4f350241b058d Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 14:27:44 -0400 Subject: [PATCH 02/10] feat: complete read_mol2 function --- datamol/io.py | 61 ++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/datamol/io.py b/datamol/io.py index 3369e64c..e450fdbb 100644 --- a/datamol/io.py +++ b/datamol/io.py @@ -282,37 +282,44 @@ def read_mol2file( fail_if_invalid: bool = False, ) -> List[Mol]: + """Read a Mol2 File + + Args: + urlpath: Path to a file or a file-like object. Path can be remote or local. + sanitize: Whether to sanitize the molecules. + remove_hs: Whether to remove the existing hydrogens in the SDF files. + cleanupSubstructures: Whether to clean up substructure in the Mol2 Files. + fail_if_invalid: If set to true, the parser will raise an exception if the molecule is invalid + instead of returning None. + """ + + block=[] + mols=[] with fsspec.open(urlpath, compression="infer") as f: fReadLines = f.readlines() + #reversing due to ambiguous end line for mol2 files + fReadLines.reverse() for line in fReadLines: - # with fsspec.open(urlpath, mode="w") as f: - mols = rdmolfiles.MolFromMol2File( - urlpath, - sanitize=sanitize, - removeHs=remove_hs, - cleanupSubstructures=cleanupSubstructures - ) - - return mols - -# def read_mol2block( -# mol2block: str, -# sanitize: bool = True, -# cleanupSubstructures: bool = True, -# remove_hs: bool = True, -# fail_if_invalid: bool = False, -# ) -> Optional[dm.Mol]: + #ignores any header info + if b"#" not in line: + block.append(str(line,'utf-8')) + #since reversed, this is the 'end' a mol2 + if b'@MOLECULE' in line: + block.reverse() + mol2block = ",".join(block).replace(',','') + mol = rdmolfiles.MolFromMol2Block( + mol2block, + sanitize=sanitize, + removeHs=remove_hs, + cleanupSubstructures=cleanupSubstructures, + ) + if mol is None and fail_if_invalid: + raise ValueError(f"Invalid molecule: {mol2block}") + mols.append(mol) + block=[] -# mol = rdmolfiles.MolFromMol2Block( -# mol2block, -# sanitize=sanitize, -# removeHs=remove_hs, -# cleanupSubstructures=cleanupSubstructures, -# ) -# if mol is None and fail_if_invalid: -# raise ValueError(f"Invalid molecule: {mol2block}") - -# return mol + mols.reverse() + return mols def read_molblock( molblock: str, From ec90cb2b7f307a5824ba98972c8f5a5515315886 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 14:31:02 -0400 Subject: [PATCH 03/10] build: add init info for read_mol2file --- datamol/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datamol/__init__.py b/datamol/__init__.py index f5f4aad9..99a3928a 100644 --- a/datamol/__init__.py +++ b/datamol/__init__.py @@ -119,6 +119,7 @@ from .io import to_sdf from .io import to_smi from .io import read_smi +from .io import read_mol2file from .io import read_molblock from .io import to_molblock from .io import to_xlsx From ef0115a4e57178293a2b0834401d420b33553713 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 14:53:12 -0400 Subject: [PATCH 04/10] test: add test function for test_read_mol2 --- tests/data/test.mol2 | 62 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_io.py | 15 +++++++++++ 2 files changed, 77 insertions(+) create mode 100644 tests/data/test.mol2 diff --git a/tests/data/test.mol2 b/tests/data/test.mol2 new file mode 100644 index 00000000..7bb2aaa6 --- /dev/null +++ b/tests/data/test.mol2 @@ -0,0 +1,62 @@ +@MOLECULE +mol_first +11 11 1 0 0 +SMALL +AMBER ff14SB + +@ATOM + 1 C1 -0.0167 1.3778 0.0096 C.ar 1 UNK 0.0267 + 2 C2 0.0021 -0.0041 0.0020 C.ar 1 UNK -0.0438 + 3 C3 1.2218 -0.6631 -0.0131 C.ar 1 UNK -0.0592 + 4 C4 2.3820 0.0960 -0.0201 C.ar 1 UNK -0.0438 + 5 C5 2.2849 1.4746 -0.0118 C.ar 1 UNK 0.0267 + 6 N6 1.1072 2.0677 0.0026 N.ar 1 UNK -0.2647 + 7 H7 -0.9627 1.8988 0.0169 H 1 UNK 0.0840 + 8 H8 -0.9217 -0.5635 0.0075 H 1 UNK 0.0639 + 9 H9 1.2671 -1.7422 -0.0190 H 1 UNK 0.0624 + 10 H10 3.3495 -0.3839 -0.0316 H 1 UNK 0.0639 + 11 H11 3.1838 2.0731 -0.0171 H 1 UNK 0.0840 +@BOND + 1 1 6 ar + 2 1 2 ar + 3 1 7 1 + 4 2 3 ar + 5 2 8 1 + 6 3 4 ar + 7 3 9 1 + 8 4 5 ar + 9 4 10 1 + 10 5 6 ar + 11 5 11 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + +@MOLECULE +mol_sec +9 9 1 0 0 +SMALL +AMBER ff14SB + + +@ATOM + 1 C1 1.2973 -0.3859 -0.0124 C.2 1 UNK 0.0838 + 2 N2 0.0021 -0.0041 0.0020 N.pl3 1 UNK -0.3106 + 3 H3 -0.7708 -0.5902 0.0062 H 1 UNK 0.1532 + 4 C4 -0.0165 1.3646 0.0095 C.2 1 UNK 0.0120 + 5 C5 1.2671 1.7717 -0.0005 C.2 1 UNK 0.0422 + 6 N6 2.0482 0.6814 -0.0138 N.2 1 UNK -0.2480 + 7 H7 1.6529 -1.4057 -0.0216 H 1 UNK 0.1014 + 8 H8 -0.8923 1.9965 0.0173 H 1 UNK 0.0806 + 9 H9 1.6079 2.7966 0.0017 H 1 UNK 0.0854 +@BOND + 1 1 6 2 + 2 1 2 1 + 3 1 7 1 + 4 2 3 1 + 5 2 4 1 + 6 4 5 2 + 7 4 8 1 + 8 5 6 1 + 9 5 9 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT \ No newline at end of file diff --git a/tests/test_io.py b/tests/test_io.py index b23d5281..ffd58963 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -240,6 +240,21 @@ def test_sdf_props_and_conformer_preserved(tmp_path): assert conf.Is3D() np.testing.assert_almost_equal(conf.GetPositions(), pos, decimal=4) +def test_read_mol2(datadir): + data_path = datadir / "test.mol2" + + # to list of mols + mols = dm.read_mol2file(data_path) + + for mol in mols: + assert isinstance(mol, Chem.rdchem.Mol) + + firstMol = dm.to_mol('c1ccncc1') + secondMol = dm.to_mol('c1c[nH]cn1') + + assert dm.same_mol(mols[0], firstMol) + assert dm.same_mol(mols[1], secondMol) + def test_read_save_molblock(): mol = dm.to_mol("Cn1c(=O)c2c(ncn2C)n(C)c1=O") From 1016d35a60bb3f352a1cc072a44b9ffa2ed8d7e3 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 14:59:55 -0400 Subject: [PATCH 05/10] docs: copy and edit .rst file for mol2 reader --- news/my-feature-or-branch.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 news/my-feature-or-branch.rst diff --git a/news/my-feature-or-branch.rst b/news/my-feature-or-branch.rst new file mode 100644 index 00000000..da589d92 --- /dev/null +++ b/news/my-feature-or-branch.rst @@ -0,0 +1,23 @@ +**Added:** + +* A multi-mol2 file reader that converts into rdkit objects + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* From 39b2c7fe79a75b4bf143df9921cfa45989dc3c8a Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 17:47:00 -0400 Subject: [PATCH 06/10] fix: reformat io and test_io by using black --- datamol/io.py | 27 ++++++++++++++------------- tests/test_io.py | 7 ++++--- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/datamol/io.py b/datamol/io.py index e450fdbb..6fd5bf2b 100644 --- a/datamol/io.py +++ b/datamol/io.py @@ -274,14 +274,14 @@ def to_sdf( writer.write(mol) writer.close() + def read_mol2file( urlpath: Union[str, os.PathLike, IO], sanitize: bool = True, cleanupSubstructures: bool = True, remove_hs: bool = True, - fail_if_invalid: bool = False, + fail_if_invalid: bool = False, ) -> List[Mol]: - """Read a Mol2 File Args: @@ -292,21 +292,21 @@ def read_mol2file( fail_if_invalid: If set to true, the parser will raise an exception if the molecule is invalid instead of returning None. """ - - block=[] - mols=[] + + block = [] + mols = [] with fsspec.open(urlpath, compression="infer") as f: fReadLines = f.readlines() - #reversing due to ambiguous end line for mol2 files + # reversing due to ambiguous end line for mol2 files fReadLines.reverse() for line in fReadLines: - #ignores any header info + # ignores any header info if b"#" not in line: - block.append(str(line,'utf-8')) - #since reversed, this is the 'end' a mol2 - if b'@MOLECULE' in line: + block.append(str(line, "utf-8")) + # since reversed, this is the 'end' a mol2 + if b"@MOLECULE" in line: block.reverse() - mol2block = ",".join(block).replace(',','') + mol2block = ",".join(block).replace(",", "") mol = rdmolfiles.MolFromMol2Block( mol2block, sanitize=sanitize, @@ -316,11 +316,12 @@ def read_mol2file( if mol is None and fail_if_invalid: raise ValueError(f"Invalid molecule: {mol2block}") mols.append(mol) - block=[] - + block = [] + mols.reverse() return mols + def read_molblock( molblock: str, sanitize: bool = True, diff --git a/tests/test_io.py b/tests/test_io.py index ffd58963..af4f3499 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -240,17 +240,18 @@ def test_sdf_props_and_conformer_preserved(tmp_path): assert conf.Is3D() np.testing.assert_almost_equal(conf.GetPositions(), pos, decimal=4) + def test_read_mol2(datadir): data_path = datadir / "test.mol2" - + # to list of mols mols = dm.read_mol2file(data_path) for mol in mols: assert isinstance(mol, Chem.rdchem.Mol) - firstMol = dm.to_mol('c1ccncc1') - secondMol = dm.to_mol('c1c[nH]cn1') + firstMol = dm.to_mol("c1ccncc1") + secondMol = dm.to_mol("c1c[nH]cn1") assert dm.same_mol(mols[0], firstMol) assert dm.same_mol(mols[1], secondMol) From cf0e733dab36eac693d4f31ad1cbc9449c434d18 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 18:57:19 -0400 Subject: [PATCH 07/10] fix: commit to snake case --- datamol/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datamol/io.py b/datamol/io.py index 6fd5bf2b..9dff20e3 100644 --- a/datamol/io.py +++ b/datamol/io.py @@ -275,7 +275,7 @@ def to_sdf( writer.close() -def read_mol2file( +def read_mol2_file( urlpath: Union[str, os.PathLike, IO], sanitize: bool = True, cleanupSubstructures: bool = True, From 57efe730d6c746941f552a7695600431ea3cda01 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 18:57:47 -0400 Subject: [PATCH 08/10] tests: add None and raise exception case --- datamol/__init__.py | 2 +- tests/data/test.mol2 | 183 ++++++++++++++++++++++++++++++++++++++++++- tests/test_io.py | 20 ++++- 3 files changed, 199 insertions(+), 6 deletions(-) diff --git a/datamol/__init__.py b/datamol/__init__.py index 99a3928a..374cd491 100644 --- a/datamol/__init__.py +++ b/datamol/__init__.py @@ -119,7 +119,7 @@ from .io import to_sdf from .io import to_smi from .io import read_smi -from .io import read_mol2file +from .io import read_mol2_file from .io import read_molblock from .io import to_molblock from .io import to_xlsx diff --git a/tests/data/test.mol2 b/tests/data/test.mol2 index 7bb2aaa6..1abb2f4f 100644 --- a/tests/data/test.mol2 +++ b/tests/data/test.mol2 @@ -59,4 +59,185 @@ AMBER ff14SB 8 5 6 1 9 5 9 1 @SUBSTRUCTURE - 1 UNK 1 RESIDUE 4 A UNK 0 ROOT \ No newline at end of file + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + +@MOLECULE +mol_third +9 9 1 0 0 +SMALL +AMBER ff14SB + + +@ATOM + 1 C1 1.2973 -0.3859 -0.0124 C 1 UNK 0.0838 + 2 N2 0.0021 -0.0041 0.0020 N 1 UNK -0.3106 + 3 H3 -0.7708 -0.5902 0.0062 H 1 UNK 0.1532 + 4 C4 -0.0165 1.3646 0.0095 C 1 UNK 0.0120 + 5 C5 1.2671 1.7717 -0.0005 C 1 UNK 0.0422 + 6 N6 2.0482 0.6814 -0.0138 N 1 UNK -0.2480 + 7 H7 1.6529 -1.4057 -0.0216 H 1 UNK 0.1014 + 8 H8 -0.8923 1.9965 0.0173 H 1 UNK 0.0806 + 9 H9 1.6079 2.7966 0.0017 H 1 UNK 0.0854 +@BOND + 1 1 6 2 + 2 1 2 1 + 3 1 7 1 + 4 2 3 1 + 5 2 4 1 + 6 4 5 2 + 7 4 8 1 + 8 5 6 1 + 9 5 9 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + +@MOLECULE +mol_sec_f +9 9 1 0 0 +SMALL +AMBER ff14SB + + +@ATOM + 1 C1 1.2973 -0.3859 -0.0124 C.2 1 UNK 0.0838 + 2 N2 0.0021 -0.0041 0.0020 N.pl3 1 UNK -0.3106 + 3 H3 -0.7708 -0.5902 0.0062 H 1 UNK 0.1532 + 4 C4 -0.0165 1.3646 0.0095 C.2 1 UNK 0.0120 + 5 C5 1.2671 1.7717 -0.0005 C.2 1 UNK 0.0422 + 6 N6 2.0482 0.6814 -0.0138 N.2 1 UNK -0.2480 + 7 H7 1.6529 -1.4057 -0.0216 H 1 UNK 0.1014 + 8 H8 -0.8923 1.9965 0.0173 H 1 UNK 0.0806 + 9 H9 1.6079 2.7966 0.0017 H 1 UNK 0.0854 + + 1 1 6 2 + 2 1 2 1 + 3 1 7 1 + 4 2 3 1 + 5 2 4 1 + 6 4 5 2 + 7 4 8 1 + 8 5 6 1 + 9 5 9 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + +@MOLECULE +mol_sec_f1 +9 9 1 0 0 +SMALL +AMBER ff14SB + + + + 1 C1 1.2973 -0.3859 -0.0124 C.2 1 UNK 0.0838 + 2 N2 0.0021 -0.0041 0.0020 N.pl3 1 UNK -0.3106 + 3 H3 -0.7708 -0.5902 0.0062 H 1 UNK 0.1532 + 4 C4 -0.0165 1.3646 0.0095 C.2 1 UNK 0.0120 + 5 C5 1.2671 1.7717 -0.0005 C.2 1 UNK 0.0422 + 6 N6 2.0482 0.6814 -0.0138 N.2 1 UNK -0.2480 + 7 H7 1.6529 -1.4057 -0.0216 H 1 UNK 0.1014 + 8 H8 -0.8923 1.9965 0.0173 H 1 UNK 0.0806 + 9 H9 1.6079 2.7966 0.0017 H 1 UNK 0.0854 +@BOND + 1 1 6 2 + 2 1 2 1 + 3 1 7 1 + 4 2 3 1 + 5 2 4 1 + 6 4 5 2 + 7 4 8 1 + 8 5 6 1 + 9 5 9 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + +@MOLECULE +mol_sec_f3 +9 9 1 0 0 +SMALL +AMBER ff14SB + +@ATOM +@BOND + 1 1 6 2 + 2 1 2 1 + 3 1 7 1 + 4 2 3 1 + 5 2 4 1 + 6 4 5 2 + 7 4 8 1 + 8 5 6 1 + 9 5 9 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + +@MOLECULE +mol_sec_f4 +9 9 1 0 0 +SMALL +AMBER ff14SB + +@ATOM +@BOND +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + + + +@MOLECULE + + + +@ATOM + 1 C1 1.2973 -0.3859 -0.0124 C.2 1 UNK 0.0838 + 2 N2 0.0021 -0.0041 0.0020 N.pl3 1 UNK -0.3106 + 3 H3 -0.7708 -0.5902 0.0062 H 1 UNK 0.1532 + 4 C4 -0.0165 1.3646 0.0095 C.2 1 UNK 0.0120 + 5 C5 1.2671 1.7717 -0.0005 C.2 1 UNK 0.0422 + 6 N6 2.0482 0.6814 -0.0138 N.2 1 UNK -0.2480 + 7 H7 1.6529 -1.4057 -0.0216 H 1 UNK 0.1014 + 8 H8 -0.8923 1.9965 0.0173 H 1 UNK 0.0806 + 9 H9 1.6079 2.7966 0.0017 H 1 UNK 0.0854 + + 1 1 6 2 + 2 1 2 1 + 3 1 7 1 + 4 2 3 1 + 5 2 4 1 + 6 4 5 2 + 7 4 8 1 + 8 5 6 1 + 9 5 9 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + +@MOLECULE +mol_sec +9 9 1 0 0 +SMALL +AMBER ff14SB + + +@ATOM + 1 C1 1.2973 -0.3859 -0.0124 C 1 UNK 0.0838 + 2 N2 0.0021 -0.0041 0.0020 N 1 UNK -0.3106 + 3 H3 -0.7708 -0.5902 0.0062 H 1 UNK 0.1532 + 4 C4 -0.0165 1.3646 0.0095 C 1 UNK 0.0120 + 5 C5 1.2671 1.7717 -0.0005 C 1 UNK 0.0422 + 6 N6 2.0482 0.6814 -0.0138 N 1 UNK -0.2480 + 7 H7 1.6529 -1.4057 -0.0216 H 1 UNK 0.1014 + 8 H8 -0.8923 1.9965 0.0173 H 1 UNK 0.0806 + 9 H9 1.6079 2.7966 0.0017 H 1 UNK 0.0854 +@BOND + 1 1 6 2 + 2 1 2 1 + 3 1 7 1 + 4 2 3 1 + 5 2 4 1 + 6 4 5 2 + 7 4 8 1 + 8 5 6 1 + 9 5 9 1 +@SUBSTRUCTURE + 1 UNK 1 RESIDUE 4 A UNK 0 ROOT + diff --git a/tests/test_io.py b/tests/test_io.py index af4f3499..37905ff6 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -245,16 +245,28 @@ def test_read_mol2(datadir): data_path = datadir / "test.mol2" # to list of mols - mols = dm.read_mol2file(data_path) - - for mol in mols: - assert isinstance(mol, Chem.rdchem.Mol) + mols = dm.read_mol2_file(data_path) + + assert isinstance(mols[0], Chem.rdchem.Mol) + assert isinstance(mols[1], Chem.rdchem.Mol) + assert isinstance(mols[2], Chem.rdchem.Mol) + # cases where mol2 formats are damaged + assert mols[3] is None + assert mols[4] is None + assert mols[5] is None + assert mols[6] is None + assert mols[7] is None firstMol = dm.to_mol("c1ccncc1") secondMol = dm.to_mol("c1c[nH]cn1") assert dm.same_mol(mols[0], firstMol) assert dm.same_mol(mols[1], secondMol) + assert dm.same_mol(mols[2], secondMol) + + # a case where exception is raised because of None values + with pytest.raises(ValueError): + mols = dm.read_mol2_file(data_path, fail_if_invalid=True) def test_read_save_molblock(): From 24b7cf43216f4b8bdda2f3f9bcccebb62451ff82 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 19:24:31 -0400 Subject: [PATCH 09/10] fix: change to snake case for correct one --- datamol/__init__.py | 2 +- datamol/io.py | 6 +++--- tests/test_io.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datamol/__init__.py b/datamol/__init__.py index 374cd491..99a3928a 100644 --- a/datamol/__init__.py +++ b/datamol/__init__.py @@ -119,7 +119,7 @@ from .io import to_sdf from .io import to_smi from .io import read_smi -from .io import read_mol2_file +from .io import read_mol2file from .io import read_molblock from .io import to_molblock from .io import to_xlsx diff --git a/datamol/io.py b/datamol/io.py index 9dff20e3..731d0281 100644 --- a/datamol/io.py +++ b/datamol/io.py @@ -275,10 +275,10 @@ def to_sdf( writer.close() -def read_mol2_file( +def read_mol2file( urlpath: Union[str, os.PathLike, IO], sanitize: bool = True, - cleanupSubstructures: bool = True, + cleanup_substructures: bool = True, remove_hs: bool = True, fail_if_invalid: bool = False, ) -> List[Mol]: @@ -311,7 +311,7 @@ def read_mol2_file( mol2block, sanitize=sanitize, removeHs=remove_hs, - cleanupSubstructures=cleanupSubstructures, + cleanupSubstructures=cleanup_substructures, ) if mol is None and fail_if_invalid: raise ValueError(f"Invalid molecule: {mol2block}") diff --git a/tests/test_io.py b/tests/test_io.py index 37905ff6..2e29a00f 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -245,7 +245,7 @@ def test_read_mol2(datadir): data_path = datadir / "test.mol2" # to list of mols - mols = dm.read_mol2_file(data_path) + mols = dm.read_mol2file(data_path) assert isinstance(mols[0], Chem.rdchem.Mol) assert isinstance(mols[1], Chem.rdchem.Mol) @@ -266,7 +266,7 @@ def test_read_mol2(datadir): # a case where exception is raised because of None values with pytest.raises(ValueError): - mols = dm.read_mol2_file(data_path, fail_if_invalid=True) + mols = dm.read_mol2file(data_path, fail_if_invalid=True) def test_read_save_molblock(): From 5f1b80560b0e50bc7326254796bbc854f66d3961 Mon Sep 17 00:00:00 2001 From: Pakman450 Date: Thu, 30 Mar 2023 19:44:15 -0400 Subject: [PATCH 10/10] fix: fix build docs by changing to snakecase --- datamol/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datamol/io.py b/datamol/io.py index 731d0281..935d66e9 100644 --- a/datamol/io.py +++ b/datamol/io.py @@ -288,7 +288,7 @@ def read_mol2file( urlpath: Path to a file or a file-like object. Path can be remote or local. sanitize: Whether to sanitize the molecules. remove_hs: Whether to remove the existing hydrogens in the SDF files. - cleanupSubstructures: Whether to clean up substructure in the Mol2 Files. + cleanup_substructures: Whether to clean up substructure in the Mol2 Files. fail_if_invalid: If set to true, the parser will raise an exception if the molecule is invalid instead of returning None. """