From 9b1ab198dd34f15914b75261a9da116d4614a7ee Mon Sep 17 00:00:00 2001 From: Jennifer A Clark Date: Tue, 7 Jan 2025 14:38:34 -0500 Subject: [PATCH] Throw error for "explicit" hydrogens that aren't in molecule graph (#1983) * Resolve explicit Hs unrepresented in graph * Add test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * black * changelog * Update docstrings and comments, replace GetTotalNumHs() with GetNumExplicitHs() * Apply suggestions from code review Co-authored-by: Jeff Wagner --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: jaclark5 Co-authored-by: Jeff Wagner --- docs/releasehistory.md | 2 ++ openff/toolkit/_tests/test_toolkits.py | 21 +++++++++++++++++++++ openff/toolkit/utils/rdkit_wrapper.py | 18 ++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/docs/releasehistory.md b/docs/releasehistory.md index d6c35cea6..18cf8b10e 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -16,6 +16,7 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ### Bugfixes - [PR #1971](https://github.com/openforcefield/openff-toolkit/pull/1971): Fixes bug where OpenEyeToolkitWrapper would write coordinate-less PDB atoms if insertion_code or chain_id was an empty string ([Issue #1967](https://github.com/openforcefield/openff-toolkit/issues/1967)) +- [PR #1983](https://github.com/openforcefield/openff-toolkit/pull/1983): Fixes issue with implicit hydrogens slipping though checks to result in radicals later. [Issue #936](https://github.com/openforcefield/openff-toolkit/issues/936) and [Issue #1696](https://github.com/openforcefield/openff-toolkit/issues/1696) ### Performance improvements @@ -30,6 +31,7 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ## 0.16.7 ### Bugfixes + - [PR #1971](https://github.com/openforcefield/openff-toolkit/pull/1971): Fixes bug where OpenEyeToolkitWrapper would write coordinate-less PDB atoms if insertion_code or chain_id was an empty string ([Issue #1967](https://github.com/openforcefield/openff-toolkit/issues/1967)) ### Tests updated diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index dd36b4d08..8ec0d5ba7 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2250,6 +2250,27 @@ def test_rdkit_from_smiles_hydrogens_are_explicit(self): ) assert offmol.n_atoms == 4 + def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): + """ + Test to ensure that RDKitToolkitWrapper.from_smiles has the proper behavior with + respect to its hydrogens_are_explicit kwarg and that a CMILES entry has all hydrogens + in the graph. + + `Issue #936 `, + `Issue #1696 `, + """ + toolkit_wrapper = RDKitToolkitWrapper() + smiles_impl = "[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]" + with pytest.raises( + ValueError, + match="the following SMILES as having some nonexplicit hydrogens", + ): + _ = Molecule.from_smiles( + smiles_impl, + toolkit_registry=toolkit_wrapper, + hydrogens_are_explicit=True, + ) + @pytest.mark.parametrize("molecule", get_mini_drug_bank(RDKitToolkitWrapper)) def test_to_inchi(self, molecule): """Test, but do not validate, conversion to standard and non-standard InChI""" diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 4a99577a1..ebf7eb8be 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1588,6 +1588,24 @@ def from_smiles( f"desired molecule as an RDMol with no implicit hydrogens, and then use " f"Molecule.from_rdkit() to create the desired OFFMol." ) + # Issue #936 and #1696: GetNumExplicitHs() counts the number of hydrogens that are + # explicitly in the string but not present in the graph. This criteria would apply to + # hydrogens inside the brackets descibing a heavy atom, e.g. [NH+]. + # https://www.rdkit.org/docs/Cookbook.html#explicit-valence-and-number-of-hydrogens + # This method is also used in GetTotalNumHs, which returns + # GetNumImplicitHs() + GetNumExplicitHs() + Optionally the number of + # neighboring hydrogens in the molecular graph. + # (see the cookbook link above for more details) + elif atom.GetNumExplicitHs() != 0: + raise ValueError( + "'hydrogens_are_explicit' was specified as True, but the RDKit wrapper interpreted " + f"the following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): '{smiles}'\n" + "If this SMILES is intended to express all explicit hydrogens in the molecule, then either: \n" + "1) Replace instances of implicit Hs (ex. `[NH+]` with `[N+]([H])` to make " + "the hydrogen explicit, or\n" + "2) construct the desired molecule as an RDMol with no implicit hydrogens, " + "and then use Molecule.from_rdkit() to create the desired OFFMol." + ) molecule = self.from_rdkit( rdmol,