Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ranking passed templates by H_excess if tied with H_missing #301

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions meeko/export_flexres.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def export_pdb_updated_flexres(polymer, pdbqt_mol):
orig_resmol = Chem.Mol(polymer.monomers[res_id].rdkit_mol)
orig_atomnames = polymer.monomers[res_id].atom_names

backbone_SMARTS = "[NX3]([H])[CX4][CX3](=O)"
expected_names = ('N', 'H', 'CA', 'C', 'O')
backbone_SMARTS = "[NX3]([H])[CX4]([H])[CX3](=O)"
expected_names = ('N', 'H', 'CA', 'HA', 'C', 'O')

backbone_qmol = Chem.MolFromSmarts(backbone_SMARTS)
backbone_matches = orig_resmol.GetSubstructMatches(backbone_qmol)
Expand Down Expand Up @@ -105,7 +105,10 @@ def export_pdb_updated_flexres(polymer, pdbqt_mol):
for neighbor_idx, bond_type in bonds_to_recover:
new_CA_idx = covres_mol.GetNumAtoms() + expected_names.index('CA')
combined_res.AddBond(new_CA_idx, neighbor_idx, bond_type)
combined_res.RemoveAtom(CA_index)

CA_h_idx = [nei.GetIdx() for nei in combined_res.GetAtomWithIdx(CA_index).GetNeighbors() if nei.GetAtomicNum()==1]
for atom_idx in sorted(CA_h_idx + [CA_index], reverse=True):
combined_res.RemoveAtom(atom_idx)
combined_res = combined_res.GetMol()

polymer.monomers[res_id].rdkit_mol = combined_res
Expand Down
60 changes: 37 additions & 23 deletions meeko/polymer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from .utils.rdkitutils import _aux_altloc_mol_build
from .utils.rdkitutils import covalent_radius
from .utils.pdbutils import PDBAtomInfo
from .utils.rdkitutils import getPdbInfoNoNull
from .chemtempgen import export_chem_templates_to_json
from .chemtempgen import build_noncovalent_CC
from .chemtempgen import build_linked_CCs
Expand Down Expand Up @@ -1357,7 +1358,7 @@ def _get_monomers(
passed.append(i)

# 3rd round
if len(passed) == 0:
if len(passed) == 0 or any(all_stats["H_excess"][i] for i in passed):
for i in range(len(candidate_templates)):
if (
all_stats["heavy_missing"][i]
Expand All @@ -1367,7 +1368,8 @@ def _get_monomers(
or len(all_stats["bonded_atoms_excess"][i])
):
continue
passed.append(i)
if i not in passed:
passed.append(i)

if len(passed) == 0:
template_key = None
Expand Down Expand Up @@ -1406,20 +1408,24 @@ def _get_monomers(
best_idxs.append(index)

if len(best_idxs) > 1:
tied = " ".join(candidate_template_keys[i] for i in best_idxs)
m = f"for {residue_key=}, {len(passed)} have passed: "
tkeys = [candidate_template_keys[i] for i in passed]
m += f"{tkeys} and tied for fewest missing H: {tied} "
raise RuntimeError(m)
elif len(best_idxs) == 0:
raise RuntimeError("unexpected situation")
else:
index = best_idxs[0]
template_key = candidate_template_keys[index]
template = residue_templates[template_key]
mapping = mappings[index]
H_miss = all_stats["H_missing"][index]
log["chosen_by_fewest_missing_H"][residue_key] = template_key
number_excess_H = [len(all_stats["H_excess"][index]) for index in passed]
min_excess_H = min(number_excess_H)
best_idxs = [index for index in passed if len(all_stats["H_excess"][index]) == min_excess_H]

if len(best_idxs) > 1:
tied = " ".join(candidate_template_keys[i] for i in best_idxs)
m = f"for {residue_key=}, {len(passed)} have passed: "
tkeys = [candidate_template_keys[i] for i in passed]
m += f"{tkeys} and tied for fewest missing and excess H: {tied} "

raise RuntimeError(m)

index = best_idxs[0]
template_key = candidate_template_keys[index]
template = residue_templates[template_key]
mapping = mappings[index]
H_miss = all_stats["H_missing"][index]
log["chosen_by_fewest_missing_H"][residue_key] = template_key
if template is None:
rdkit_mol = None
atom_names = None
Expand Down Expand Up @@ -2526,13 +2532,21 @@ def match(self, input_mol):
for atom in input_mol.GetAtoms():
element = "H" if atom.GetAtomicNum() == 1 else "heavy"
if atom.GetIdx() not in mapping_inv:
if element == "H":
nei_idx = atom.GetNeighbors()[0].GetIdx()
if nei_idx in mapping_inv:
result[element]["excess"].append(mapping_inv[nei_idx])
else:
result[element]["excess"].append(-1)
else:
if element == "H":
if atom.GetNeighbors():
nei_idx = atom.GetNeighbors()[0].GetIdx()
if nei_idx in mapping_inv:
result[element]["excess"].append(mapping_inv[nei_idx])
else:
result[element]["excess"].append(-1)
else: # lone hydrogen found in monomer
monomer_info = getPdbInfoNoNull(atom)
if monomer_info:
logger.warning(f"WARNING: Lone hydrogen is ignored: \n"
f" {monomer_info} \n")
else:
logger.warning(f"WARNING: A lone hydrogen is ignored during monomer-template matching. \n")
else:
result[element]["excess"] += 1
return result, mapping

Expand Down
22 changes: 22 additions & 0 deletions test/polymer_creation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
pkgdir = pathlib.Path(meeko.__file__).parents[1]

ahhy_example = pkgdir / "test/polymer_data/AHHY.pdb"
nphe_ser_example = pkgdir / "test/polymer_data/NPHE_SER.pdb"
just_one_ALA_missing = (
pkgdir / "test/polymer_data/just-one-ALA-missing-CB.pdb"
)
Expand Down Expand Up @@ -123,6 +124,27 @@ def test_AHHY_all_static_residues():
assert len(movable_part) == 0


def test_protonated_Nterm_residue():
f = open(nphe_ser_example, "r")
pdb_string = f.read()
polymer = Polymer.from_pdb_string(
pdb_string,
chem_templates,
mk_prep,
blunt_ends=[("A:2", 0)],
)
# Asserts that the residues have been imported in a way that makes sense, and that all the
# private functions we expect to have run as expected.
assert len(polymer.monomers) == 2
assert len(polymer.get_ignored_monomers()) == 0
assert len(polymer.get_valid_monomers()) == 2
assert polymer.monomers["A:1"].residue_template_key == "NPHE"
assert polymer.monomers["A:2"].residue_template_key == "SER"

check_charge(polymer.monomers["A:1"], 1.0)
check_charge(polymer.monomers["A:2"], 0.0)


def test_AHHY_padding():
with open(ahhy_example, "r") as f:
pdb_string = f.read()
Expand Down
33 changes: 33 additions & 0 deletions test/polymer_data/NPHE_SER.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
ATOM 1 N PHE A 1 68.722 74.399 242.118 1.00 9.63 A N
ATOM 2 CA PHE A 1 67.294 74.658 241.856 1.00 8.10 A C
ATOM 3 C PHE A 1 66.598 74.783 243.193 1.00 8.76 A C
ATOM 4 O PHE A 1 66.839 73.949 244.106 1.00 9.91 A O
ATOM 5 CB PHE A 1 66.715 73.473 241.081 1.00 9.82 A C
ATOM 6 CG PHE A 1 65.243 73.567 240.859 1.00 9.28 A C
ATOM 7 CD1 PHE A 1 64.361 72.788 241.615 1.00 9.06 A C
ATOM 8 CD2 PHE A 1 64.740 74.432 239.904 1.00 8.56 A C
ATOM 9 CE1 PHE A 1 63.001 72.865 241.408 1.00 9.60 A C
ATOM 10 CE2 PHE A 1 63.373 74.505 239.694 1.00 8.88 A C
ATOM 11 CZ PHE A 1 62.502 73.696 240.440 1.00 8.74 A C
ATOM 12 HA PHE A 1 67.173 75.469 241.337 1.00 8.10 A H
ATOM 13 HB2 PHE A 1 67.161 73.409 240.222 1.00 9.82 A H
ATOM 14 HB3 PHE A 1 66.910 72.654 241.563 1.00 9.82 A H
ATOM 15 HD1 PHE A 1 64.696 72.212 242.264 1.00 9.06 A H
ATOM 16 HD2 PHE A 1 65.318 74.963 239.405 1.00 8.56 A H
ATOM 17 HE1 PHE A 1 62.422 72.353 241.925 1.00 9.60 A H
ATOM 18 HE2 PHE A 1 63.032 75.091 239.058 1.00 8.88 A H
ATOM 19 HZ PHE A 1 61.587 73.724 240.277 1.00 8.74 A H
ATOM 20 H1 PHE A 1 69.160 74.339 241.345 1.00 9.63 A H
ATOM 21 H2 PHE A 1 69.061 75.066 242.601 1.00 9.63 A H
ATOM 22 H3 PHE A 1 68.808 73.636 242.567 1.00 9.63 A H
ATOM 23 N SER A 2 65.746 75.780 243.354 1.00 9.10 A N
ATOM 24 CA SER A 2 65.047 75.935 244.606 1.00 8.79 A C
ATOM 25 C SER A 2 63.631 76.423 244.347 1.00 9.57 A C
ATOM 26 O SER A 2 63.343 77.010 243.291 1.00 8.63 A O
ATOM 27 CB SER A 2 65.746 76.932 245.519 1.00 11.39 A C
ATOM 28 OG SER A 2 67.085 76.531 245.798 1.00 12.67 A O
ATOM 29 H SER A 2 65.562 76.370 242.756 1.00 9.10 A H
ATOM 30 HA SER A 2 65.035 75.069 245.043 1.00 8.79 A H
ATOM 31 HB2 SER A 2 65.749 77.808 245.102 1.00 11.39 A H
ATOM 32 HB3 SER A 2 65.252 77.015 246.349 1.00 11.39 A H
ATOM 33 HG SER A 2 67.220 75.765 245.482 1.00 12.67 A H
Loading