From 54b93f61f38a43c88db498e2e69c367d76d00034 Mon Sep 17 00:00:00 2001 From: Katja Linnemann Date: Tue, 17 Oct 2023 15:12:54 +0200 Subject: [PATCH 01/14] Implements residue flag using the residue flag only part of a chain can be treated as ligand to investigate domain-domain interactions --- plip/basic/config.py | 1 + plip/plipcmd.py | 19 +++++++++++++++++++ plip/structure/preparation.py | 20 +++++++++++++------- plip/visualization/visualize.py | 5 ++++- 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/plip/basic/config.py b/plip/basic/config.py index 4e87be8..dd1fb98 100644 --- a/plip/basic/config.py +++ b/plip/basic/config.py @@ -25,6 +25,7 @@ NOFIXFILE = False # Turn off writing to files for fixed PDB structures PEPTIDES = [] # Definition which chains should be considered as peptide ligands INTRA = None +RESIDUES = {} KEEPMOD = False DNARECEPTOR = False OUTPUTFILENAME = "report" # Naming for the TXT and XML report files diff --git a/plip/plipcmd.py b/plip/plipcmd.py index 78ac79b..342bbaa 100644 --- a/plip/plipcmd.py +++ b/plip/plipcmd.py @@ -38,6 +38,16 @@ def threshold_limiter(aparser, arg): aparser.error("All thresholds have to be values larger than zero.") return arg +def residue_list(input_string): + """Parse mix of residue numbers and ranges passed with the --residues flag into one list""" + result = [] + for part in input_string.split(','): + if '-' in part: + start, end = map(int, part.split('-')) + result.extend(range(start, end + 1)) + else: + result.append(int(part)) + return result def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'): """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output.""" @@ -218,6 +228,9 @@ def main(): help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts", nargs="+") ligandtype.add_argument("--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.") + parser.add_argument("--residues", dest="residues", default=[], nargs="+", + help="""Allows to specify which residues of the chain(s) should be considered as peptide ligands. + Give single residues (separated with comma) or ranges (with dash) or both, for several chains separate selections with one space""") parser.add_argument("--keepmod", dest="keepmod", default=False, help="Keep modified residues as ligands", action="store_true") @@ -242,6 +255,11 @@ def main(): parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val), help=argparse.SUPPRESS) arguments = parser.parse_args() + # make sure, residues is only used together with --inter (could be expanded to --intra in the future) + if arguments.residues and not (arguments.peptides or arguments.intra): + parser.error("The --residues option requires specification of a chain with --inter or --peptide") + if arguments.residues and len(arguments.residues)!=len(arguments.peptides): + parser.error("Please provide residue numbers or ranges for each chain specified. Separate selections with a single space.") # configure log levels config.VERBOSE = True if arguments.verbose else False config.QUIET = True if arguments.quiet else False @@ -268,6 +286,7 @@ def main(): config.BREAKCOMPOSITE = arguments.breakcomposite config.ALTLOC = arguments.altlocation config.PEPTIDES = arguments.peptides + config.RESIDUES = dict(zip(arguments.peptides, map(residue_list, arguments.residues))) config.INTRA = arguments.intra config.NOFIX = arguments.nofix config.NOFIXFILE = arguments.nofixfile diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index 32326cf..d92d13a 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -243,7 +243,7 @@ def getpeptides(self, chain): given chain without water """ all_from_chain = [o for o in pybel.ob.OBResidueIter( - self.proteincomplex.OBMol) if o.GetChain() == chain] # All residues from chain + self.proteincomplex.OBMol) if o.GetChain() == chain and (chain not in config.RESIDUES.keys() or o.GetNum() in config.RESIDUES[chain])] # All residues from chain if len(all_from_chain) == 0: return None else: @@ -932,8 +932,11 @@ def find_charged(self, mol): """If nucleic acids are part of the receptor, looks for negative charges in phosphate backbone""" data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain') a_set = [] - # Iterate through all residue, exclude those in chains defined as peptides - for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]: + # Iterate through all residue, exclude those in (part of) chains defined as peptides + for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol)]: + if res.GetChain() in config.PEPTIDES: + if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]: + continue if config.INTRA is not None: if res.GetChain() != config.INTRA: continue @@ -1475,7 +1478,7 @@ def characterize_complex(self, ligand): if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf] if ligand.type == 'PEPTIDE': # If peptide, don't consider the peptide chain as part of the protein binding site - bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain] + bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain or (lig_obj.chain in config.RESIDUES.keys() and not a.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain])] if ligand.type == 'INTRA': # Interactions within the chain bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain] @@ -1508,13 +1511,16 @@ def extract_bs(self, cutoff, ligcentroid, resis): @staticmethod def res_belongs_to_bs(res, cutoff, ligcentroid): """Check for each residue if its centroid is within a certain distance to the ligand centroid. - Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)""" + Additionally checks if a residue belongs to a segment restricted by the user (e.g. by defining a peptide chain)""" rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) # Check geometry near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False # Check chain membership - restricted_chain = True if res.GetChain() in config.PEPTIDES else False - return (near_enough and not restricted_chain) + restricted_segment = False + if res.GetChain() in config.PEPTIDES: + if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]: + restricted_segment = True + return (near_enough and not restricted_segment) def get_atom(self, idx): return self.atoms[idx] diff --git a/plip/visualization/visualize.py b/plip/visualization/visualize.py index 31cce2e..3918b5b 100644 --- a/plip/visualization/visualize.py +++ b/plip/visualization/visualize.py @@ -45,7 +45,10 @@ def visualize_in_pymol(plcomplex): cmd.set_name(current_name, pdbid) cmd.hide('everything', 'all') if config.PEPTIDES: - cmd.select(ligname, 'chain %s and not resn HOH' % plcomplex.chain) + if plcomplex.chain in config.RESIDUES.keys(): + cmd.select(ligname, 'chain %s and not resn HOH and resi %s' % (plcomplex.chain, "+".join(map(str, config.RESIDUES[plcomplex.chain])))) + else: + cmd.select(ligname, 'chain %s and not resn HOH' % plcomplex.chain) else: cmd.select(ligname, 'resn %s and chain %s and resi %s*' % (hetid, chain, plcomplex.position)) logger.debug(f'selecting ligand for PDBID {pdbid} and ligand name {ligname}') From 798db26e07058b712597e19c69c811d7eabea0f0 Mon Sep 17 00:00:00 2001 From: Sarah Naomi Bolz Date: Tue, 22 Oct 2024 14:14:29 +0200 Subject: [PATCH 02/14] added functions to test whether a residue or atom is part of a ligand. --- plip/basic/supplemental.py | 8 ++++++++ plip/structure/preparation.py | 31 +++++++++++++++++-------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py index 1975c37..bc5a400 100644 --- a/plip/basic/supplemental.py +++ b/plip/basic/supplemental.py @@ -55,6 +55,14 @@ def whichchain(atom): return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None +def residue_belongs_to_ligand(res, config): + """tests whether the residue is part of a peptide or residue ligand.""" + if res.GetChain() in config.PEPTIDES: + if res.GetChain() not in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]: + return True + return False + + ######################### # Mathematical operations ######################### diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index d92d13a..024c82c 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -12,7 +12,7 @@ from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name from plip.basic.supplemental import cluster_doubles, is_lig, normalize_vector, vector, ring_is_planar from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize -from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance +from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance, residue_belongs_to_ligand from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative from plip.structure.detection import halogen, pication, water_bridges, metal_complexation from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge @@ -934,9 +934,8 @@ def find_charged(self, mol): a_set = [] # Iterate through all residue, exclude those in (part of) chains defined as peptides for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol)]: - if res.GetChain() in config.PEPTIDES: - if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]: - continue + if residue_belongs_to_ligand(res, config): + continue if config.INTRA is not None: if res.GetChain() != config.INTRA: continue @@ -1448,6 +1447,15 @@ def analyze(self): for ligand in self.ligands: self.characterize_complex(ligand) + @staticmethod + def atom_belongs_to_ligand(atom, lig_obj): + """test whether atom is part of a peptide or residue ligand.""" + if atom.OBAtom.GetResidue().GetChain() == lig_obj.chain: + return True + if lig_obj.chain not in config.RESIDUES.keys() and atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain]: + return True + return False + def characterize_complex(self, ligand): """Handles all basic functions for characterizing the interactions for one ligand""" @@ -1478,7 +1486,7 @@ def characterize_complex(self, ligand): if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf] if ligand.type == 'PEPTIDE': # If peptide, don't consider the peptide chain as part of the protein binding site - bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain or (lig_obj.chain in config.RESIDUES.keys() and not a.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain])] + bs_atoms = [a for a in bs_atoms if not self.atom_belongs_to_ligand(a, lig_obj)] if ligand.type == 'INTRA': # Interactions within the chain bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain] @@ -1511,16 +1519,11 @@ def extract_bs(self, cutoff, ligcentroid, resis): @staticmethod def res_belongs_to_bs(res, cutoff, ligcentroid): """Check for each residue if its centroid is within a certain distance to the ligand centroid. - Additionally checks if a residue belongs to a segment restricted by the user (e.g. by defining a peptide chain)""" - rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) + Additionally, checks if a residue belongs to a ligand.""" + res_centroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) # Check geometry - near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False - # Check chain membership - restricted_segment = False - if res.GetChain() in config.PEPTIDES: - if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]: - restricted_segment = True - return (near_enough and not restricted_segment) + near_enough = True if euclidean3d(res_centroid, ligcentroid) < cutoff else False + return near_enough and not residue_belongs_to_ligand(res, config) def get_atom(self, idx): return self.atoms[idx] From bec7a319f53006d5caf87d177255f080807de797 Mon Sep 17 00:00:00 2001 From: Sarah Naomi Bolz Date: Wed, 23 Oct 2024 17:35:35 +0200 Subject: [PATCH 03/14] started reviewing the PDBParser class --- plip/plipcmd.py | 6 +++--- plip/structure/preparation.py | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/plip/plipcmd.py b/plip/plipcmd.py index 342bbaa..64c114f 100644 --- a/plip/plipcmd.py +++ b/plip/plipcmd.py @@ -137,16 +137,16 @@ def run_analysis(inputstructs, inputpdbids): output_prefix = config.OUTPUTFILENAME if inputstructs is not None: # Process PDB file(s) - num_structures = len(inputstructs) + num_structures = len(inputstructs) # @question: how can it become more than one file? The tilde_expansion function does not consider this case. inputstructs = remove_duplicates(inputstructs) read_from_stdin = False for inputstruct in inputstructs: - if inputstruct == '-': + if inputstruct == '-': # @expl: when user gives '-' as input, pdb file is read from stdin inputstruct = sys.stdin.read() read_from_stdin = True if config.RAWSTRING: if sys.version_info < (3,): - inputstruct = bytes(inputstruct).decode('unicode_escape') + inputstruct = bytes(inputstruct).decode('unicode_escape') # @expl: in Python2, the bytes object is just a string. else: inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape') else: diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index 024c82c..d392127 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -21,12 +21,15 @@ class PDBParser: - def __init__(self, pdbpath, as_string): - self.as_string = as_string - self.pdbpath = pdbpath - self.num_fixed_lines = 0 + def __init__(self, pdbpath: str, as_string: bool = False): + self.as_string: bool = as_string # @expl: becomes only True if pdb file is read from stdin + self.pdbpath: str = pdbpath # @expl: full filename of downloaded (from pdb id) or provided pdb file. If pdb file was read from stdin, contains the content of the file. + self.num_fixed_lines: int = 0 self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") + # @expl: information from the LINK lines in the PDB file, conf1/2 refers to the altLoc indicator self.proteinmap, self.modres, self.covalent, self.altconformations, self.corrected_pdb = self.parse_pdb() + # @expl: proteinmap: + # @expl: covalent: list of all covlinkage namedtuples def parse_pdb(self): """Extracts additional information from PDB files. @@ -113,8 +116,8 @@ def parse_pdb(self): j += 1 else: i += 1 - j += 2 - d[i] = j + j += 2 # @question: Why += 2? + d[i] = j # @question: Why not the other way around d[j] = i? previous_ter = False # Numbering Changes at TER records if line.startswith("TER"): @@ -432,7 +435,7 @@ def identify_kmers(self, residues): """Using the covalent linkage information, find out which fragments/subunits form a ligand.""" # Remove all those not considered by ligands and pairings including alternate conformations - ligdoubles = [[(link.id1, link.chain1, link.pos1), + ligdoubles = [[(link.id1, link.chain1, link.pos1), # @question: What are ligdoubles? Why the word "doubles"? (link.id2, link.chain2, link.pos2)] for link in [c for c in self.covalent if c.id1 in self.lignames_kept and c.id2 in self.lignames_kept and c.conf1 in ['A', ''] and c.conf2 in ['A', ''] @@ -458,7 +461,7 @@ def identify_kmers(self, residues): return res_kmers -class Mapper: +class Mapper: # @question: Why do you need this mapper? """Provides functions for mapping atom IDs in the correct way""" def __init__(self): @@ -468,7 +471,7 @@ def __init__(self): def mapid(self, idx, mtype, bsid=None, to='original'): # Mapping to original IDs is standard for ligands if mtype == 'reversed': # Needed to map internal ID back to original protein ID - return self.reversed_proteinmap[idx] + return self.reversed_proteinmap[idx] # @todo: self.reversed_proteinmap must be defined inside class if mtype == 'protein': return self.proteinmap[idx] elif mtype == 'ligand': From 1ffb4b7ab3c46f8413cfe4f49a12c674d032e7c8 Mon Sep 17 00:00:00 2001 From: Philipp Schake Date: Mon, 4 Nov 2024 15:05:22 +0100 Subject: [PATCH 04/14] add local to ignore --- .gitignore | 1 + plip/structure/preparation.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 2516804..a193256 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,4 @@ target/ # Other *~ .nfs* +tests/ \ No newline at end of file diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index d392127..c1dc081 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -1454,9 +1454,9 @@ def analyze(self): def atom_belongs_to_ligand(atom, lig_obj): """test whether atom is part of a peptide or residue ligand.""" if atom.OBAtom.GetResidue().GetChain() == lig_obj.chain: - return True - if lig_obj.chain not in config.RESIDUES.keys() and atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain]: - return True + if lig_obj.chain not in config.RESIDUES.keys() or atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[ + lig_obj.chain]: + return True return False def characterize_complex(self, ligand): From c22be4c8472098ab2bf4aec2b01bd1649fdc0d6a Mon Sep 17 00:00:00 2001 From: Philipp Schake Date: Thu, 7 Nov 2024 13:23:08 +0100 Subject: [PATCH 05/14] Start implementing chains selection --- plip/plipcmd.py | 35 ++++++++++---- plip/structure/preparation.py | 89 +++++++++++++++++++++++++---------- 2 files changed, 91 insertions(+), 33 deletions(-) diff --git a/plip/plipcmd.py b/plip/plipcmd.py index 64c114f..fef06a6 100644 --- a/plip/plipcmd.py +++ b/plip/plipcmd.py @@ -10,6 +10,7 @@ import multiprocessing import os import sys +import ast from argparse import ArgumentParser from collections import namedtuple @@ -49,8 +50,9 @@ def residue_list(input_string): result.append(int(part)) return result -def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'): - """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output.""" +def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report', chains=None): + """Analysis of a single PDB file with optional chain filtering.""" + print(chains) if not as_string: pdb_file_name = pdbfile.split('/')[-1] startmessage = f'starting analysis of {pdb_file_name}' @@ -59,8 +61,7 @@ def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'): logger.info(startmessage) mol = PDBComplex() mol.output_path = outpath - mol.load_pdb(pdbfile, as_string=as_string) - # @todo Offers possibility for filter function from command line (by ligand chain, position, hetid) + mol.load_pdb(pdbfile, as_string=as_string, chains=chains) # Pass chains here for ligand in mol.ligands: mol.characterize_complex(ligand) @@ -126,7 +127,7 @@ def remove_duplicates(slist): return unique -def run_analysis(inputstructs, inputpdbids): +def run_analysis(inputstructs, inputpdbids, chains=None): """Main function. Calls functions for processing, report generation and visualization.""" pdbid, pdbpath = None, None # @todo For multiprocessing, implement better stacktracing for errors @@ -157,7 +158,7 @@ def run_analysis(inputstructs, inputpdbids): basename = inputstruct.split('.')[-2].split('/')[-1] config.OUTPATH = '/'.join([config.BASEPATH, basename]) output_prefix = 'report' - process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix) + process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix, chains=chains) else: # Try to fetch the current PDB structure(s) directly from the RCBS server num_pdbids = len(inputpdbids) inputpdbids = remove_duplicates(inputpdbids) @@ -166,7 +167,7 @@ def run_analysis(inputstructs, inputpdbids): if num_pdbids > 1: config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()]) output_prefix = 'report' - process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix) + process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix, chains=chains) if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None: if config.BASEPATH in ['.', './']: @@ -254,6 +255,13 @@ def main(): for t in thresholds: parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val), help=argparse.SUPPRESS) + + # Add argument to define receptor and ligand chains + parser.add_argument("--chains", dest="chains", type=str, + help="Specify chains as receptor/ligand groups, e.g., '[['A'], ['B']]'. " + "Use format [['A'], ['B', 'C']] to define A as receptor, and B, C as ligands.") + + arguments = parser.parse_args() # make sure, residues is only used together with --inter (could be expanded to --intra in the future) if arguments.residues and not (arguments.peptides or arguments.intra): @@ -296,6 +304,17 @@ def main(): config.OUTPUTFILENAME = arguments.outputfilename config.NOHYDRO = arguments.nohydro config.MODEL = arguments.model + + + try: + config.CHAINS = ast.literal_eval(arguments.chains) if arguments.chains else None + if config.CHAINS and not all(isinstance(c, list) for c in config.CHAINS): + raise ValueError("Chains should be specified as a list of lists, e.g., '[['A'], ['B', 'C']]'.") + except (ValueError, SyntaxError): + parser.error("The --chains option must be in the format '[['A'], ['B', 'C']]'.") + + chains = arguments.chains + # Make sure we have pymol with --pics and --pymol if config.PICS or config.PYMOL: try: @@ -325,7 +344,7 @@ def main(): if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX: parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle") expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None - run_analysis(expanded_path, arguments.pdbid) # Start main script + run_analysis(expanded_path, arguments.pdbid, chains=chains) # Start main script if __name__ == '__main__': diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index c1dc081..11b6a4d 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -21,9 +21,10 @@ class PDBParser: - def __init__(self, pdbpath: str, as_string: bool = False): + def __init__(self, pdbpath: str, as_string: bool = False, chains: list = None): self.as_string: bool = as_string # @expl: becomes only True if pdb file is read from stdin self.pdbpath: str = pdbpath # @expl: full filename of downloaded (from pdb id) or provided pdb file. If pdb file was read from stdin, contains the content of the file. + self.chains = chains # New chains argument to filter specified chains self.num_fixed_lines: int = 0 self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") # @expl: information from the LINK lines in the PDB file, conf1/2 refers to the altLoc indicator @@ -40,20 +41,20 @@ def parse_pdb(self): III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified IV. Alternative conformations """ + # Read lines from the PDB content if self.as_string: - fil = self.pdbpath.rstrip('\n').split('\n') # Removing trailing newline character + fil = self.pdbpath.rstrip('\n').split('\n') # Split content if reading from string else: - f = read(self.pdbpath) - fil = f.readlines() - f.close() - corrected_lines = [] + with open(self.pdbpath, 'r') as f: + fil = f.readlines() + + filtered_lines = [] i, j = 0, 0 # idx and PDB numbering d = {} modres = set() covalent = [] alt = [] previous_ter = False - model_dict = {0: list()} # Standard without fixing @@ -61,31 +62,31 @@ def parse_pdb(self): if not config.PLUGIN_MODE: lastnum = 0 # Atom numbering (has to be consecutive) other_models = False - # Model 0 stores header and similar additional data - # or the full file if no MODEL entries exist in the file current_model = 0 for line in fil: + # Check if the line belongs to relevant chains + if not self.is_relevant_chain(line): + continue # Skip lines not matching specified chains + corrected_line, newnum = self.fix_pdbline(line, lastnum) if corrected_line is not None: if corrected_line.startswith('MODEL'): - # reset atom number when new model is encountered lastnum = 0 - try: # Get number of MODEL (1,2,3) + try: model_num = int(corrected_line[10:14]) - # initialize storage for new model model_dict[model_num] = list() current_model = model_num - if model_num > 1: # MODEL 2,3,4 etc. + if model_num > 1: other_models = True except ValueError: - logger.debug(f'ignoring invalid MODEL entry: {corrected_line}') + logger.debug(f'Ignoring invalid MODEL entry: {corrected_line}') else: lastnum = newnum model_dict[current_model].append(corrected_line) - # select model + try: if other_models: - logger.info(f'selecting model {config.MODEL} for analysis') + logger.info(f'Selecting model {config.MODEL} for analysis') corrected_pdb = ''.join(model_dict[0]) corrected_lines = model_dict[0] if current_model > 0: @@ -95,7 +96,7 @@ def parse_pdb(self): corrected_pdb = ''.join(model_dict[1]) corrected_lines = model_dict[1] config.MODEL = 1 - logger.warning('invalid model number specified, using first model instead') + logger.warning('Invalid model number specified, using first model instead') else: corrected_pdb = self.pdbpath corrected_lines = fil @@ -116,20 +117,34 @@ def parse_pdb(self): j += 1 else: i += 1 - j += 2 # @question: Why += 2? - d[i] = j # @question: Why not the other way around d[j] = i? + j += 2 # Adjust numbering after TER records + d[i] = j previous_ter = False - # Numbering Changes at TER records + if line.startswith("TER"): previous_ter = True - # Get modified residues + if line.startswith("MODRES"): modres.add(line[12:15].strip()) - # Get covalent linkages between ligands + if line.startswith("LINK"): covalent.append(self.get_linkage(line)) + return d, modres, covalent, alt, corrected_pdb + def is_relevant_chain(self, line): + """Helper to check if a line belongs to one of the specified chains.""" + if not self.chains: + return True # No filtering needed if chains are not specified + + # Check if the line represents an ATOM or HETATM record and matches specified chains + if line.startswith(("ATOM", "HETATM")): + all_chains = self.chains[0] + self.chains[1] + chain_id = line[21] # Column 22 holds the chain ID in PDB format + return any(chain_id in group for group in all_chains) # Check if chain_id is in receptor/ligand groups + + return True # Include non-atom records by default + def fix_pdbline(self, pdbline, lastnum): """Fix a PDB line if information is missing.""" pdbqt_conversion = { @@ -240,6 +255,12 @@ def __init__(self, proteincomplex, altconf, modres, covalent, mapper): self.ligands = self.getligs() self.excluded = sorted(list(self.lignames_all.difference(set(self.lignames_kept)))) + def filter_chains(self, receptor_chains, ligand_chains): + """Filter proteincomplex chains to include only specified receptors and ligands.""" + self.receptor_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in receptor_chains] + self.ligand_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in ligand_chains] + logger.debug(f"Filtered to receptor chains: {receptor_chains}, ligand chains: {ligand_chains}") + def getpeptides(self, chain): """If peptide ligand chains are defined via the command line options, try to extract the underlying ligand formed by all residues in the @@ -259,7 +280,7 @@ def getligs(self): Returns all non-empty ligands. """ - if config.PEPTIDES == [] and config.INTRA is None: + if config.PEPTIDES == [] and config.INTRA is None and config.CHAINS is None: # Extract small molecule ligands (default) ligands = [] @@ -1348,7 +1369,7 @@ def __str__(self): return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join( [lig for lig in formatted_lig_names]) - def load_pdb(self, pdbpath, as_string=False): + def load_pdb(self, pdbpath, as_string=False, chains=None): """Loads a pdb file with protein AND ligand(s), separates and prepares them. If specified 'as_string', the input is a PDB string instead of a path.""" if as_string: @@ -1359,7 +1380,7 @@ def load_pdb(self, pdbpath, as_string=False): self.sourcefiles['pdbcomplex.original'] = pdbpath self.sourcefiles['pdbcomplex'] = pdbpath self.information['pdbfixes'] = False - pdbparser = PDBParser(pdbpath, as_string=as_string) # Parse PDB file to find errors and get additional data + pdbparser = PDBParser(pdbpath, as_string=as_string, chains=chains) # Parse PDB file to find errors and get additional data # #@todo Refactor and rename here self.Mapper.proteinmap = pdbparser.proteinmap self.Mapper.reversed_proteinmap = {v: k for k, v in self.Mapper.proteinmap.items()} @@ -1368,6 +1389,24 @@ def load_pdb(self, pdbpath, as_string=False): self.altconf = pdbparser.altconformations self.corrected_pdb = pdbparser.corrected_pdb + + + if chains: + receptors, ligands = chains[0], chains[1] + self.receptor_chains = receptors + self.ligand_chains = ligands + logger.info(f"Chains set as receptor: {receptors} and ligand: {ligands}") + else: + self.receptor_chains = None + self.ligand_chains = None + # Call LigandFinder with chain filtering if set + ligandfinder = LigandFinder(self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper) + if self.receptor_chains or self.ligand_chains: + ligandfinder.filter_chains(self.receptor_chains, self.ligand_chains) + + self.ligands = ligandfinder.ligands + self.excluded = ligandfinder.excluded + if not config.PLUGIN_MODE: if pdbparser.num_fixed_lines > 0: logger.info(f'{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file') From fa17af0fd687c2c5410ce2a60f354339f09acb74 Mon Sep 17 00:00:00 2001 From: Philipp Schake Date: Thu, 14 Nov 2024 11:58:11 +0100 Subject: [PATCH 06/14] First implementation of chains flag. Works for two chains (one L one R) --- .DS_Store | Bin 0 -> 6148 bytes plip/basic/config.py | 2 + plip/plipcmd.py | 12 ++- plip/structure/preparation.py | 154 +++++++++++++--------------------- 4 files changed, 66 insertions(+), 102 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..224f00bf3542c34f1efa7d9446196c0fdab59cf4 GIT binary patch literal 6148 zcmeHK%}T>S5Z>*NO(;SR3gRi?wPI_FMZAPqU%(VSsMN#+4aRI~QhO+cJb=EC590GU zv%8T>_25OM%)rbyot@cqzm%O0V~o4gu)~|~W z5)rKAQs93wfbXu(hAd(k%h~e#dke-%oaOEIJFir0^^JznFq+1V_aJ6o?&s6Y^(R;O zX(&Vzl)fKa#L;YIZJmlV_v190$bvYGAm#cpPD3$s#WW2wS?j3(|#u15^6${}v+JV#@pP?a;ThYzlw%=LuA<zRr9j+92hh=2D1- 1: config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()]) output_prefix = 'report' - process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix, chains=chains) + process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix) if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None: if config.BASEPATH in ['.', './']: @@ -313,7 +312,6 @@ def main(): except (ValueError, SyntaxError): parser.error("The --chains option must be in the format '[['A'], ['B', 'C']]'.") - chains = arguments.chains # Make sure we have pymol with --pics and --pymol if config.PICS or config.PYMOL: @@ -344,7 +342,7 @@ def main(): if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX: parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle") expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None - run_analysis(expanded_path, arguments.pdbid, chains=chains) # Start main script + run_analysis(expanded_path, arguments.pdbid) # Start main script if __name__ == '__main__': diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index 11b6a4d..cdfaed0 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -12,7 +12,7 @@ from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name from plip.basic.supplemental import cluster_doubles, is_lig, normalize_vector, vector, ring_is_planar from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize -from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance, residue_belongs_to_ligand +from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative from plip.structure.detection import halogen, pication, water_bridges, metal_complexation from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge @@ -21,16 +21,12 @@ class PDBParser: - def __init__(self, pdbpath: str, as_string: bool = False, chains: list = None): - self.as_string: bool = as_string # @expl: becomes only True if pdb file is read from stdin - self.pdbpath: str = pdbpath # @expl: full filename of downloaded (from pdb id) or provided pdb file. If pdb file was read from stdin, contains the content of the file. - self.chains = chains # New chains argument to filter specified chains - self.num_fixed_lines: int = 0 + def __init__(self, pdbpath, as_string): + self.as_string = as_string + self.pdbpath = pdbpath + self.num_fixed_lines = 0 self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") - # @expl: information from the LINK lines in the PDB file, conf1/2 refers to the altLoc indicator self.proteinmap, self.modres, self.covalent, self.altconformations, self.corrected_pdb = self.parse_pdb() - # @expl: proteinmap: - # @expl: covalent: list of all covlinkage namedtuples def parse_pdb(self): """Extracts additional information from PDB files. @@ -41,20 +37,20 @@ def parse_pdb(self): III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified IV. Alternative conformations """ - # Read lines from the PDB content if self.as_string: - fil = self.pdbpath.rstrip('\n').split('\n') # Split content if reading from string + fil = self.pdbpath.rstrip('\n').split('\n') # Removing trailing newline character else: - with open(self.pdbpath, 'r') as f: - fil = f.readlines() - - filtered_lines = [] + f = read(self.pdbpath) + fil = f.readlines() + f.close() + corrected_lines = [] i, j = 0, 0 # idx and PDB numbering d = {} modres = set() covalent = [] alt = [] previous_ter = False + model_dict = {0: list()} # Standard without fixing @@ -62,31 +58,31 @@ def parse_pdb(self): if not config.PLUGIN_MODE: lastnum = 0 # Atom numbering (has to be consecutive) other_models = False + # Model 0 stores header and similar additional data + # or the full file if no MODEL entries exist in the file current_model = 0 for line in fil: - # Check if the line belongs to relevant chains - if not self.is_relevant_chain(line): - continue # Skip lines not matching specified chains - corrected_line, newnum = self.fix_pdbline(line, lastnum) if corrected_line is not None: if corrected_line.startswith('MODEL'): + # reset atom number when new model is encountered lastnum = 0 - try: + try: # Get number of MODEL (1,2,3) model_num = int(corrected_line[10:14]) + # initialize storage for new model model_dict[model_num] = list() current_model = model_num - if model_num > 1: + if model_num > 1: # MODEL 2,3,4 etc. other_models = True except ValueError: - logger.debug(f'Ignoring invalid MODEL entry: {corrected_line}') + logger.debug(f'ignoring invalid MODEL entry: {corrected_line}') else: lastnum = newnum model_dict[current_model].append(corrected_line) - + # select model try: if other_models: - logger.info(f'Selecting model {config.MODEL} for analysis') + logger.info(f'selecting model {config.MODEL} for analysis') corrected_pdb = ''.join(model_dict[0]) corrected_lines = model_dict[0] if current_model > 0: @@ -96,7 +92,7 @@ def parse_pdb(self): corrected_pdb = ''.join(model_dict[1]) corrected_lines = model_dict[1] config.MODEL = 1 - logger.warning('Invalid model number specified, using first model instead') + logger.warning('invalid model number specified, using first model instead') else: corrected_pdb = self.pdbpath corrected_lines = fil @@ -117,34 +113,20 @@ def parse_pdb(self): j += 1 else: i += 1 - j += 2 # Adjust numbering after TER records + j += 2 d[i] = j previous_ter = False - + # Numbering Changes at TER records if line.startswith("TER"): previous_ter = True - + # Get modified residues if line.startswith("MODRES"): modres.add(line[12:15].strip()) - + # Get covalent linkages between ligands if line.startswith("LINK"): covalent.append(self.get_linkage(line)) - return d, modres, covalent, alt, corrected_pdb - def is_relevant_chain(self, line): - """Helper to check if a line belongs to one of the specified chains.""" - if not self.chains: - return True # No filtering needed if chains are not specified - - # Check if the line represents an ATOM or HETATM record and matches specified chains - if line.startswith(("ATOM", "HETATM")): - all_chains = self.chains[0] + self.chains[1] - chain_id = line[21] # Column 22 holds the chain ID in PDB format - return any(chain_id in group for group in all_chains) # Check if chain_id is in receptor/ligand groups - - return True # Include non-atom records by default - def fix_pdbline(self, pdbline, lastnum): """Fix a PDB line if information is missing.""" pdbqt_conversion = { @@ -255,19 +237,13 @@ def __init__(self, proteincomplex, altconf, modres, covalent, mapper): self.ligands = self.getligs() self.excluded = sorted(list(self.lignames_all.difference(set(self.lignames_kept)))) - def filter_chains(self, receptor_chains, ligand_chains): - """Filter proteincomplex chains to include only specified receptors and ligands.""" - self.receptor_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in receptor_chains] - self.ligand_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in ligand_chains] - logger.debug(f"Filtered to receptor chains: {receptor_chains}, ligand chains: {ligand_chains}") - def getpeptides(self, chain): """If peptide ligand chains are defined via the command line options, try to extract the underlying ligand formed by all residues in the given chain without water """ all_from_chain = [o for o in pybel.ob.OBResidueIter( - self.proteincomplex.OBMol) if o.GetChain() == chain and (chain not in config.RESIDUES.keys() or o.GetNum() in config.RESIDUES[chain])] # All residues from chain + self.proteincomplex.OBMol) if o.GetChain() == chain] # All residues from chain if len(all_from_chain) == 0: return None else: @@ -308,8 +284,16 @@ def getligs(self): else: # Extract peptides from given chains self.water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)] - if config.PEPTIDES: + if config.PEPTIDES and not config.CHAINS: peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES] + + #Todo: Validate change here... Do we want to combine multiple chains to a single ligand? + # if yes can be easily added to the getpeptides function by flatten the resulting list - Philipp + elif config.CHAINS: + # chains is defined as list of list e.g. [['A'], ['B', 'C']] in which second list contains the + # ligand chains and the first one should be the receptor + peptide_ligands = [self.getpeptides(chain) for chain in config.CHAINS[1]] + elif config.INTRA is not None: peptide_ligands = [self.getpeptides(config.INTRA), ] @@ -328,7 +312,7 @@ def extract_ligand(self, kmer): names = [x[0] for x in members] longname = '-'.join([x[0] for x in members]) - if config.PEPTIDES: + if config.PEPTIDES or config.CHAINS: ligtype = 'PEPTIDE' elif config.INTRA is not None: ligtype = 'INTRA' @@ -456,7 +440,7 @@ def identify_kmers(self, residues): """Using the covalent linkage information, find out which fragments/subunits form a ligand.""" # Remove all those not considered by ligands and pairings including alternate conformations - ligdoubles = [[(link.id1, link.chain1, link.pos1), # @question: What are ligdoubles? Why the word "doubles"? + ligdoubles = [[(link.id1, link.chain1, link.pos1), (link.id2, link.chain2, link.pos2)] for link in [c for c in self.covalent if c.id1 in self.lignames_kept and c.id2 in self.lignames_kept and c.conf1 in ['A', ''] and c.conf2 in ['A', ''] @@ -482,7 +466,7 @@ def identify_kmers(self, residues): return res_kmers -class Mapper: # @question: Why do you need this mapper? +class Mapper: """Provides functions for mapping atom IDs in the correct way""" def __init__(self): @@ -492,7 +476,7 @@ def __init__(self): def mapid(self, idx, mtype, bsid=None, to='original'): # Mapping to original IDs is standard for ligands if mtype == 'reversed': # Needed to map internal ID back to original protein ID - return self.reversed_proteinmap[idx] # @todo: self.reversed_proteinmap must be defined inside class + return self.reversed_proteinmap[idx] if mtype == 'protein': return self.proteinmap[idx] elif mtype == 'ligand': @@ -956,10 +940,8 @@ def find_charged(self, mol): """If nucleic acids are part of the receptor, looks for negative charges in phosphate backbone""" data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain') a_set = [] - # Iterate through all residue, exclude those in (part of) chains defined as peptides - for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol)]: - if residue_belongs_to_ligand(res, config): - continue + # Iterate through all residue, exclude those in chains defined as peptides + for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]: if config.INTRA is not None: if res.GetChain() != config.INTRA: continue @@ -1000,7 +982,7 @@ def find_charged(self, mol): a_contributing.append(pybel.Atom(a)) a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein')) if not len(a_contributing) == 0: - a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative', + a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative', center=centroid([ac.coords for ac in a_contributing]), restype=res.GetName(), resnr=res.GetNum(), reschain=res.GetChain())) @@ -1369,7 +1351,7 @@ def __str__(self): return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join( [lig for lig in formatted_lig_names]) - def load_pdb(self, pdbpath, as_string=False, chains=None): + def load_pdb(self, pdbpath, as_string=False): """Loads a pdb file with protein AND ligand(s), separates and prepares them. If specified 'as_string', the input is a PDB string instead of a path.""" if as_string: @@ -1380,7 +1362,7 @@ def load_pdb(self, pdbpath, as_string=False, chains=None): self.sourcefiles['pdbcomplex.original'] = pdbpath self.sourcefiles['pdbcomplex'] = pdbpath self.information['pdbfixes'] = False - pdbparser = PDBParser(pdbpath, as_string=as_string, chains=chains) # Parse PDB file to find errors and get additional data + pdbparser = PDBParser(pdbpath, as_string=as_string) # Parse PDB file to find errors and get additional data # #@todo Refactor and rename here self.Mapper.proteinmap = pdbparser.proteinmap self.Mapper.reversed_proteinmap = {v: k for k, v in self.Mapper.proteinmap.items()} @@ -1389,24 +1371,6 @@ def load_pdb(self, pdbpath, as_string=False, chains=None): self.altconf = pdbparser.altconformations self.corrected_pdb = pdbparser.corrected_pdb - - - if chains: - receptors, ligands = chains[0], chains[1] - self.receptor_chains = receptors - self.ligand_chains = ligands - logger.info(f"Chains set as receptor: {receptors} and ligand: {ligands}") - else: - self.receptor_chains = None - self.ligand_chains = None - # Call LigandFinder with chain filtering if set - ligandfinder = LigandFinder(self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper) - if self.receptor_chains or self.ligand_chains: - ligandfinder.filter_chains(self.receptor_chains, self.ligand_chains) - - self.ligands = ligandfinder.ligands - self.excluded = ligandfinder.excluded - if not config.PLUGIN_MODE: if pdbparser.num_fixed_lines > 0: logger.info(f'{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file') @@ -1489,15 +1453,6 @@ def analyze(self): for ligand in self.ligands: self.characterize_complex(ligand) - @staticmethod - def atom_belongs_to_ligand(atom, lig_obj): - """test whether atom is part of a peptide or residue ligand.""" - if atom.OBAtom.GetResidue().GetChain() == lig_obj.chain: - if lig_obj.chain not in config.RESIDUES.keys() or atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[ - lig_obj.chain]: - return True - return False - def characterize_complex(self, ligand): """Handles all basic functions for characterizing the interactions for one ligand""" @@ -1528,7 +1483,7 @@ def characterize_complex(self, ligand): if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf] if ligand.type == 'PEPTIDE': # If peptide, don't consider the peptide chain as part of the protein binding site - bs_atoms = [a for a in bs_atoms if not self.atom_belongs_to_ligand(a, lig_obj)] + bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain] if ligand.type == 'INTRA': # Interactions within the chain bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain] @@ -1561,11 +1516,20 @@ def extract_bs(self, cutoff, ligcentroid, resis): @staticmethod def res_belongs_to_bs(res, cutoff, ligcentroid): """Check for each residue if its centroid is within a certain distance to the ligand centroid. - Additionally, checks if a residue belongs to a ligand.""" - res_centroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) + Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)""" + rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) # Check geometry - near_enough = True if euclidean3d(res_centroid, ligcentroid) < cutoff else False - return near_enough and not residue_belongs_to_ligand(res, config) + near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False + # Check chain membership + if config.PEPTIDES: + restricted_chain = True if res.GetChain() in config.PEPTIDES else False + #Todo: Test if properly working + # Add restriction via chains flag + if config.CHAINS: + #print(config.CHAINS[0], res.GetChain(), res.GetChain() in config.CHAINS[0]) + restricted_chain = True if res.GetChain() not in config.CHAINS[0] else False + + return (near_enough and not restricted_chain) def get_atom(self, idx): return self.atoms[idx] @@ -1576,4 +1540,4 @@ def output_path(self): @output_path.setter def output_path(self, path): - self._output_path = tilde_expansion(path) + self._output_path = tilde_expansion(path) \ No newline at end of file From 11a8da8dd1aebb1e3db9d2c5dbe1111995878717 Mon Sep 17 00:00:00 2001 From: Philipp Schake Date: Thu, 5 Dec 2024 10:46:03 +0100 Subject: [PATCH 07/14] Added robustness for chains flag --- plip/structure/preparation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index d2faf3e..ff04617 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -1576,7 +1576,7 @@ def res_belongs_to_bs(res, cutoff, ligcentroid): # Check geometry near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False # Check chain membership - if config.PEPTIDES: + if config.PEPTIDES and not config.CHAINS: restricted_chain = True if res.GetChain() in config.PEPTIDES else False #Todo: Test if properly working # Add restriction via chains flag From 04a7ed276f1712e97d0182a636ce43590ddea08f Mon Sep 17 00:00:00 2001 From: Sarah Naomi Bolz Date: Thu, 5 Dec 2024 14:45:34 +0100 Subject: [PATCH 08/14] find charged residues only for chain that is defined as binding site --- plip/structure/preparation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index ff04617..113741d 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -959,7 +959,11 @@ def find_charged(self, mol): data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain') a_set = [] # Iterate through all residue, exclude those in chains defined as peptides - for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]: + chain_config = config.CHAINS if config.CHAINS else [[], []] + for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES and not r.GetChain() in chain_config[1]]: + if config.CHAINS: + if res.GetChain() not in config.CHAINS[0]: + continue if config.INTRA is not None: if res.GetChain() != config.INTRA: continue @@ -1200,7 +1204,7 @@ def find_charged(self, all_atoms): """ data = namedtuple('lcharge', 'atoms orig_atoms atoms_orig_idx type center fgroup') a_set = [] - if not (config.INTRA or config.PEPTIDES): + if not (config.INTRA or config.PEPTIDES or config.CHAINS): for a in all_atoms: a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) a_orig = self.Mapper.id_to_atom(a_orig_idx) From 4e95a2345fb575b7db430051c651d0c0c9b62a8d Mon Sep 17 00:00:00 2001 From: Sarah Naomi Bolz Date: Thu, 5 Dec 2024 15:56:12 +0100 Subject: [PATCH 09/14] Joined the query whether a residue belongs to the target in one function. --- plip/basic/supplemental.py | 14 ++++++++------ plip/structure/preparation.py | 16 +++------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py index 3d59740..3c8b9a2 100644 --- a/plip/basic/supplemental.py +++ b/plip/basic/supplemental.py @@ -55,12 +55,14 @@ def whichchain(atom): return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None -def residue_belongs_to_ligand(res, config): - """tests whether the residue is part of a peptide or residue ligand.""" - if res.GetChain() in config.PEPTIDES: - if res.GetChain() not in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]: - return True - return False +def residue_belongs_to_target(res, config): + """tests whether the residue is part of a peptide or residue ligand or is not defined as target.""" + if config.CHAINS: + if config.CHAINS[0]: # if target chains were given + return res.GetChain() in config.CHAINS[0] # True if residue is part of target chains + if config.CHAINS[1]: # if ligand but no target chains were given + return res.GetChain() not in config.CHAINS[1] # True if residue is not part of ligand chains + return res.GetChain() not in config.PEPTIDES # True if residue is not part of peptide ligand. ######################### diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index 113741d..784a458 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -14,6 +14,7 @@ from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative +from plip.basic.supplemental import residue_belongs_to_target from plip.structure.detection import halogen, pication, water_bridges, metal_complexation from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge @@ -959,11 +960,7 @@ def find_charged(self, mol): data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain') a_set = [] # Iterate through all residue, exclude those in chains defined as peptides - chain_config = config.CHAINS if config.CHAINS else [[], []] - for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES and not r.GetChain() in chain_config[1]]: - if config.CHAINS: - if res.GetChain() not in config.CHAINS[0]: - continue + for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_target(r, config)]: if config.INTRA is not None: if res.GetChain() != config.INTRA: continue @@ -1579,16 +1576,9 @@ def res_belongs_to_bs(res, cutoff, ligcentroid): rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) # Check geometry near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False - # Check chain membership - if config.PEPTIDES and not config.CHAINS: - restricted_chain = True if res.GetChain() in config.PEPTIDES else False #Todo: Test if properly working # Add restriction via chains flag - if config.CHAINS: - #print(config.CHAINS[0], res.GetChain(), res.GetChain() in config.CHAINS[0]) - restricted_chain = True if res.GetChain() not in config.CHAINS[0] else False - - return (near_enough and not restricted_chain) + return near_enough and residue_belongs_to_target(res, config) def get_atom(self, idx): return self.atoms[idx] From acff5d55b251720f2ad4434bdc359fe67d93cfdd Mon Sep 17 00:00:00 2001 From: Philipp Schake Date: Thu, 12 Dec 2024 08:44:18 +0100 Subject: [PATCH 10/14] Adapted and commented tests that failed with current version due to new bond prioritization and correction in wbridge omega angle compute --- plip/test/test_literature_validated.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/plip/test/test_literature_validated.py b/plip/test/test_literature_validated.py index 6815b8c..144ff40 100644 --- a/plip/test/test_literature_validated.py +++ b/plip/test/test_literature_validated.py @@ -232,6 +232,9 @@ def test_1xdn(self): # Water bridges to Lys307, Arg309 and 111 from phosphate groups waterbridges = {wb.resnr for wb in s.water_bridges} # Water bridge to 60 not found due to prioritization + # Philipp: No interaction to 111 due to prioritization/omega angle compute correction + # --> Now two interactions with ASN92 which were before only one that is not checked here + # self.assertTrue({307, 309}.issubset(waterbridges)) self.assertTrue({307, 309, 111}.issubset(waterbridges)) # pi-stacking interaction with Phe209 pistackres = {pistack.resnr for pistack in s.pistacking} @@ -312,6 +315,8 @@ def test_4kya(self): tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Ala609 + # Philipp: In this case the protein is the acceptor + # hbonds = {hbond.resnr for hbond in s.hbonds_pdon + s.hbonds_ldon} hbonds = {hbond.resnr for hbond in s.hbonds_pdon} self.assertTrue({609}.issubset(hbonds)) # Saltbridge to Asp513 @@ -515,12 +520,16 @@ def test_1bju(self): s = tmpmol.interaction_sets[bsid] #@todo Publication show hydrogen bond interactions for Gly219 # Hydrogen bonds to Ser190, Ser195, Gly219 and Asp189 + + #Philipp: Only Hbont to 189 + 195 are possible. Else, donors would be included in multiple interactions hbonds = {hbond.resnr for hbond in s.hbonds_pdon+s.hbonds_ldon} - self.assertTrue({189, 190, 195}.issubset(hbonds)) + #self.assertTrue({189, 190, 195}.issubset(hbonds)) + self.assertTrue({189, 195}.issubset(hbonds)) # Water bridges to Ser190 and Val227 # Water bridge to 190 not detected due to prioritization + # Philipp: Listen to the comment above waterbridges = {wb.resnr for wb in s.water_bridges} - self.assertTrue({227}.issubset(waterbridges)) + #self.assertTrue({227}.issubset(waterbridges)) # hydrophobic interaction of Leu99 hydrophobics = {hydrophobic.resnr for hydrophobic in s.all_hydrophobic_contacts} self.assertTrue({99}.issubset(hydrophobics)) @@ -587,6 +596,8 @@ def test_2iuz(self): # Water bridges to Trp137 # res nr 52 mentioned in alternative conformation not considered waterbridges = {wb.resnr for wb in s.water_bridges} + # Philipp: Now detects two wbridges with ARG57 because the omega angles are better + #self.assertTrue({57}.issubset(waterbridges)) self.assertTrue({322}.issubset(waterbridges)) # pi-stacking interaction with Trp384, Trp137 and Trp52 pistackres = {pistack.resnr for pistack in s.pistacking} @@ -701,6 +712,8 @@ def test_1hvi(self): # Water bridges waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges} # #@todo Water bridge with 50B not detected + # Philipp: Water bridge with 50A not detected -> Angle prio leads to two wbridges with 50B + # self.assertTrue({'50B'}.issubset(waterbridges)) self.assertTrue({'50A'}.issubset(waterbridges)) # Bridging Ile-B50 and Ile-A50 with ligand # pi-cation Interactions picat = {pication.resnr for pication in s.pication_laro} @@ -752,4 +765,6 @@ def test_1hpx(self): # Water bridges waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges} # Waterbridge with Gly27 is detected instead of Ala28/Asp29 + # Philipp: Waterbridge with 50B not detected rather two towards 50A + # self.assertTrue({'50B', '29A'}.issubset(waterbridges)) self.assertTrue({'50A', '50B', '29A'}.issubset(waterbridges)) From 54901194e7b310f44f574da0eec27fb4fce2ac89 Mon Sep 17 00:00:00 2001 From: Sarah Bolz Date: Fri, 13 Dec 2024 13:57:41 +0100 Subject: [PATCH 11/14] fixed handling of cases where the same chain is given as ligand and as receptor. --- plip/basic/supplemental.py | 12 +++++++----- plip/structure/preparation.py | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py index 3c8b9a2..6939ae5 100644 --- a/plip/basic/supplemental.py +++ b/plip/basic/supplemental.py @@ -55,13 +55,15 @@ def whichchain(atom): return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None -def residue_belongs_to_target(res, config): - """tests whether the residue is part of a peptide or residue ligand or is not defined as target.""" +def residue_belongs_to_receptor(res, config): + """tests whether the residue is defined as receptor and is not part of a peptide or residue ligand.""" if config.CHAINS: - if config.CHAINS[0]: # if target chains were given - return res.GetChain() in config.CHAINS[0] # True if residue is part of target chains - if config.CHAINS[1]: # if ligand but no target chains were given + if config.CHAINS[0] and config.CHAINS[1]: # if receptor and ligand chains were given + return res.GetChain() in config.CHAINS[0] and res.GetChain() not in config.CHAINS[1] + # True if residue is part of receptor chains and not of ligand chains + if config.CHAINS[1]: # if only ligand chains were given return res.GetChain() not in config.CHAINS[1] # True if residue is not part of ligand chains + return False # if only receptor chains were given or both is empty return res.GetChain() not in config.PEPTIDES # True if residue is not part of peptide ligand. diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index 784a458..71483eb 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -14,7 +14,7 @@ from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative -from plip.basic.supplemental import residue_belongs_to_target +from plip.basic.supplemental import residue_belongs_to_receptor from plip.structure.detection import halogen, pication, water_bridges, metal_complexation from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge @@ -960,7 +960,7 @@ def find_charged(self, mol): data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain') a_set = [] # Iterate through all residue, exclude those in chains defined as peptides - for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_target(r, config)]: + for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_receptor(r, config)]: if config.INTRA is not None: if res.GetChain() != config.INTRA: continue @@ -1578,7 +1578,7 @@ def res_belongs_to_bs(res, cutoff, ligcentroid): near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False #Todo: Test if properly working # Add restriction via chains flag - return near_enough and residue_belongs_to_target(res, config) + return near_enough and residue_belongs_to_receptor(res, config) def get_atom(self, idx): return self.atoms[idx] From 54c26b3f49b2760dada97cae7bb698b9850c477d Mon Sep 17 00:00:00 2001 From: Philipp Schake Date: Mon, 16 Dec 2024 11:24:30 +0100 Subject: [PATCH 12/14] removed requirement for inner quotation marks in chains flag --- plip/plipcmd.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/plip/plipcmd.py b/plip/plipcmd.py index 03ef49c..b79b7df 100644 --- a/plip/plipcmd.py +++ b/plip/plipcmd.py @@ -304,13 +304,19 @@ def main(): config.NOHYDRO = arguments.nohydro config.MODEL = arguments.model - try: - config.CHAINS = ast.literal_eval(arguments.chains) if arguments.chains else None + # add inner quotes for python backend + if not arguments.chains: + config.CHAINS = None + else: + import re + quoted_input = re.sub(r'(? Date: Mon, 16 Dec 2024 11:50:39 +0100 Subject: [PATCH 13/14] changed visualization file naming for chains feature to show that lig is a peptide --- plip/visualization/visualize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plip/visualization/visualize.py b/plip/visualization/visualize.py index 3918b5b..b419500 100644 --- a/plip/visualization/visualize.py +++ b/plip/visualization/visualize.py @@ -19,7 +19,7 @@ def visualize_in_pymol(plcomplex): pdbid = plcomplex.pdbid lig_members = plcomplex.lig_members chain = plcomplex.chain - if config.PEPTIDES: + if config.PEPTIDES or config.CHAINS: vis.ligname = 'PeptideChain%s' % plcomplex.chain if config.INTRA is not None: vis.ligname = 'Intra%s' % plcomplex.chain @@ -100,7 +100,7 @@ def visualize_in_pymol(plcomplex): cmd.hide('cartoon', '%sLines' % plcomplex.pdbid) cmd.show('lines', '%sLines' % plcomplex.pdbid) - if config.PEPTIDES: + if config.PEPTIDES or config.CHAINS: filename = "%s_PeptideChain%s" % (pdbid.upper(), plcomplex.chain) if config.PYMOL: vis.save_session(config.OUTPATH, override=filename) From b42655aa98669a931eb00b0b8ba3eb9a6634d3bd Mon Sep 17 00:00:00 2001 From: Sarah Naomi Bolz Date: Mon, 16 Dec 2024 16:07:53 +0100 Subject: [PATCH 14/14] no smiles generation for chains flag and hydrophobic interactions refinement for chains flag. --- plip/structure/preparation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index 71483eb..cd1b081 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -752,7 +752,7 @@ def refine_hydrophobic(all_h, pistacks): hydroph = [h for h in sel2.values()] hydroph_final = [] # 3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist - if config.PEPTIDES or config.INTRA: + if config.PEPTIDES or config.INTRA or config.CHAINS: # the ligand also consists of amino acid residues, repeat step 2 just the other way around sel3 = {} for h in hydroph: @@ -1060,7 +1060,7 @@ def __init__(self, cclass, ligand): self.complex = cclass self.molecule = ligand.mol # Pybel Molecule # get canonical SMILES String, but not for peptide ligand (tend to be too long -> openBabel crashes) - self.smiles = "" if (config.INTRA or config.PEPTIDES) else self.molecule.write(format='can') + self.smiles = "" if (config.INTRA or config.PEPTIDES or config.CHAINS) else self.molecule.write(format='can') self.inchikey = self.molecule.write(format='inchikey') self.can_to_pdb = ligand.can_to_pdb if not len(self.smiles) == 0: