From 54b93f61f38a43c88db498e2e69c367d76d00034 Mon Sep 17 00:00:00 2001
From: Katja Linnemann <kalili94@web.de>
Date: Tue, 17 Oct 2023 15:12:54 +0200
Subject: [PATCH 01/14] Implements residue flag

using the residue flag only part of a chain can be treated as ligand to investigate domain-domain interactions
---
 plip/basic/config.py            |  1 +
 plip/plipcmd.py                 | 19 +++++++++++++++++++
 plip/structure/preparation.py   | 20 +++++++++++++-------
 plip/visualization/visualize.py |  5 ++++-
 4 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/plip/basic/config.py b/plip/basic/config.py
index 4e87be8..dd1fb98 100644
--- a/plip/basic/config.py
+++ b/plip/basic/config.py
@@ -25,6 +25,7 @@
 NOFIXFILE = False  # Turn off writing to files for fixed PDB structures
 PEPTIDES = []  # Definition which chains should be considered as peptide ligands
 INTRA = None
+RESIDUES = {}
 KEEPMOD = False
 DNARECEPTOR = False
 OUTPUTFILENAME = "report"  # Naming for the TXT and XML report files
diff --git a/plip/plipcmd.py b/plip/plipcmd.py
index 78ac79b..342bbaa 100644
--- a/plip/plipcmd.py
+++ b/plip/plipcmd.py
@@ -38,6 +38,16 @@ def threshold_limiter(aparser, arg):
         aparser.error("All thresholds have to be values larger than zero.")
     return arg
 
+def residue_list(input_string):
+    """Parse mix of residue numbers and ranges passed with the --residues flag into one list"""
+    result = []
+    for part in input_string.split(','):
+        if '-' in part:
+            start, end = map(int, part.split('-'))
+            result.extend(range(start, end + 1))
+        else:
+            result.append(int(part))
+    return result
 
 def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
     """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output."""
@@ -218,6 +228,9 @@ def main():
                             help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
                             nargs="+")
     ligandtype.add_argument("--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.")
+    parser.add_argument("--residues", dest="residues", default=[], nargs="+",
+                        help="""Allows to specify which residues of the chain(s) should be considered as peptide ligands.
+                        Give single residues (separated with comma) or ranges (with dash) or both, for several chains separate selections with one space""")
     parser.add_argument("--keepmod", dest="keepmod", default=False,
                         help="Keep modified residues as ligands",
                         action="store_true")
@@ -242,6 +255,11 @@ def main():
         parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val),
                             help=argparse.SUPPRESS)
     arguments = parser.parse_args()
+    # make sure, residues is only used together with --inter (could be expanded to --intra in the future)
+    if arguments.residues and not (arguments.peptides or arguments.intra):
+        parser.error("The --residues option requires specification of a chain with --inter or --peptide")
+    if arguments.residues and len(arguments.residues)!=len(arguments.peptides):
+        parser.error("Please provide residue numbers or ranges for each chain specified. Separate selections with a single space.")
     # configure log levels
     config.VERBOSE = True if arguments.verbose else False
     config.QUIET = True if arguments.quiet else False
@@ -268,6 +286,7 @@ def main():
     config.BREAKCOMPOSITE = arguments.breakcomposite
     config.ALTLOC = arguments.altlocation
     config.PEPTIDES = arguments.peptides
+    config.RESIDUES = dict(zip(arguments.peptides, map(residue_list, arguments.residues)))
     config.INTRA = arguments.intra
     config.NOFIX = arguments.nofix
     config.NOFIXFILE = arguments.nofixfile
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index 32326cf..d92d13a 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -243,7 +243,7 @@ def getpeptides(self, chain):
         given chain without water
         """
         all_from_chain = [o for o in pybel.ob.OBResidueIter(
-            self.proteincomplex.OBMol) if o.GetChain() == chain]  # All residues from chain
+            self.proteincomplex.OBMol) if o.GetChain() == chain and (chain not in config.RESIDUES.keys() or o.GetNum() in config.RESIDUES[chain])]  # All residues from chain
         if len(all_from_chain) == 0:
             return None
         else:
@@ -932,8 +932,11 @@ def find_charged(self, mol):
         """If nucleic acids are part of the receptor, looks for negative charges in phosphate backbone"""
         data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
         a_set = []
-        # Iterate through all residue, exclude those in chains defined as peptides
-        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]:
+        # Iterate through all residue, exclude those in (part of) chains defined as peptides
+        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol)]:
+            if res.GetChain() in config.PEPTIDES:
+                if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]:
+                    continue
             if config.INTRA is not None:
                 if res.GetChain() != config.INTRA:
                     continue
@@ -1475,7 +1478,7 @@ def characterize_complex(self, ligand):
                     if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf]
         if ligand.type == 'PEPTIDE':
             # If peptide, don't consider the peptide chain as part of the protein binding site
-            bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain]
+            bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain or (lig_obj.chain in config.RESIDUES.keys() and not a.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain])]
         if ligand.type == 'INTRA':
             # Interactions within the chain
             bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain]
@@ -1508,13 +1511,16 @@ def extract_bs(self, cutoff, ligcentroid, resis):
     @staticmethod
     def res_belongs_to_bs(res, cutoff, ligcentroid):
         """Check for each residue if its centroid is within a certain distance to the ligand centroid.
-        Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)"""
+        Additionally checks if a residue belongs to a segment restricted by the user (e.g. by defining a peptide chain)"""
         rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
         # Check geometry
         near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
         # Check chain membership
-        restricted_chain = True if res.GetChain() in config.PEPTIDES else False
-        return (near_enough and not restricted_chain)
+        restricted_segment = False
+        if res.GetChain() in config.PEPTIDES:
+            if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]:
+                restricted_segment = True
+        return (near_enough and not restricted_segment)
 
     def get_atom(self, idx):
         return self.atoms[idx]
diff --git a/plip/visualization/visualize.py b/plip/visualization/visualize.py
index 31cce2e..3918b5b 100644
--- a/plip/visualization/visualize.py
+++ b/plip/visualization/visualize.py
@@ -45,7 +45,10 @@ def visualize_in_pymol(plcomplex):
     cmd.set_name(current_name, pdbid)
     cmd.hide('everything', 'all')
     if config.PEPTIDES:
-        cmd.select(ligname, 'chain %s and not resn HOH' % plcomplex.chain)
+        if plcomplex.chain in config.RESIDUES.keys():
+            cmd.select(ligname, 'chain %s and not resn HOH and resi %s' % (plcomplex.chain, "+".join(map(str, config.RESIDUES[plcomplex.chain]))))
+        else:
+            cmd.select(ligname, 'chain %s and not resn HOH' % plcomplex.chain)
     else:
         cmd.select(ligname, 'resn %s and chain %s and resi %s*' % (hetid, chain, plcomplex.position))
     logger.debug(f'selecting ligand for PDBID {pdbid} and ligand name {ligname}')

From 798db26e07058b712597e19c69c811d7eabea0f0 Mon Sep 17 00:00:00 2001
From: Sarah Naomi Bolz <sarah_naomi.bolz@tu-dresden.de>
Date: Tue, 22 Oct 2024 14:14:29 +0200
Subject: [PATCH 02/14] added functions to test whether a residue or atom is
 part of a ligand.

---
 plip/basic/supplemental.py    |  8 ++++++++
 plip/structure/preparation.py | 31 +++++++++++++++++--------------
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py
index 1975c37..bc5a400 100644
--- a/plip/basic/supplemental.py
+++ b/plip/basic/supplemental.py
@@ -55,6 +55,14 @@ def whichchain(atom):
     return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None
 
 
+def residue_belongs_to_ligand(res, config):
+    """tests whether the residue is part of a peptide or residue ligand."""
+    if res.GetChain() in config.PEPTIDES:
+        if res.GetChain() not in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]:
+            return True
+    return False
+
+
 #########################
 # Mathematical operations
 #########################
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index d92d13a..024c82c 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -12,7 +12,7 @@
 from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name
 from plip.basic.supplemental import cluster_doubles, is_lig, normalize_vector, vector, ring_is_planar
 from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize
-from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
+from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance, residue_belongs_to_ligand
 from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative
 from plip.structure.detection import halogen, pication, water_bridges, metal_complexation
 from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge
@@ -934,9 +934,8 @@ def find_charged(self, mol):
         a_set = []
         # Iterate through all residue, exclude those in (part of) chains defined as peptides
         for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol)]:
-            if res.GetChain() in config.PEPTIDES:
-                if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]:
-                    continue
+            if residue_belongs_to_ligand(res, config):
+                continue
             if config.INTRA is not None:
                 if res.GetChain() != config.INTRA:
                     continue
@@ -1448,6 +1447,15 @@ def analyze(self):
         for ligand in self.ligands:
             self.characterize_complex(ligand)
 
+    @staticmethod
+    def atom_belongs_to_ligand(atom, lig_obj):
+        """test whether atom is part of a peptide or residue ligand."""
+        if atom.OBAtom.GetResidue().GetChain() == lig_obj.chain:
+            return True
+        if lig_obj.chain not in config.RESIDUES.keys() and atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain]:
+            return True
+        return False
+
     def characterize_complex(self, ligand):
         """Handles all basic functions for characterizing the interactions for one ligand"""
 
@@ -1478,7 +1486,7 @@ def characterize_complex(self, ligand):
                     if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf]
         if ligand.type == 'PEPTIDE':
             # If peptide, don't consider the peptide chain as part of the protein binding site
-            bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain or (lig_obj.chain in config.RESIDUES.keys() and not a.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain])]
+            bs_atoms = [a for a in bs_atoms if not self.atom_belongs_to_ligand(a, lig_obj)]
         if ligand.type == 'INTRA':
             # Interactions within the chain
             bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain]
@@ -1511,16 +1519,11 @@ def extract_bs(self, cutoff, ligcentroid, resis):
     @staticmethod
     def res_belongs_to_bs(res, cutoff, ligcentroid):
         """Check for each residue if its centroid is within a certain distance to the ligand centroid.
-        Additionally checks if a residue belongs to a segment restricted by the user (e.g. by defining a peptide chain)"""
-        rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
+        Additionally, checks if a residue belongs to a ligand."""
+        res_centroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
         # Check geometry
-        near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
-        # Check chain membership
-        restricted_segment = False
-        if res.GetChain() in config.PEPTIDES:
-            if not res.GetChain() in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]:
-                restricted_segment = True
-        return (near_enough and not restricted_segment)
+        near_enough = True if euclidean3d(res_centroid, ligcentroid) < cutoff else False
+        return near_enough and not residue_belongs_to_ligand(res, config)
 
     def get_atom(self, idx):
         return self.atoms[idx]

From bec7a319f53006d5caf87d177255f080807de797 Mon Sep 17 00:00:00 2001
From: Sarah Naomi Bolz <sarah_naomi.bolz@tu-dresden.de>
Date: Wed, 23 Oct 2024 17:35:35 +0200
Subject: [PATCH 03/14] started reviewing the PDBParser class

---
 plip/plipcmd.py               |  6 +++---
 plip/structure/preparation.py | 21 ++++++++++++---------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/plip/plipcmd.py b/plip/plipcmd.py
index 342bbaa..64c114f 100644
--- a/plip/plipcmd.py
+++ b/plip/plipcmd.py
@@ -137,16 +137,16 @@ def run_analysis(inputstructs, inputpdbids):
     output_prefix = config.OUTPUTFILENAME
 
     if inputstructs is not None:  # Process PDB file(s)
-        num_structures = len(inputstructs)
+        num_structures = len(inputstructs)  # @question: how can it become more than one file? The tilde_expansion function does not consider this case.
         inputstructs = remove_duplicates(inputstructs)
         read_from_stdin = False
         for inputstruct in inputstructs:
-            if inputstruct == '-':
+            if inputstruct == '-':  # @expl: when user gives '-' as input, pdb file is read from stdin
                 inputstruct = sys.stdin.read()
                 read_from_stdin = True
                 if config.RAWSTRING:
                     if sys.version_info < (3,):
-                        inputstruct = bytes(inputstruct).decode('unicode_escape')
+                        inputstruct = bytes(inputstruct).decode('unicode_escape')  # @expl: in Python2, the bytes object is just a string.
                     else:
                         inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape')
             else:
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index 024c82c..d392127 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -21,12 +21,15 @@
 
 
 class PDBParser:
-    def __init__(self, pdbpath, as_string):
-        self.as_string = as_string
-        self.pdbpath = pdbpath
-        self.num_fixed_lines = 0
+    def __init__(self, pdbpath: str, as_string: bool = False):
+        self.as_string: bool = as_string  # @expl: becomes only True if pdb file is read from stdin
+        self.pdbpath: str = pdbpath  # @expl: full filename of downloaded (from pdb id) or provided pdb file. If pdb file was read from stdin, contains the content of the file.
+        self.num_fixed_lines: int = 0
         self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")
+        # @expl: information from the LINK lines in the PDB file, conf1/2 refers to the altLoc indicator
         self.proteinmap, self.modres, self.covalent, self.altconformations, self.corrected_pdb = self.parse_pdb()
+        # @expl: proteinmap:
+        # @expl: covalent: list of all covlinkage namedtuples
 
     def parse_pdb(self):
         """Extracts additional information from PDB files.
@@ -113,8 +116,8 @@ def parse_pdb(self):
                     j += 1
                 else:
                     i += 1
-                    j += 2
-                d[i] = j
+                    j += 2  # @question: Why += 2?
+                d[i] = j  # @question: Why not the other way around d[j] = i?
                 previous_ter = False
             # Numbering Changes at TER records
             if line.startswith("TER"):
@@ -432,7 +435,7 @@ def identify_kmers(self, residues):
         """Using the covalent linkage information, find out which fragments/subunits form a ligand."""
 
         # Remove all those not considered by ligands and pairings including alternate conformations
-        ligdoubles = [[(link.id1, link.chain1, link.pos1),
+        ligdoubles = [[(link.id1, link.chain1, link.pos1),  # @question: What are ligdoubles? Why the word "doubles"?
                        (link.id2, link.chain2, link.pos2)] for link in
                       [c for c in self.covalent if c.id1 in self.lignames_kept and c.id2 in self.lignames_kept
                        and c.conf1 in ['A', ''] and c.conf2 in ['A', '']
@@ -458,7 +461,7 @@ def identify_kmers(self, residues):
             return res_kmers
 
 
-class Mapper:
+class Mapper:  # @question: Why do you need this mapper?
     """Provides functions for mapping atom IDs in the correct way"""
 
     def __init__(self):
@@ -468,7 +471,7 @@ def __init__(self):
 
     def mapid(self, idx, mtype, bsid=None, to='original'):  # Mapping to original IDs is standard for ligands
         if mtype == 'reversed':  # Needed to map internal ID back to original protein ID
-            return self.reversed_proteinmap[idx]
+            return self.reversed_proteinmap[idx]  # @todo: self.reversed_proteinmap must be defined inside class
         if mtype == 'protein':
             return self.proteinmap[idx]
         elif mtype == 'ligand':

From 1ffb4b7ab3c46f8413cfe4f49a12c674d032e7c8 Mon Sep 17 00:00:00 2001
From: Philipp Schake <philipp@bioinf-mbp4.biotec.tu-dresden.de>
Date: Mon, 4 Nov 2024 15:05:22 +0100
Subject: [PATCH 04/14] add local to ignore

---
 .gitignore                    | 1 +
 plip/structure/preparation.py | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2516804..a193256 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,3 +48,4 @@ target/
 # Other
 *~
 .nfs*
+tests/
\ No newline at end of file
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index d392127..c1dc081 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -1454,9 +1454,9 @@ def analyze(self):
     def atom_belongs_to_ligand(atom, lig_obj):
         """test whether atom is part of a peptide or residue ligand."""
         if atom.OBAtom.GetResidue().GetChain() == lig_obj.chain:
-            return True
-        if lig_obj.chain not in config.RESIDUES.keys() and atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[lig_obj.chain]:
-            return True
+            if lig_obj.chain not in config.RESIDUES.keys() or atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[
+                lig_obj.chain]:
+                return True
         return False
 
     def characterize_complex(self, ligand):

From c22be4c8472098ab2bf4aec2b01bd1649fdc0d6a Mon Sep 17 00:00:00 2001
From: Philipp Schake <philipp@bioinf-mbp4.biotec.tu-dresden.de>
Date: Thu, 7 Nov 2024 13:23:08 +0100
Subject: [PATCH 05/14] Start implementing chains selection

---
 plip/plipcmd.py               | 35 ++++++++++----
 plip/structure/preparation.py | 89 +++++++++++++++++++++++++----------
 2 files changed, 91 insertions(+), 33 deletions(-)

diff --git a/plip/plipcmd.py b/plip/plipcmd.py
index 64c114f..fef06a6 100644
--- a/plip/plipcmd.py
+++ b/plip/plipcmd.py
@@ -10,6 +10,7 @@
 import multiprocessing
 import os
 import sys
+import ast
 from argparse import ArgumentParser
 from collections import namedtuple
 
@@ -49,8 +50,9 @@ def residue_list(input_string):
             result.append(int(part))
     return result
 
-def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
-    """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output."""
+def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report', chains=None):
+    """Analysis of a single PDB file with optional chain filtering."""
+    print(chains)
     if not as_string:
         pdb_file_name = pdbfile.split('/')[-1]
         startmessage = f'starting analysis of {pdb_file_name}'
@@ -59,8 +61,7 @@ def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
     logger.info(startmessage)
     mol = PDBComplex()
     mol.output_path = outpath
-    mol.load_pdb(pdbfile, as_string=as_string)
-    # @todo Offers possibility for filter function from command line (by ligand chain, position, hetid)
+    mol.load_pdb(pdbfile, as_string=as_string, chains=chains)  # Pass chains here
     for ligand in mol.ligands:
         mol.characterize_complex(ligand)
 
@@ -126,7 +127,7 @@ def remove_duplicates(slist):
     return unique
 
 
-def run_analysis(inputstructs, inputpdbids):
+def run_analysis(inputstructs, inputpdbids, chains=None):
     """Main function. Calls functions for processing, report generation and visualization."""
     pdbid, pdbpath = None, None
     # @todo For multiprocessing, implement better stacktracing for errors
@@ -157,7 +158,7 @@ def run_analysis(inputstructs, inputpdbids):
                     basename = inputstruct.split('.')[-2].split('/')[-1]
                     config.OUTPATH = '/'.join([config.BASEPATH, basename])
                     output_prefix = 'report'
-            process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix)
+            process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix, chains=chains)
     else:  # Try to fetch the current PDB structure(s) directly from the RCBS server
         num_pdbids = len(inputpdbids)
         inputpdbids = remove_duplicates(inputpdbids)
@@ -166,7 +167,7 @@ def run_analysis(inputstructs, inputpdbids):
             if num_pdbids > 1:
                 config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()])
                 output_prefix = 'report'
-            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix)
+            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix, chains=chains)
 
     if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None:
         if config.BASEPATH in ['.', './']:
@@ -254,6 +255,13 @@ def main():
     for t in thresholds:
         parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val),
                             help=argparse.SUPPRESS)
+
+    # Add argument to define receptor and ligand chains
+    parser.add_argument("--chains", dest="chains", type=str,
+                        help="Specify chains as receptor/ligand groups, e.g., '[['A'], ['B']]'. "
+                             "Use format [['A'], ['B', 'C']] to define A as receptor, and B, C as ligands.")
+
+
     arguments = parser.parse_args()
     # make sure, residues is only used together with --inter (could be expanded to --intra in the future)
     if arguments.residues and not (arguments.peptides or arguments.intra):
@@ -296,6 +304,17 @@ def main():
     config.OUTPUTFILENAME = arguments.outputfilename
     config.NOHYDRO = arguments.nohydro
     config.MODEL = arguments.model
+
+
+    try:
+        config.CHAINS = ast.literal_eval(arguments.chains) if arguments.chains else None
+        if config.CHAINS and not all(isinstance(c, list) for c in config.CHAINS):
+            raise ValueError("Chains should be specified as a list of lists, e.g., '[['A'], ['B', 'C']]'.")
+    except (ValueError, SyntaxError):
+        parser.error("The --chains option must be in the format '[['A'], ['B', 'C']]'.")
+
+    chains = arguments.chains
+
     # Make sure we have pymol with --pics and --pymol
     if config.PICS or config.PYMOL:
         try:
@@ -325,7 +344,7 @@ def main():
     if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX:
         parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle")
     expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None
-    run_analysis(expanded_path, arguments.pdbid)  # Start main script
+    run_analysis(expanded_path, arguments.pdbid, chains=chains)  # Start main script
 
 
 if __name__ == '__main__':
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index c1dc081..11b6a4d 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -21,9 +21,10 @@
 
 
 class PDBParser:
-    def __init__(self, pdbpath: str, as_string: bool = False):
+    def __init__(self, pdbpath: str, as_string: bool = False,  chains: list = None):
         self.as_string: bool = as_string  # @expl: becomes only True if pdb file is read from stdin
         self.pdbpath: str = pdbpath  # @expl: full filename of downloaded (from pdb id) or provided pdb file. If pdb file was read from stdin, contains the content of the file.
+        self.chains = chains  # New chains argument to filter specified chains
         self.num_fixed_lines: int = 0
         self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")
         # @expl: information from the LINK lines in the PDB file, conf1/2 refers to the altLoc indicator
@@ -40,20 +41,20 @@ def parse_pdb(self):
         III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified
         IV. Alternative conformations
         """
+        # Read lines from the PDB content
         if self.as_string:
-            fil = self.pdbpath.rstrip('\n').split('\n')  # Removing trailing newline character
+            fil = self.pdbpath.rstrip('\n').split('\n')  # Split content if reading from string
         else:
-            f = read(self.pdbpath)
-            fil = f.readlines()
-            f.close()
-        corrected_lines = []
+            with open(self.pdbpath, 'r') as f:
+                fil = f.readlines()
+
+        filtered_lines = []
         i, j = 0, 0  # idx and PDB numbering
         d = {}
         modres = set()
         covalent = []
         alt = []
         previous_ter = False
-
         model_dict = {0: list()}
 
         # Standard without fixing
@@ -61,31 +62,31 @@ def parse_pdb(self):
             if not config.PLUGIN_MODE:
                 lastnum = 0  # Atom numbering (has to be consecutive)
                 other_models = False
-                # Model 0 stores header and similar additional data
-                # or the full file if no MODEL entries exist in the file
                 current_model = 0
                 for line in fil:
+                    # Check if the line belongs to relevant chains
+                    if not self.is_relevant_chain(line):
+                        continue  # Skip lines not matching specified chains
+
                     corrected_line, newnum = self.fix_pdbline(line, lastnum)
                     if corrected_line is not None:
                         if corrected_line.startswith('MODEL'):
-                            # reset atom number when new model is encountered
                             lastnum = 0
-                            try:  # Get number of MODEL (1,2,3)
+                            try:
                                 model_num = int(corrected_line[10:14])
-                                # initialize storage for new model
                                 model_dict[model_num] = list()
                                 current_model = model_num
-                                if model_num > 1:  # MODEL 2,3,4 etc.
+                                if model_num > 1:
                                     other_models = True
                             except ValueError:
-                                logger.debug(f'ignoring invalid MODEL entry: {corrected_line}')
+                                logger.debug(f'Ignoring invalid MODEL entry: {corrected_line}')
                         else:
                             lastnum = newnum
                         model_dict[current_model].append(corrected_line)
-                # select model
+
                 try:
                     if other_models:
-                        logger.info(f'selecting model {config.MODEL} for analysis')
+                        logger.info(f'Selecting model {config.MODEL} for analysis')
                     corrected_pdb = ''.join(model_dict[0])
                     corrected_lines = model_dict[0]
                     if current_model > 0:
@@ -95,7 +96,7 @@ def parse_pdb(self):
                     corrected_pdb = ''.join(model_dict[1])
                     corrected_lines = model_dict[1]
                     config.MODEL = 1
-                    logger.warning('invalid model number specified, using first model instead')
+                    logger.warning('Invalid model number specified, using first model instead')
             else:
                 corrected_pdb = self.pdbpath
                 corrected_lines = fil
@@ -116,20 +117,34 @@ def parse_pdb(self):
                     j += 1
                 else:
                     i += 1
-                    j += 2  # @question: Why += 2?
-                d[i] = j  # @question: Why not the other way around d[j] = i?
+                    j += 2  # Adjust numbering after TER records
+                d[i] = j
                 previous_ter = False
-            # Numbering Changes at TER records
+
             if line.startswith("TER"):
                 previous_ter = True
-            # Get modified residues
+
             if line.startswith("MODRES"):
                 modres.add(line[12:15].strip())
-            # Get covalent linkages between ligands
+
             if line.startswith("LINK"):
                 covalent.append(self.get_linkage(line))
+
         return d, modres, covalent, alt, corrected_pdb
 
+    def is_relevant_chain(self, line):
+        """Helper to check if a line belongs to one of the specified chains."""
+        if not self.chains:
+            return True  # No filtering needed if chains are not specified
+
+        # Check if the line represents an ATOM or HETATM record and matches specified chains
+        if line.startswith(("ATOM", "HETATM")):
+            all_chains = self.chains[0] + self.chains[1]
+            chain_id = line[21]  # Column 22 holds the chain ID in PDB format
+            return any(chain_id in group for group in all_chains)  # Check if chain_id is in receptor/ligand groups
+
+        return True  # Include non-atom records by default
+
     def fix_pdbline(self, pdbline, lastnum):
         """Fix a PDB line if information is missing."""
         pdbqt_conversion = {
@@ -240,6 +255,12 @@ def __init__(self, proteincomplex, altconf, modres, covalent, mapper):
         self.ligands = self.getligs()
         self.excluded = sorted(list(self.lignames_all.difference(set(self.lignames_kept))))
 
+    def filter_chains(self, receptor_chains, ligand_chains):
+        """Filter proteincomplex chains to include only specified receptors and ligands."""
+        self.receptor_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in receptor_chains]
+        self.ligand_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in ligand_chains]
+        logger.debug(f"Filtered to receptor chains: {receptor_chains}, ligand chains: {ligand_chains}")
+
     def getpeptides(self, chain):
         """If peptide ligand chains are defined via the command line options,
         try to extract the underlying ligand formed by all residues in the
@@ -259,7 +280,7 @@ def getligs(self):
         Returns all non-empty ligands.
         """
 
-        if config.PEPTIDES == [] and config.INTRA is None:
+        if config.PEPTIDES == [] and config.INTRA is None and config.CHAINS is None:
             # Extract small molecule ligands (default)
             ligands = []
 
@@ -1348,7 +1369,7 @@ def __str__(self):
         return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join(
             [lig for lig in formatted_lig_names])
 
-    def load_pdb(self, pdbpath, as_string=False):
+    def load_pdb(self, pdbpath, as_string=False, chains=None):
         """Loads a pdb file with protein AND ligand(s), separates and prepares them.
         If specified 'as_string', the input is a PDB string instead of a path."""
         if as_string:
@@ -1359,7 +1380,7 @@ def load_pdb(self, pdbpath, as_string=False):
             self.sourcefiles['pdbcomplex.original'] = pdbpath
             self.sourcefiles['pdbcomplex'] = pdbpath
         self.information['pdbfixes'] = False
-        pdbparser = PDBParser(pdbpath, as_string=as_string)  # Parse PDB file to find errors and get additional data
+        pdbparser = PDBParser(pdbpath, as_string=as_string, chains=chains)  # Parse PDB file to find errors and get additional data
         # #@todo Refactor and rename here
         self.Mapper.proteinmap = pdbparser.proteinmap
         self.Mapper.reversed_proteinmap = {v: k for k, v in self.Mapper.proteinmap.items()}
@@ -1368,6 +1389,24 @@ def load_pdb(self, pdbpath, as_string=False):
         self.altconf = pdbparser.altconformations
         self.corrected_pdb = pdbparser.corrected_pdb
 
+
+
+        if chains:
+            receptors, ligands = chains[0], chains[1]
+            self.receptor_chains = receptors
+            self.ligand_chains = ligands
+            logger.info(f"Chains set as receptor: {receptors} and ligand: {ligands}")
+        else:
+            self.receptor_chains = None
+            self.ligand_chains = None
+        # Call LigandFinder with chain filtering if set
+        ligandfinder = LigandFinder(self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper)
+        if self.receptor_chains or self.ligand_chains:
+            ligandfinder.filter_chains(self.receptor_chains, self.ligand_chains)
+
+        self.ligands = ligandfinder.ligands
+        self.excluded = ligandfinder.excluded
+
         if not config.PLUGIN_MODE:
             if pdbparser.num_fixed_lines > 0:
                 logger.info(f'{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file')

From fa17af0fd687c2c5410ce2a60f354339f09acb74 Mon Sep 17 00:00:00 2001
From: Philipp Schake <philipp@bioinf-mbp4.biotec.tu-dresden.de>
Date: Thu, 14 Nov 2024 11:58:11 +0100
Subject: [PATCH 06/14] First implementation of chains flag. Works for two
 chains (one L one R)

---
 .DS_Store                     | Bin 0 -> 6148 bytes
 plip/basic/config.py          |   2 +
 plip/plipcmd.py               |  12 ++-
 plip/structure/preparation.py | 154 +++++++++++++---------------------
 4 files changed, 66 insertions(+), 102 deletions(-)
 create mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..224f00bf3542c34f1efa7d9446196c0fdab59cf4
GIT binary patch
literal 6148
zcmeHK%}T>S5Z>*NO(;SR3gRi?wPI_FMZAPqU%(VSsMN#+4aRI~QhO+cJb=EC590GU
zv%8T>_25OM%)rbyot@cqzm%O0V~o4gu)~<e7&AZ-D<+t41lLhlq@p8LKrZ*lf>|~W
z5)rKAQs93wfbXu(hAd(k%h~e#dke-%oaOEIJFir0^^JznFq+1V_aJ6o?&s6Y^(R;O
zX(&Vzl)fKa#L;YIZJmlV_v190$bvYGAm#cpPD3$s#WW2wS?j3<!!*p1wL72hcly0{
z%kFo3i<Uh<J_NtlJz6YGV|(x5<b3p)Jf-4AQOV&qCuLJ(39sNRohi)jk^sIVrZ0t`
zy-Avg^d6$+M&$;C!~iis46Hc=`r0#^Ykp2T9x*@+{GI{4A8b%WM`NK-UL9cM2mn}v
zuoUodmcSZYqoc7<2rD35l>(|#u15^6${}v+JV#@pP?a;ThYzlw%=LuA<<nt*Tc$Ja
zC?rb^5CdNsz}XMP!v25u^ZYN9$Pfd>z<M&kYXfiK!r63hT{$K8S{w8h6b17Ng`Xwh
i$W{!w*ot>Rr9j+92hh=2D1-<I{SlBfkRb;Clz|U-!ce#X

literal 0
HcmV?d00001

diff --git a/plip/basic/config.py b/plip/basic/config.py
index dd1fb98..bdd9673 100644
--- a/plip/basic/config.py
+++ b/plip/basic/config.py
@@ -32,6 +32,8 @@
 NOPDBCANMAP = False  # Skip calculation of mapping canonical atom order: PDB atom order
 NOHYDRO = False  # Do not add hydrogen bonds (in case already present in the structure)
 MODEL = 1  # The model to be selected for multi-model structures (default = 1).
+CHAINS = None # Define chains for protein-protein interaction detection
+
 
 # Configuration file for Protein-Ligand Interaction Profiler (PLIP)
 # Set thresholds for detection of interactions
diff --git a/plip/plipcmd.py b/plip/plipcmd.py
index fef06a6..3c7538a 100644
--- a/plip/plipcmd.py
+++ b/plip/plipcmd.py
@@ -50,9 +50,8 @@ def residue_list(input_string):
             result.append(int(part))
     return result
 
-def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report', chains=None):
+def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
     """Analysis of a single PDB file with optional chain filtering."""
-    print(chains)
     if not as_string:
         pdb_file_name = pdbfile.split('/')[-1]
         startmessage = f'starting analysis of {pdb_file_name}'
@@ -61,7 +60,7 @@ def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report', chains
     logger.info(startmessage)
     mol = PDBComplex()
     mol.output_path = outpath
-    mol.load_pdb(pdbfile, as_string=as_string, chains=chains)  # Pass chains here
+    mol.load_pdb(pdbfile, as_string=as_string)
     for ligand in mol.ligands:
         mol.characterize_complex(ligand)
 
@@ -158,7 +157,7 @@ def run_analysis(inputstructs, inputpdbids, chains=None):
                     basename = inputstruct.split('.')[-2].split('/')[-1]
                     config.OUTPATH = '/'.join([config.BASEPATH, basename])
                     output_prefix = 'report'
-            process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix, chains=chains)
+            process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix)
     else:  # Try to fetch the current PDB structure(s) directly from the RCBS server
         num_pdbids = len(inputpdbids)
         inputpdbids = remove_duplicates(inputpdbids)
@@ -167,7 +166,7 @@ def run_analysis(inputstructs, inputpdbids, chains=None):
             if num_pdbids > 1:
                 config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()])
                 output_prefix = 'report'
-            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix, chains=chains)
+            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix)
 
     if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None:
         if config.BASEPATH in ['.', './']:
@@ -313,7 +312,6 @@ def main():
     except (ValueError, SyntaxError):
         parser.error("The --chains option must be in the format '[['A'], ['B', 'C']]'.")
 
-    chains = arguments.chains
 
     # Make sure we have pymol with --pics and --pymol
     if config.PICS or config.PYMOL:
@@ -344,7 +342,7 @@ def main():
     if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX:
         parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle")
     expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None
-    run_analysis(expanded_path, arguments.pdbid, chains=chains)  # Start main script
+    run_analysis(expanded_path, arguments.pdbid)  # Start main script
 
 
 if __name__ == '__main__':
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index 11b6a4d..cdfaed0 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -12,7 +12,7 @@
 from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name
 from plip.basic.supplemental import cluster_doubles, is_lig, normalize_vector, vector, ring_is_planar
 from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize
-from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance, residue_belongs_to_ligand
+from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
 from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative
 from plip.structure.detection import halogen, pication, water_bridges, metal_complexation
 from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge
@@ -21,16 +21,12 @@
 
 
 class PDBParser:
-    def __init__(self, pdbpath: str, as_string: bool = False,  chains: list = None):
-        self.as_string: bool = as_string  # @expl: becomes only True if pdb file is read from stdin
-        self.pdbpath: str = pdbpath  # @expl: full filename of downloaded (from pdb id) or provided pdb file. If pdb file was read from stdin, contains the content of the file.
-        self.chains = chains  # New chains argument to filter specified chains
-        self.num_fixed_lines: int = 0
+    def __init__(self, pdbpath, as_string):
+        self.as_string = as_string
+        self.pdbpath = pdbpath
+        self.num_fixed_lines = 0
         self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")
-        # @expl: information from the LINK lines in the PDB file, conf1/2 refers to the altLoc indicator
         self.proteinmap, self.modres, self.covalent, self.altconformations, self.corrected_pdb = self.parse_pdb()
-        # @expl: proteinmap:
-        # @expl: covalent: list of all covlinkage namedtuples
 
     def parse_pdb(self):
         """Extracts additional information from PDB files.
@@ -41,20 +37,20 @@ def parse_pdb(self):
         III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified
         IV. Alternative conformations
         """
-        # Read lines from the PDB content
         if self.as_string:
-            fil = self.pdbpath.rstrip('\n').split('\n')  # Split content if reading from string
+            fil = self.pdbpath.rstrip('\n').split('\n')  # Removing trailing newline character
         else:
-            with open(self.pdbpath, 'r') as f:
-                fil = f.readlines()
-
-        filtered_lines = []
+            f = read(self.pdbpath)
+            fil = f.readlines()
+            f.close()
+        corrected_lines = []
         i, j = 0, 0  # idx and PDB numbering
         d = {}
         modres = set()
         covalent = []
         alt = []
         previous_ter = False
+
         model_dict = {0: list()}
 
         # Standard without fixing
@@ -62,31 +58,31 @@ def parse_pdb(self):
             if not config.PLUGIN_MODE:
                 lastnum = 0  # Atom numbering (has to be consecutive)
                 other_models = False
+                # Model 0 stores header and similar additional data
+                # or the full file if no MODEL entries exist in the file
                 current_model = 0
                 for line in fil:
-                    # Check if the line belongs to relevant chains
-                    if not self.is_relevant_chain(line):
-                        continue  # Skip lines not matching specified chains
-
                     corrected_line, newnum = self.fix_pdbline(line, lastnum)
                     if corrected_line is not None:
                         if corrected_line.startswith('MODEL'):
+                            # reset atom number when new model is encountered
                             lastnum = 0
-                            try:
+                            try:  # Get number of MODEL (1,2,3)
                                 model_num = int(corrected_line[10:14])
+                                # initialize storage for new model
                                 model_dict[model_num] = list()
                                 current_model = model_num
-                                if model_num > 1:
+                                if model_num > 1:  # MODEL 2,3,4 etc.
                                     other_models = True
                             except ValueError:
-                                logger.debug(f'Ignoring invalid MODEL entry: {corrected_line}')
+                                logger.debug(f'ignoring invalid MODEL entry: {corrected_line}')
                         else:
                             lastnum = newnum
                         model_dict[current_model].append(corrected_line)
-
+                # select model
                 try:
                     if other_models:
-                        logger.info(f'Selecting model {config.MODEL} for analysis')
+                        logger.info(f'selecting model {config.MODEL} for analysis')
                     corrected_pdb = ''.join(model_dict[0])
                     corrected_lines = model_dict[0]
                     if current_model > 0:
@@ -96,7 +92,7 @@ def parse_pdb(self):
                     corrected_pdb = ''.join(model_dict[1])
                     corrected_lines = model_dict[1]
                     config.MODEL = 1
-                    logger.warning('Invalid model number specified, using first model instead')
+                    logger.warning('invalid model number specified, using first model instead')
             else:
                 corrected_pdb = self.pdbpath
                 corrected_lines = fil
@@ -117,34 +113,20 @@ def parse_pdb(self):
                     j += 1
                 else:
                     i += 1
-                    j += 2  # Adjust numbering after TER records
+                    j += 2
                 d[i] = j
                 previous_ter = False
-
+            # Numbering Changes at TER records
             if line.startswith("TER"):
                 previous_ter = True
-
+            # Get modified residues
             if line.startswith("MODRES"):
                 modres.add(line[12:15].strip())
-
+            # Get covalent linkages between ligands
             if line.startswith("LINK"):
                 covalent.append(self.get_linkage(line))
-
         return d, modres, covalent, alt, corrected_pdb
 
-    def is_relevant_chain(self, line):
-        """Helper to check if a line belongs to one of the specified chains."""
-        if not self.chains:
-            return True  # No filtering needed if chains are not specified
-
-        # Check if the line represents an ATOM or HETATM record and matches specified chains
-        if line.startswith(("ATOM", "HETATM")):
-            all_chains = self.chains[0] + self.chains[1]
-            chain_id = line[21]  # Column 22 holds the chain ID in PDB format
-            return any(chain_id in group for group in all_chains)  # Check if chain_id is in receptor/ligand groups
-
-        return True  # Include non-atom records by default
-
     def fix_pdbline(self, pdbline, lastnum):
         """Fix a PDB line if information is missing."""
         pdbqt_conversion = {
@@ -255,19 +237,13 @@ def __init__(self, proteincomplex, altconf, modres, covalent, mapper):
         self.ligands = self.getligs()
         self.excluded = sorted(list(self.lignames_all.difference(set(self.lignames_kept))))
 
-    def filter_chains(self, receptor_chains, ligand_chains):
-        """Filter proteincomplex chains to include only specified receptors and ligands."""
-        self.receptor_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in receptor_chains]
-        self.ligand_residues = [res for res in self.proteincomplex.OBMol if res.GetChain() in ligand_chains]
-        logger.debug(f"Filtered to receptor chains: {receptor_chains}, ligand chains: {ligand_chains}")
-
     def getpeptides(self, chain):
         """If peptide ligand chains are defined via the command line options,
         try to extract the underlying ligand formed by all residues in the
         given chain without water
         """
         all_from_chain = [o for o in pybel.ob.OBResidueIter(
-            self.proteincomplex.OBMol) if o.GetChain() == chain and (chain not in config.RESIDUES.keys() or o.GetNum() in config.RESIDUES[chain])]  # All residues from chain
+            self.proteincomplex.OBMol) if o.GetChain() == chain]  # All residues from chain
         if len(all_from_chain) == 0:
             return None
         else:
@@ -308,8 +284,16 @@ def getligs(self):
         else:
             # Extract peptides from given chains
             self.water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)]
-            if config.PEPTIDES:
+            if config.PEPTIDES and not config.CHAINS:
                 peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES]
+
+            #Todo: Validate change here... Do we want to combine multiple chains to a single ligand?
+            # if yes can be easily added to the getpeptides function by flatten the resulting list - Philipp
+            elif config.CHAINS:
+                # chains is defined as list of list e.g. [['A'], ['B', 'C']] in which second list contains the
+                # ligand chains and the first one should be the receptor
+                peptide_ligands = [self.getpeptides(chain) for chain in config.CHAINS[1]]
+
             elif config.INTRA is not None:
                 peptide_ligands = [self.getpeptides(config.INTRA), ]
 
@@ -328,7 +312,7 @@ def extract_ligand(self, kmer):
         names = [x[0] for x in members]
         longname = '-'.join([x[0] for x in members])
 
-        if config.PEPTIDES:
+        if config.PEPTIDES or config.CHAINS:
             ligtype = 'PEPTIDE'
         elif config.INTRA is not None:
             ligtype = 'INTRA'
@@ -456,7 +440,7 @@ def identify_kmers(self, residues):
         """Using the covalent linkage information, find out which fragments/subunits form a ligand."""
 
         # Remove all those not considered by ligands and pairings including alternate conformations
-        ligdoubles = [[(link.id1, link.chain1, link.pos1),  # @question: What are ligdoubles? Why the word "doubles"?
+        ligdoubles = [[(link.id1, link.chain1, link.pos1),
                        (link.id2, link.chain2, link.pos2)] for link in
                       [c for c in self.covalent if c.id1 in self.lignames_kept and c.id2 in self.lignames_kept
                        and c.conf1 in ['A', ''] and c.conf2 in ['A', '']
@@ -482,7 +466,7 @@ def identify_kmers(self, residues):
             return res_kmers
 
 
-class Mapper:  # @question: Why do you need this mapper?
+class Mapper:
     """Provides functions for mapping atom IDs in the correct way"""
 
     def __init__(self):
@@ -492,7 +476,7 @@ def __init__(self):
 
     def mapid(self, idx, mtype, bsid=None, to='original'):  # Mapping to original IDs is standard for ligands
         if mtype == 'reversed':  # Needed to map internal ID back to original protein ID
-            return self.reversed_proteinmap[idx]  # @todo: self.reversed_proteinmap must be defined inside class
+            return self.reversed_proteinmap[idx]
         if mtype == 'protein':
             return self.proteinmap[idx]
         elif mtype == 'ligand':
@@ -956,10 +940,8 @@ def find_charged(self, mol):
         """If nucleic acids are part of the receptor, looks for negative charges in phosphate backbone"""
         data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
         a_set = []
-        # Iterate through all residue, exclude those in (part of) chains defined as peptides
-        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol)]:
-            if residue_belongs_to_ligand(res, config):
-                continue
+        # Iterate through all residue, exclude those in chains defined as peptides
+        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]:
             if config.INTRA is not None:
                 if res.GetChain() != config.INTRA:
                     continue
@@ -1000,7 +982,7 @@ def find_charged(self, mol):
                         a_contributing.append(pybel.Atom(a))
                         a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein'))
                 if not len(a_contributing) == 0:
-                    a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative', 
+                    a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative',
                                       center=centroid([ac.coords for ac in a_contributing]), restype=res.GetName(),
                                       resnr=res.GetNum(),
                                       reschain=res.GetChain()))
@@ -1369,7 +1351,7 @@ def __str__(self):
         return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join(
             [lig for lig in formatted_lig_names])
 
-    def load_pdb(self, pdbpath, as_string=False, chains=None):
+    def load_pdb(self, pdbpath, as_string=False):
         """Loads a pdb file with protein AND ligand(s), separates and prepares them.
         If specified 'as_string', the input is a PDB string instead of a path."""
         if as_string:
@@ -1380,7 +1362,7 @@ def load_pdb(self, pdbpath, as_string=False, chains=None):
             self.sourcefiles['pdbcomplex.original'] = pdbpath
             self.sourcefiles['pdbcomplex'] = pdbpath
         self.information['pdbfixes'] = False
-        pdbparser = PDBParser(pdbpath, as_string=as_string, chains=chains)  # Parse PDB file to find errors and get additional data
+        pdbparser = PDBParser(pdbpath, as_string=as_string)  # Parse PDB file to find errors and get additional data
         # #@todo Refactor and rename here
         self.Mapper.proteinmap = pdbparser.proteinmap
         self.Mapper.reversed_proteinmap = {v: k for k, v in self.Mapper.proteinmap.items()}
@@ -1389,24 +1371,6 @@ def load_pdb(self, pdbpath, as_string=False, chains=None):
         self.altconf = pdbparser.altconformations
         self.corrected_pdb = pdbparser.corrected_pdb
 
-
-
-        if chains:
-            receptors, ligands = chains[0], chains[1]
-            self.receptor_chains = receptors
-            self.ligand_chains = ligands
-            logger.info(f"Chains set as receptor: {receptors} and ligand: {ligands}")
-        else:
-            self.receptor_chains = None
-            self.ligand_chains = None
-        # Call LigandFinder with chain filtering if set
-        ligandfinder = LigandFinder(self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper)
-        if self.receptor_chains or self.ligand_chains:
-            ligandfinder.filter_chains(self.receptor_chains, self.ligand_chains)
-
-        self.ligands = ligandfinder.ligands
-        self.excluded = ligandfinder.excluded
-
         if not config.PLUGIN_MODE:
             if pdbparser.num_fixed_lines > 0:
                 logger.info(f'{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file')
@@ -1489,15 +1453,6 @@ def analyze(self):
         for ligand in self.ligands:
             self.characterize_complex(ligand)
 
-    @staticmethod
-    def atom_belongs_to_ligand(atom, lig_obj):
-        """test whether atom is part of a peptide or residue ligand."""
-        if atom.OBAtom.GetResidue().GetChain() == lig_obj.chain:
-            if lig_obj.chain not in config.RESIDUES.keys() or atom.OBAtom.GetResidue().GetNum() in config.RESIDUES[
-                lig_obj.chain]:
-                return True
-        return False
-
     def characterize_complex(self, ligand):
         """Handles all basic functions for characterizing the interactions for one ligand"""
 
@@ -1528,7 +1483,7 @@ def characterize_complex(self, ligand):
                     if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf]
         if ligand.type == 'PEPTIDE':
             # If peptide, don't consider the peptide chain as part of the protein binding site
-            bs_atoms = [a for a in bs_atoms if not self.atom_belongs_to_ligand(a, lig_obj)]
+            bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain]
         if ligand.type == 'INTRA':
             # Interactions within the chain
             bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain]
@@ -1561,11 +1516,20 @@ def extract_bs(self, cutoff, ligcentroid, resis):
     @staticmethod
     def res_belongs_to_bs(res, cutoff, ligcentroid):
         """Check for each residue if its centroid is within a certain distance to the ligand centroid.
-        Additionally, checks if a residue belongs to a ligand."""
-        res_centroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
+        Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)"""
+        rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
         # Check geometry
-        near_enough = True if euclidean3d(res_centroid, ligcentroid) < cutoff else False
-        return near_enough and not residue_belongs_to_ligand(res, config)
+        near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
+        # Check chain membership
+        if config.PEPTIDES:
+            restricted_chain = True if res.GetChain() in config.PEPTIDES else False
+        #Todo: Test if properly working
+        # Add restriction via chains flag
+        if config.CHAINS:
+            #print(config.CHAINS[0], res.GetChain(), res.GetChain() in config.CHAINS[0])
+            restricted_chain = True if res.GetChain() not in config.CHAINS[0] else False
+
+        return (near_enough and not restricted_chain)
 
     def get_atom(self, idx):
         return self.atoms[idx]
@@ -1576,4 +1540,4 @@ def output_path(self):
 
     @output_path.setter
     def output_path(self, path):
-        self._output_path = tilde_expansion(path)
+        self._output_path = tilde_expansion(path)
\ No newline at end of file

From 11a8da8dd1aebb1e3db9d2c5dbe1111995878717 Mon Sep 17 00:00:00 2001
From: Philipp Schake <philipp@bioinf-mbp4.biotec.tu-dresden.de>
Date: Thu, 5 Dec 2024 10:46:03 +0100
Subject: [PATCH 07/14] Added robustness for chains flag

---
 plip/structure/preparation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index d2faf3e..ff04617 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -1576,7 +1576,7 @@ def res_belongs_to_bs(res, cutoff, ligcentroid):
         # Check geometry
         near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
         # Check chain membership
-        if config.PEPTIDES:
+        if config.PEPTIDES and not config.CHAINS:
             restricted_chain = True if res.GetChain() in config.PEPTIDES else False
         #Todo: Test if properly working
         # Add restriction via chains flag

From 04a7ed276f1712e97d0182a636ce43590ddea08f Mon Sep 17 00:00:00 2001
From: Sarah Naomi Bolz <sarah_naomi.bolz@tu-dresden.de>
Date: Thu, 5 Dec 2024 14:45:34 +0100
Subject: [PATCH 08/14] find charged residues only for chain that is defined as
 binding site

---
 plip/structure/preparation.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index ff04617..113741d 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -959,7 +959,11 @@ def find_charged(self, mol):
         data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
         a_set = []
         # Iterate through all residue, exclude those in chains defined as peptides
-        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]:
+        chain_config = config.CHAINS if config.CHAINS else [[], []]
+        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES and not r.GetChain() in chain_config[1]]:
+            if config.CHAINS:
+                if res.GetChain() not in config.CHAINS[0]:
+                    continue
             if config.INTRA is not None:
                 if res.GetChain() != config.INTRA:
                     continue
@@ -1200,7 +1204,7 @@ def find_charged(self, all_atoms):
         """
         data = namedtuple('lcharge', 'atoms orig_atoms atoms_orig_idx type center fgroup')
         a_set = []
-        if not (config.INTRA or config.PEPTIDES):
+        if not (config.INTRA or config.PEPTIDES or config.CHAINS):
             for a in all_atoms:
                 a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
                 a_orig = self.Mapper.id_to_atom(a_orig_idx)

From 4e95a2345fb575b7db430051c651d0c0c9b62a8d Mon Sep 17 00:00:00 2001
From: Sarah Naomi Bolz <sarah_naomi.bolz@tu-dresden.de>
Date: Thu, 5 Dec 2024 15:56:12 +0100
Subject: [PATCH 09/14] Joined the query whether a residue belongs to the
 target in one function.

---
 plip/basic/supplemental.py    | 14 ++++++++------
 plip/structure/preparation.py | 16 +++-------------
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py
index 3d59740..3c8b9a2 100644
--- a/plip/basic/supplemental.py
+++ b/plip/basic/supplemental.py
@@ -55,12 +55,14 @@ def whichchain(atom):
     return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None
 
 
-def residue_belongs_to_ligand(res, config):
-    """tests whether the residue is part of a peptide or residue ligand."""
-    if res.GetChain() in config.PEPTIDES:
-        if res.GetChain() not in config.RESIDUES.keys() or res.GetNum() in config.RESIDUES[res.GetChain()]:
-            return True
-    return False
+def residue_belongs_to_target(res, config):
+    """tests whether the residue is part of a peptide or residue ligand or is not defined as target."""
+    if config.CHAINS:
+        if config.CHAINS[0]:  # if target chains were given
+            return res.GetChain() in config.CHAINS[0]  # True if residue is part of target chains
+        if config.CHAINS[1]:  # if ligand but no target chains were given
+            return res.GetChain() not in config.CHAINS[1]  # True if residue is not part of ligand chains
+    return res.GetChain() not in config.PEPTIDES  # True if residue is not part of peptide ligand.
 
 
 #########################
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index 113741d..784a458 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -14,6 +14,7 @@
 from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize
 from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
 from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative
+from plip.basic.supplemental import residue_belongs_to_target
 from plip.structure.detection import halogen, pication, water_bridges, metal_complexation
 from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge
 
@@ -959,11 +960,7 @@ def find_charged(self, mol):
         data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
         a_set = []
         # Iterate through all residue, exclude those in chains defined as peptides
-        chain_config = config.CHAINS if config.CHAINS else [[], []]
-        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES and not r.GetChain() in chain_config[1]]:
-            if config.CHAINS:
-                if res.GetChain() not in config.CHAINS[0]:
-                    continue
+        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_target(r, config)]:
             if config.INTRA is not None:
                 if res.GetChain() != config.INTRA:
                     continue
@@ -1579,16 +1576,9 @@ def res_belongs_to_bs(res, cutoff, ligcentroid):
         rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
         # Check geometry
         near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
-        # Check chain membership
-        if config.PEPTIDES and not config.CHAINS:
-            restricted_chain = True if res.GetChain() in config.PEPTIDES else False
         #Todo: Test if properly working
         # Add restriction via chains flag
-        if config.CHAINS:
-            #print(config.CHAINS[0], res.GetChain(), res.GetChain() in config.CHAINS[0])
-            restricted_chain = True if res.GetChain() not in config.CHAINS[0] else False
-
-        return (near_enough and not restricted_chain)
+        return near_enough and residue_belongs_to_target(res, config)
 
     def get_atom(self, idx):
         return self.atoms[idx]

From acff5d55b251720f2ad4434bdc359fe67d93cfdd Mon Sep 17 00:00:00 2001
From: Philipp Schake <philipp@bioinf-mbp4.biotec.tu-dresden.de>
Date: Thu, 12 Dec 2024 08:44:18 +0100
Subject: [PATCH 10/14] Adapted and commented tests that failed with current
 version due to new bond prioritization and correction in wbridge omega angle
 compute

---
 plip/test/test_literature_validated.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/plip/test/test_literature_validated.py b/plip/test/test_literature_validated.py
index 6815b8c..144ff40 100644
--- a/plip/test/test_literature_validated.py
+++ b/plip/test/test_literature_validated.py
@@ -232,6 +232,9 @@ def test_1xdn(self):
         # Water bridges to Lys307, Arg309 and 111 from phosphate groups
         waterbridges = {wb.resnr for wb in s.water_bridges}
         # Water bridge to 60 not found due to prioritization
+        # Philipp: No interaction to 111 due to prioritization/omega angle compute correction
+        # --> Now two interactions with ASN92 which were before only one that is not checked here
+        # self.assertTrue({307, 309}.issubset(waterbridges))
         self.assertTrue({307, 309, 111}.issubset(waterbridges))
         # pi-stacking interaction with Phe209
         pistackres = {pistack.resnr for pistack in s.pistacking}
@@ -312,6 +315,8 @@ def test_4kya(self):
                 tmpmol.characterize_complex(ligand)
         s = tmpmol.interaction_sets[bsid]
         # Hydrogen bonds to Ala609
+        # Philipp: In this case the protein is the acceptor
+        # hbonds = {hbond.resnr for hbond in s.hbonds_pdon + s.hbonds_ldon}
         hbonds = {hbond.resnr for hbond in s.hbonds_pdon}
         self.assertTrue({609}.issubset(hbonds))
         # Saltbridge to Asp513
@@ -515,12 +520,16 @@ def test_1bju(self):
         s = tmpmol.interaction_sets[bsid]
         #@todo Publication show hydrogen bond interactions for Gly219
         # Hydrogen bonds to Ser190, Ser195, Gly219 and Asp189
+
+        #Philipp: Only Hbont to 189 + 195 are possible. Else, donors would be included in multiple interactions
         hbonds = {hbond.resnr for hbond in s.hbonds_pdon+s.hbonds_ldon}
-        self.assertTrue({189, 190, 195}.issubset(hbonds))
+        #self.assertTrue({189, 190, 195}.issubset(hbonds))
+        self.assertTrue({189, 195}.issubset(hbonds))
         # Water bridges to Ser190 and Val227
         # Water bridge to 190 not detected due to prioritization
+        # Philipp: Listen to the comment above
         waterbridges = {wb.resnr for wb in s.water_bridges}
-        self.assertTrue({227}.issubset(waterbridges))
+        #self.assertTrue({227}.issubset(waterbridges))
         # hydrophobic interaction of Leu99
         hydrophobics = {hydrophobic.resnr for hydrophobic in s.all_hydrophobic_contacts}
         self.assertTrue({99}.issubset(hydrophobics))
@@ -587,6 +596,8 @@ def test_2iuz(self):
         # Water bridges to Trp137
         # res nr 52 mentioned in alternative conformation not considered
         waterbridges = {wb.resnr for wb in s.water_bridges}
+        # Philipp: Now detects two wbridges with ARG57 because the omega angles are better
+        #self.assertTrue({57}.issubset(waterbridges))
         self.assertTrue({322}.issubset(waterbridges))
         # pi-stacking interaction with Trp384, Trp137 and Trp52
         pistackres = {pistack.resnr for pistack in s.pistacking}
@@ -701,6 +712,8 @@ def test_1hvi(self):
         # Water bridges
         waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges}
         # #@todo Water bridge with 50B not detected
+        # Philipp: Water bridge with 50A not detected -> Angle prio leads to two wbridges with 50B
+        # self.assertTrue({'50B'}.issubset(waterbridges))
         self.assertTrue({'50A'}.issubset(waterbridges))  # Bridging Ile-B50 and Ile-A50 with ligand
         # pi-cation Interactions
         picat = {pication.resnr for pication in s.pication_laro}
@@ -752,4 +765,6 @@ def test_1hpx(self):
         # Water bridges
         waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges}
         # Waterbridge with Gly27 is detected instead of Ala28/Asp29
+        # Philipp: Waterbridge with 50B not detected rather two towards 50A
+        # self.assertTrue({'50B', '29A'}.issubset(waterbridges))
         self.assertTrue({'50A', '50B', '29A'}.issubset(waterbridges))

From 54901194e7b310f44f574da0eec27fb4fce2ac89 Mon Sep 17 00:00:00 2001
From: Sarah Bolz <sarahbolz@bioinf-mba-6.local>
Date: Fri, 13 Dec 2024 13:57:41 +0100
Subject: [PATCH 11/14] fixed handling of cases where the same chain is given
 as ligand and as receptor.

---
 plip/basic/supplemental.py    | 12 +++++++-----
 plip/structure/preparation.py |  6 +++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py
index 3c8b9a2..6939ae5 100644
--- a/plip/basic/supplemental.py
+++ b/plip/basic/supplemental.py
@@ -55,13 +55,15 @@ def whichchain(atom):
     return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None
 
 
-def residue_belongs_to_target(res, config):
-    """tests whether the residue is part of a peptide or residue ligand or is not defined as target."""
+def residue_belongs_to_receptor(res, config):
+    """tests whether the residue is defined as receptor and is not part of a peptide or residue ligand."""
     if config.CHAINS:
-        if config.CHAINS[0]:  # if target chains were given
-            return res.GetChain() in config.CHAINS[0]  # True if residue is part of target chains
-        if config.CHAINS[1]:  # if ligand but no target chains were given
+        if config.CHAINS[0] and config.CHAINS[1]:  # if receptor and ligand chains were given
+            return res.GetChain() in config.CHAINS[0] and res.GetChain() not in config.CHAINS[1]
+            # True if residue is part of receptor chains and not of ligand chains
+        if config.CHAINS[1]:  # if only ligand chains were given
             return res.GetChain() not in config.CHAINS[1]  # True if residue is not part of ligand chains
+        return False  # if only receptor chains were given or both is empty
     return res.GetChain() not in config.PEPTIDES  # True if residue is not part of peptide ligand.
 
 
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index 784a458..71483eb 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -14,7 +14,7 @@
 from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize
 from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
 from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative
-from plip.basic.supplemental import residue_belongs_to_target
+from plip.basic.supplemental import residue_belongs_to_receptor
 from plip.structure.detection import halogen, pication, water_bridges, metal_complexation
 from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge
 
@@ -960,7 +960,7 @@ def find_charged(self, mol):
         data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
         a_set = []
         # Iterate through all residue, exclude those in chains defined as peptides
-        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_target(r, config)]:
+        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_receptor(r, config)]:
             if config.INTRA is not None:
                 if res.GetChain() != config.INTRA:
                     continue
@@ -1578,7 +1578,7 @@ def res_belongs_to_bs(res, cutoff, ligcentroid):
         near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
         #Todo: Test if properly working
         # Add restriction via chains flag
-        return near_enough and residue_belongs_to_target(res, config)
+        return near_enough and residue_belongs_to_receptor(res, config)
 
     def get_atom(self, idx):
         return self.atoms[idx]

From 54c26b3f49b2760dada97cae7bb698b9850c477d Mon Sep 17 00:00:00 2001
From: Philipp Schake <philipp@bioinf-mbp4.biotec.tu-dresden.de>
Date: Mon, 16 Dec 2024 11:24:30 +0100
Subject: [PATCH 12/14] removed requirement for inner quotation marks in chains
 flag

---
 plip/plipcmd.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/plip/plipcmd.py b/plip/plipcmd.py
index 03ef49c..b79b7df 100644
--- a/plip/plipcmd.py
+++ b/plip/plipcmd.py
@@ -304,13 +304,19 @@ def main():
     config.NOHYDRO = arguments.nohydro
     config.MODEL = arguments.model
 
-
     try:
-        config.CHAINS = ast.literal_eval(arguments.chains) if arguments.chains else None
+        # add inner quotes for python backend
+        if not arguments.chains:
+            config.CHAINS = None
+        else:
+            import re
+            quoted_input = re.sub(r'(?<!["\'])\b([a-zA-Z0-9_]+)\b(?!["\'])', r'"\1"', arguments.chains)
+            config.CHAINS = ast.literal_eval(quoted_input)
+        print(config.CHAINS)
         if config.CHAINS and not all(isinstance(c, list) for c in config.CHAINS):
-            raise ValueError("Chains should be specified as a list of lists, e.g., '[['A'], ['B', 'C']]'.")
+            raise ValueError("Chains should be specified as a list of lists, e.g., '[[A], [B, C]]'.")
     except (ValueError, SyntaxError):
-        parser.error("The --chains option must be in the format '[['A'], ['B', 'C']]'.")
+        parser.error("The --chains option must be in the format '[[A], [B, C]]'.")
 
 
     # Make sure we have pymol with --pics and --pymol

From 7569212c65c5f640cbd3d0b4f75bf7c44d212161 Mon Sep 17 00:00:00 2001
From: Philipp Schake <philipp@bioinf-mbp4.biotec.tu-dresden.de>
Date: Mon, 16 Dec 2024 11:50:39 +0100
Subject: [PATCH 13/14] changed visualization file naming for chains feature to
 show that lig is a peptide

---
 plip/visualization/visualize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/plip/visualization/visualize.py b/plip/visualization/visualize.py
index 3918b5b..b419500 100644
--- a/plip/visualization/visualize.py
+++ b/plip/visualization/visualize.py
@@ -19,7 +19,7 @@ def visualize_in_pymol(plcomplex):
     pdbid = plcomplex.pdbid
     lig_members = plcomplex.lig_members
     chain = plcomplex.chain
-    if config.PEPTIDES:
+    if config.PEPTIDES or config.CHAINS:
         vis.ligname = 'PeptideChain%s' % plcomplex.chain
     if config.INTRA is not None:
         vis.ligname = 'Intra%s' % plcomplex.chain
@@ -100,7 +100,7 @@ def visualize_in_pymol(plcomplex):
         cmd.hide('cartoon', '%sLines' % plcomplex.pdbid)
         cmd.show('lines', '%sLines' % plcomplex.pdbid)
 
-    if config.PEPTIDES:
+    if config.PEPTIDES or config.CHAINS:
         filename = "%s_PeptideChain%s" % (pdbid.upper(), plcomplex.chain)
         if config.PYMOL:
             vis.save_session(config.OUTPATH, override=filename)

From b42655aa98669a931eb00b0b8ba3eb9a6634d3bd Mon Sep 17 00:00:00 2001
From: Sarah Naomi Bolz <sarah_naomi.bolz@tu-dresden.de>
Date: Mon, 16 Dec 2024 16:07:53 +0100
Subject: [PATCH 14/14] no smiles generation for chains flag and hydrophobic
 interactions refinement for chains flag.

---
 plip/structure/preparation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
index 71483eb..cd1b081 100644
--- a/plip/structure/preparation.py
+++ b/plip/structure/preparation.py
@@ -752,7 +752,7 @@ def refine_hydrophobic(all_h, pistacks):
         hydroph = [h for h in sel2.values()]
         hydroph_final = []
         #  3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist
-        if config.PEPTIDES or config.INTRA:
+        if config.PEPTIDES or config.INTRA or config.CHAINS:
             # the ligand also consists of amino acid residues, repeat step 2 just the other way around
             sel3 = {}
             for h in hydroph:
@@ -1060,7 +1060,7 @@ def __init__(self, cclass, ligand):
         self.complex = cclass
         self.molecule = ligand.mol  # Pybel Molecule
         # get canonical SMILES String, but not for peptide ligand (tend to be too long -> openBabel crashes)
-        self.smiles = "" if (config.INTRA or config.PEPTIDES) else self.molecule.write(format='can')
+        self.smiles = "" if (config.INTRA or config.PEPTIDES or config.CHAINS) else self.molecule.write(format='can')
         self.inchikey = self.molecule.write(format='inchikey')
         self.can_to_pdb = ligand.can_to_pdb
         if not len(self.smiles) == 0: