Skip to content

Commit

Permalink
Merge pull request #165 from pharmai/development
Browse files Browse the repository at this point in the history
Release 2.4.0
  • Loading branch information
mestia authored Dec 18, 2024
2 parents 82c803c + 979e56d commit 172ae02
Show file tree
Hide file tree
Showing 10 changed files with 221 additions and 123 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ target/
# Other
*~
.nfs*
tests/
5 changes: 5 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
Changelog
---------
# 2.4.0
* new "--chains" flag to enable detection of interactions between protein chains by @PhiCMS and @snbolz
* update setup.py, attempt to fix and install broken python openbabel bindings 3.1.1.1
* update Dockerfile

# 2.3.1
* fixes in interaction detection with peptide ligands by @kalinni in #153
* fix in hydrogen bond acceptor identification by @kalinni in #151
Expand Down
81 changes: 9 additions & 72 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,86 +1,23 @@
FROM ubuntu:18.04 AS builder

ARG OPENBABEL_TAG="openbabel-3-0-0"
ARG RDKIT_TAG="Release_2019_09_3"
ARG MMTF_TAG="v1.0.0"
ARG PYMOL_TAG="v2.3.0"
FROM debian:stable

LABEL maintainer="PharmAI GmbH <contact@pharm.ai>" \
org.label-schema.name="PLIP: The Protein-Ligand Interaction Profiler" \
org.label-schema.description="https://www.doi.org/10.1093/nar/gkv315"

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
cmake \
git \
g++ \
imagemagick \
libboost-all-dev \
libeigen3-dev \
libfreetype6-dev \
libghc-zlib-dev \
libglew-dev \
libglm-dev \
libmsgpack-dev \
libnetcdf-dev \
libxml2-dev \
libpng-dev \
libpython3-all-dev \
python3 \
pymol \
python3-distutils \
python3-lxml \
python3-numpy \
python3-pyqt5.qtopengl \
swig

# build OpenBabel
WORKDIR /src
RUN git clone -b $OPENBABEL_TAG \
https://github.com/openbabel/openbabel.git
WORKDIR /src/openbabel/build
RUN cmake .. \
-DPYTHON_EXECUTABLE=/usr/bin/python3.6 \
-DPYTHON_BINDINGS=ON \
-DCMAKE_INSTALL_PREFIX=/usr/local \
-DRUN_SWIG=ON
RUN make -j$(nproc --all) install

# build RDkit
#WORKDIR /src
#RUN git clone -b $RDKIT_TAG \
# https://github.com/rdkit/rdkit.git
#WORKDIR /src/rdkit/build
#RUN cmake .. \
# -DCMAKE_BUILD_TYPE=Release \
# -DRDK_BUILD_INCHI_SUPPORT=ON \
# -DPYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.6m.so \
# -DPYTHON_INCLUDE_DIR=/usr/include/python3.6 \
# -DPYTHON_EXECUTABLE=/usr/bin/python3.6
#RUN make -j$(nproc --all) install
#ENV RDBASE /src/rdkit
#ENV LD_LIBRARY_PATH /usr/local/lib/:$RDBASE/lib
#ENV PYTHONPATH $PYTHONPATH:$RDBASE

# build mmtf-cpp
WORKDIR /src
RUN git clone -b $MMTF_TAG \
https://github.com/rcsb/mmtf-cpp.git
WORKDIR /src/mmtf-cpp/build
RUN cmake ..
RUN make -j$(nproc --all) install

# build PyMOL
WORKDIR /src
RUN git clone -b $PYMOL_TAG \
https://github.com/schrodinger/pymol-open-source
WORKDIR /src/pymol-open-source
RUN python3 setup.py build install \
--prefix=/usr/local \
--jobs=$(nproc --all)
python3-openbabel \
python3-pymol; \
apt-get clean && rm -rf /var/lib/apt/lists/*

# copy PLIP source code
WORKDIR /src
ADD plip/ plip/
RUN chmod +x plip/plipcmd.py
ENV PYTHONPATH $PYTHONPATH:/src
ENV PYTHONPATH=/src

# execute tests
WORKDIR /src/plip/test
Expand All @@ -89,4 +26,4 @@ RUN ./run_all_tests.sh
WORKDIR /

# set entry point to plipcmd.py
ENTRYPOINT ["python3", "/src/plip/plipcmd.py"]
ENTRYPOINT ["python3", "/src/plip/plipcmd.py"]
5 changes: 4 additions & 1 deletion plip/basic/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '2.3.1'
__version__ = '2.4.0'
__maintainer__ = 'PharmAI GmbH (2020-2021) - www.pharm.ai - hello@pharm.ai'
__citation_information__ = "Adasme,M. et al. PLIP 2021: expanding the scope of the protein-ligand interaction profiler to DNA and RNA. " \
"Nucl. Acids Res. (05 May 2021), gkab294. doi: 10.1093/nar/gkab294"
Expand All @@ -25,12 +25,15 @@
NOFIXFILE = False # Turn off writing to files for fixed PDB structures
PEPTIDES = [] # Definition which chains should be considered as peptide ligands
INTRA = None
RESIDUES = {}
KEEPMOD = False
DNARECEPTOR = False
OUTPUTFILENAME = "report" # Naming for the TXT and XML report files
NOPDBCANMAP = False # Skip calculation of mapping canonical atom order: PDB atom order
NOHYDRO = False # Do not add hydrogen bonds (in case already present in the structure)
MODEL = 1 # The model to be selected for multi-model structures (default = 1).
CHAINS = None # Define chains for protein-protein interaction detection


# Configuration file for Protein-Ligand Interaction Profiler (PLIP)
# Set thresholds for detection of interactions
Expand Down
12 changes: 12 additions & 0 deletions plip/basic/supplemental.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ def whichchain(atom):
return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None


def residue_belongs_to_receptor(res, config):
"""tests whether the residue is defined as receptor and is not part of a peptide or residue ligand."""
if config.CHAINS:
if config.CHAINS[0] and config.CHAINS[1]: # if receptor and ligand chains were given
return res.GetChain() in config.CHAINS[0] and res.GetChain() not in config.CHAINS[1]
# True if residue is part of receptor chains and not of ligand chains
if config.CHAINS[1]: # if only ligand chains were given
return res.GetChain() not in config.CHAINS[1] # True if residue is not part of ligand chains
return False # if only receptor chains were given or both is empty
return res.GetChain() not in config.PEPTIDES # True if residue is not part of peptide ligand.


#########################
# Mathematical operations
#########################
Expand Down
54 changes: 48 additions & 6 deletions plip/plipcmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import multiprocessing
import os
import sys
import ast
from argparse import ArgumentParser
from collections import namedtuple

Expand Down Expand Up @@ -38,9 +39,19 @@ def threshold_limiter(aparser, arg):
aparser.error("All thresholds have to be values larger than zero.")
return arg

def residue_list(input_string):
"""Parse mix of residue numbers and ranges passed with the --residues flag into one list"""
result = []
for part in input_string.split(','):
if '-' in part:
start, end = map(int, part.split('-'))
result.extend(range(start, end + 1))
else:
result.append(int(part))
return result

def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
"""Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output."""
"""Analysis of a single PDB file with optional chain filtering."""
if not as_string:
pdb_file_name = pdbfile.split('/')[-1]
startmessage = f'starting analysis of {pdb_file_name}'
Expand All @@ -50,7 +61,6 @@ def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
mol = PDBComplex()
mol.output_path = outpath
mol.load_pdb(pdbfile, as_string=as_string)
# @todo Offers possibility for filter function from command line (by ligand chain, position, hetid)
for ligand in mol.ligands:
mol.characterize_complex(ligand)

Expand Down Expand Up @@ -116,7 +126,7 @@ def remove_duplicates(slist):
return unique


def run_analysis(inputstructs, inputpdbids):
def run_analysis(inputstructs, inputpdbids, chains=None):
"""Main function. Calls functions for processing, report generation and visualization."""
pdbid, pdbpath = None, None
# @todo For multiprocessing, implement better stacktracing for errors
Expand All @@ -127,16 +137,16 @@ def run_analysis(inputstructs, inputpdbids):
output_prefix = config.OUTPUTFILENAME

if inputstructs is not None: # Process PDB file(s)
num_structures = len(inputstructs)
num_structures = len(inputstructs) # @question: how can it become more than one file? The tilde_expansion function does not consider this case.
inputstructs = remove_duplicates(inputstructs)
read_from_stdin = False
for inputstruct in inputstructs:
if inputstruct == '-':
if inputstruct == '-': # @expl: when user gives '-' as input, pdb file is read from stdin
inputstruct = sys.stdin.read()
read_from_stdin = True
if config.RAWSTRING:
if sys.version_info < (3,):
inputstruct = bytes(inputstruct).decode('unicode_escape')
inputstruct = bytes(inputstruct).decode('unicode_escape') # @expl: in Python2, the bytes object is just a string.
else:
inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape')
else:
Expand Down Expand Up @@ -218,6 +228,9 @@ def main():
help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
nargs="+")
ligandtype.add_argument("--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.")
parser.add_argument("--residues", dest="residues", default=[], nargs="+",
help="""Allows to specify which residues of the chain(s) should be considered as peptide ligands.
Give single residues (separated with comma) or ranges (with dash) or both, for several chains separate selections with one space""")
parser.add_argument("--keepmod", dest="keepmod", default=False,
help="Keep modified residues as ligands",
action="store_true")
Expand All @@ -241,7 +254,19 @@ def main():
for t in thresholds:
parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val),
help=argparse.SUPPRESS)

# Add argument to define receptor and ligand chains
parser.add_argument("--chains", dest="chains", type=str,
help="Specify chains as receptor/ligand groups, e.g., '[['A'], ['B']]'. "
"Use format [['A'], ['B', 'C']] to define A as receptor, and B, C as ligands.")


arguments = parser.parse_args()
# make sure, residues is only used together with --inter (could be expanded to --intra in the future)
if arguments.residues and not (arguments.peptides or arguments.intra):
parser.error("The --residues option requires specification of a chain with --inter or --peptide")
if arguments.residues and len(arguments.residues)!=len(arguments.peptides):
parser.error("Please provide residue numbers or ranges for each chain specified. Separate selections with a single space.")
# configure log levels
config.VERBOSE = True if arguments.verbose else False
config.QUIET = True if arguments.quiet else False
Expand All @@ -268,6 +293,7 @@ def main():
config.BREAKCOMPOSITE = arguments.breakcomposite
config.ALTLOC = arguments.altlocation
config.PEPTIDES = arguments.peptides
config.RESIDUES = dict(zip(arguments.peptides, map(residue_list, arguments.residues)))
config.INTRA = arguments.intra
config.NOFIX = arguments.nofix
config.NOFIXFILE = arguments.nofixfile
Expand All @@ -277,6 +303,22 @@ def main():
config.OUTPUTFILENAME = arguments.outputfilename
config.NOHYDRO = arguments.nohydro
config.MODEL = arguments.model

try:
# add inner quotes for python backend
if not arguments.chains:
config.CHAINS = None
else:
import re
quoted_input = re.sub(r'(?<!["\'])\b([a-zA-Z0-9_]+)\b(?!["\'])', r'"\1"', arguments.chains)
config.CHAINS = ast.literal_eval(quoted_input)
print(config.CHAINS)
if config.CHAINS and not all(isinstance(c, list) for c in config.CHAINS):
raise ValueError("Chains should be specified as a list of lists, e.g., '[[A], [B, C]]'.")
except (ValueError, SyntaxError):
parser.error("The --chains option must be in the format '[[A], [B, C]]'.")


# Make sure we have pymol with --pics and --pymol
if config.PICS or config.PYMOL:
try:
Expand Down
33 changes: 21 additions & 12 deletions plip/structure/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize
from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative
from plip.basic.supplemental import residue_belongs_to_receptor
from plip.structure.detection import halogen, pication, water_bridges, metal_complexation
from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge

Expand Down Expand Up @@ -256,7 +257,7 @@ def getligs(self):
Returns all non-empty ligands.
"""

if config.PEPTIDES == [] and config.INTRA is None:
if config.PEPTIDES == [] and config.INTRA is None and config.CHAINS is None:
# Extract small molecule ligands (default)
ligands = []

Expand Down Expand Up @@ -284,8 +285,16 @@ def getligs(self):
else:
# Extract peptides from given chains
self.water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)]
if config.PEPTIDES:
if config.PEPTIDES and not config.CHAINS:
peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES]

#Todo: Validate change here... Do we want to combine multiple chains to a single ligand?
# if yes can be easily added to the getpeptides function by flatten the resulting list - Philipp
elif config.CHAINS:
# chains is defined as list of list e.g. [['A'], ['B', 'C']] in which second list contains the
# ligand chains and the first one should be the receptor
peptide_ligands = [self.getpeptides(chain) for chain in config.CHAINS[1]]

elif config.INTRA is not None:
peptide_ligands = [self.getpeptides(config.INTRA), ]

Expand All @@ -304,7 +313,7 @@ def extract_ligand(self, kmer):
names = [x[0] for x in members]
longname = '-'.join([x[0] for x in members])

if config.PEPTIDES:
if config.PEPTIDES or config.CHAINS:
ligtype = 'PEPTIDE'
elif config.INTRA is not None:
ligtype = 'INTRA'
Expand Down Expand Up @@ -743,7 +752,7 @@ def refine_hydrophobic(all_h, pistacks):
hydroph = [h for h in sel2.values()]
hydroph_final = []
# 3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist
if config.PEPTIDES or config.INTRA:
if config.PEPTIDES or config.INTRA or config.CHAINS:
# the ligand also consists of amino acid residues, repeat step 2 just the other way around
sel3 = {}
for h in hydroph:
Expand Down Expand Up @@ -951,7 +960,7 @@ def find_charged(self, mol):
data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
a_set = []
# Iterate through all residue, exclude those in chains defined as peptides
for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]:
for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_receptor(r, config)]:
if config.INTRA is not None:
if res.GetChain() != config.INTRA:
continue
Expand Down Expand Up @@ -992,7 +1001,7 @@ def find_charged(self, mol):
a_contributing.append(pybel.Atom(a))
a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein'))
if not len(a_contributing) == 0:
a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative',
a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative',
center=centroid([ac.coords for ac in a_contributing]), restype=res.GetName(),
resnr=res.GetNum(),
reschain=res.GetChain()))
Expand Down Expand Up @@ -1051,7 +1060,7 @@ def __init__(self, cclass, ligand):
self.complex = cclass
self.molecule = ligand.mol # Pybel Molecule
# get canonical SMILES String, but not for peptide ligand (tend to be too long -> openBabel crashes)
self.smiles = "" if (config.INTRA or config.PEPTIDES) else self.molecule.write(format='can')
self.smiles = "" if (config.INTRA or config.PEPTIDES or config.CHAINS) else self.molecule.write(format='can')
self.inchikey = self.molecule.write(format='inchikey')
self.can_to_pdb = ligand.can_to_pdb
if not len(self.smiles) == 0:
Expand Down Expand Up @@ -1192,7 +1201,7 @@ def find_charged(self, all_atoms):
"""
data = namedtuple('lcharge', 'atoms orig_atoms atoms_orig_idx type center fgroup')
a_set = []
if not (config.INTRA or config.PEPTIDES):
if not (config.INTRA or config.PEPTIDES or config.CHAINS):
for a in all_atoms:
a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
a_orig = self.Mapper.id_to_atom(a_orig_idx)
Expand Down Expand Up @@ -1567,9 +1576,9 @@ def res_belongs_to_bs(res, cutoff, ligcentroid):
rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
# Check geometry
near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
# Check chain membership
restricted_chain = True if res.GetChain() in config.PEPTIDES else False
return (near_enough and not restricted_chain)
#Todo: Test if properly working
# Add restriction via chains flag
return near_enough and residue_belongs_to_receptor(res, config)

def get_atom(self, idx):
return self.atoms[idx]
Expand All @@ -1580,4 +1589,4 @@ def output_path(self):

@output_path.setter
def output_path(self, path):
self._output_path = tilde_expansion(path)
self._output_path = tilde_expansion(path)
Loading

0 comments on commit 172ae02

Please sign in to comment.