-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsetbetahelix.py
291 lines (246 loc) · 13.2 KB
/
setbetahelix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
import warnings
try:
from pymol import cmd
except ImportError:
warnings.warn(
'Cannot import PyMOL: functionality will suffer (you can ignore this if you are just building the documentation).')
import itertools
from .utils import set_dihedral
from typing import Union, List, Tuple, Optional, Iterable
from .secstructdb import SecondaryStructureDB
import re
def is_amino_acid(selection: str) -> bool:
"""Decide if the selection is a single amino-acid
:param selection: the selection
:type selection: str
:return: True or False
:rtype: bool
"""
if not cmd.count_atoms('({}) and name N and symbol N'.format(selection)) == 1:
# either does not contain a nitrogen called "N" (no amino-acid), or
# contains more of them (not a single residue)
return False
elif not cmd.count_atoms('({}) and name C and symbol C'.format(selection)) == 1:
# either does not contain a carbon called "C" (no amino-acid),
# or contains more of them (not a single residue)
return False
elif not cmd.count_atoms(
'({}) and (neighbor (name C and symbol C)) and (name CA and symbol C)'.format(selection)) == 1:
# the "C" atom must have a "CA" neighbour
return False
else:
return True
def is_beta(selection: str) -> bool:
"""Decide if a residue is a beta-amino acid
:param selection: the selection to analyze
:type selection: str
:return: True or False
:rtype: bool
"""
if not is_amino_acid(selection):
return False
# see if "CA" has a neighbour which is also the neighbour of "N" and has the name "CB" or "CB1"
if cmd.count_atoms(
"(neighbor (({0}) and name N)) and (neighbor (({0}) and name CA)) and ({0}) and name CB+CB1".format(
selection)) == 1:
return True
return False
def is_alpha(selection: str) -> bool:
"""Decide if a residue is an alpha-amino acid
:param selection: the selection to analyze
:type selection: str
:return: True or False
:rtype: bool
"""
if not is_amino_acid(selection):
return False
# see if "CA" and "N" are neighbours
if cmd.count_atoms("(neighbor (({0}) and name N)) and ({0}) and name CA".format(selection)) == 1:
return True
return False
def fold_bp(sstype: str, selection: str = '(all)'):
"""
DESCRIPTION
Adjust the torsion angles of an alpha- or beta-peptide to
fold it into the desired secondary structure
USAGE
fold_bp sstype [, selection]
ARGUMENTS
sstype = string: the desired secondary structure. The following possibilities exist:
1. a name from the secondary structure database (see command ssdb_list)
2. two or three (for alpha- and beta-amino acids, respectively) space-separated
floating point numbers in parentheses, corresponding to the backbone dihedral angles
The such given secondary structure will be applied to all residues in the selection.
Additionally, a space-separated list of the same length as the residues in the
selection can be given in square brackets, containing entry names or angle tuples
(or a mix of them), corresponding to the residues.
selection = the selection to operate on. Must be a single peptide chain with
unique and consecutive residue IDs (default: all)
EXAMPLES
fold_bp H14M, model valxval
fold_bp (-140.3 66.5 -136.8), model valxval
fold_bp [(-140.3 66.5 -136.8) (180 180 180) H14M], model tripeptide
SEE ALSO
ssdb_add, ssdb_del, ssdb_dihedrals, ssdb_resetdefaults, ssdb_list
"""
if isinstance(sstype, str):
# parse the string into a list of tuples of floats or Nones
sstype = parse_sstype(sstype)
elif isinstance(sstype, tuple):
# we should have a tuple of floats or Nones
if not all([isinstance(x, float) or x is None for x in sstype]):
raise ValueError('Error in secondary structure type: {}'.format(sstype))
sstype = itertools.cycle([sstype]) # use this for all residues
# otherwise try to use sstype as an iterable, producing tuples of floats or Nones
residues = list(sorted({a.resi_number for a in cmd.get_model(selection).atom}))
r = None
for r, angles in zip(residues, sstype):
phi, theta, psi = angles
if is_beta('({}) and resi {}'.format(selection, r)):
set_dihedral(selection, ('C', r - 1), ('N', r), ('CB+CB1', r), ('CA', r), phi)
set_dihedral(selection, ('N', r), ('CB+CB1', r), ('CA', r), ('C', r), theta)
set_dihedral(selection, ('CB+CB1', r), ('CA', r), ('C', r), ('N', r + 1), psi)
elif is_alpha('({}) and resi {}'.format(selection, r)):
set_dihedral(selection, ('C', r - 1), ('N', r), ('CA', r), ('C', r), phi)
set_dihedral(selection, ('N', r), ('CA', r), ('C', r), ('N', r + 1), psi)
else:
# not an amino acid, do nothing with this.
continue
if not r == max(residues):
# after the for loop, r must be the largest residue number. If this is not the case,
# too few angle triplets were given. Do nothing at present, just warn the user
print('Warning: not all residues have been processed (too few secondary structures given)')
cmd.unpick()
cmd.orient(selection)
def parse_sstype(sstype: Union[
str, Tuple[float, Optional[float], float], List[Union[str, Tuple[float, Optional[float], float]]]]) -> Iterable[
Tuple[float, Optional[float], float]]:
"""Parse the secondary structure information for beta- and alpha-peptides
The desired secondary structure type can be given in two ways:
1. three floating point numbers in parentheses, separated by space:
the backbone torsion angles. The middle one can be None for alpha-amino acids.
2. a name of an entry in the secondary structure database (simple string)
Additionally, a list can be given from any of the above, inside square brackets and
separated by whitespace. E.g.:
[ (-120 80 -136) H14M (130 50 45) ... ]
The thing is more complicated because PyMOL gives all parameters in string format.
"""
# now define some regular expressions which we will use
# a regular expression for floating point numbers, including the exponential form
float_regex = r"""([+-]\s*)? # optional sign and whitespace
( # start of the mantissa
(\d+(\.\d*)?) # one option: some digits, then optionally some decimals
| or
(\.\d+) # second option: only the decimals, led in by a decimal point
) # end of the mantissa
([eE][+-]?\d+)? # optionally, an exponent
"""
# a regular expression of a parenthesized part (without nested parentheses)
paren_regex = re.compile(r"\([^()]*\)")
sstuple_regex = re.compile(r""" #regular expression for dihedral angle tuples
(
\(\s*(?P<angle1>{0})\s*(?P<angle2>{0})\s*((?P<angle3>{0})\s*)?\) # two or three floats in parentheses, whitespace separated
)
| # or
(
\(\s*(?P<phi>{0})\s*(?P<theta>{0}|None)\s*(?P<psi>{0})\s*\) # three floats, the middle can be None, in parentheses, whitespace separated
)
""".format(float_regex),
re.VERBOSE)
def parse_parentuple(parenpart: str) -> Tuple[float, Optional[float], float]:
"""Parse a parenthesized part of a sec.structure definition string"""
m = sstuple_regex.match(parenpart)
if m is None:
raise ValueError('Invalid dihedral angle tuple: {}'.format(parenpart))
if m['angle1'] is not None and m['angle2'] is not None and m['angle3'] is not None:
# three angles
return (float(m['angle1']), float(m['angle2']), float(m['angle3']))
elif m['angle1'] is not None and m['angle2'] is not None and m['angle3'] is None:
# two angles
return (float(m['angle1']), None, float(m['angle2']))
elif m['angle1'] is not None or m['angle2'] is not None or m['angle3'] is not None:
raise ValueError('Invalid dihedral angle tuple: {}'.format(parenpart))
elif m['phi'] is not None and m['theta'] == 'None' and m['psi'] is not None:
return (float(m['phi']), None, float(m['psi']))
elif m['phi'] is not None and m['theta'] is not None and m['psi'] is not None:
return (float(m['phi']), float(m['theta']), float(m['psi']))
else:
raise ValueError('Invalid dihedral angle tuple: {}'.format(parenpart))
# now start working in earnest.
if isinstance(sstype, tuple):
# this must be a tuple of three floats, the middle one can be None
if len(sstype) != 3:
raise ValueError('Invalid tuple length: {}'.format(len(sstype)))
elif not (isinstance(sstype[0], float) and
(isinstance(sstype[1], float) or sstype[1] is None) and
(isinstance(sstype[2], float))):
raise ValueError(
'Incorrect torsion angle type(s): {}, {}, {}'.format(type(sstype[0]), type(sstype[1]), type(sstype[2])))
else:
return itertools.cycle([sstype]) # one-applies-for-all case
elif isinstance(sstype, list):
# see if every element of the list is a correct tuple or a string. If a string, find the corresponding entry
# in the secondary structure database.
sstype_parsed = []
for item in sstype:
if isinstance(item, str):
# this can fail with KeyError
item = SecondaryStructureDB.dihedrals(item)
elif isinstance(item, tuple):
# validate it by simply calling ourselves again
item = parse_sstype(item)[0]
else:
raise TypeError('Invalid type: {}, ({})'.format(item, type(item)))
sstype_parsed.append(item)
return sstype_parsed # list of tuples
elif isinstance(sstype, str):
# this is the most complicated part.
sstype = sstype.strip() # strip leading whitespaces
# some validation (not complete, i.e. mismatched/mixed up parentheses or brackets are not detected
if sstype.count('[') != sstype.count(']'): # 0==0 can also happen
raise ValueError('Unmatched brackets in secondary structure type {}'.format(sstype))
if sstype.count('(') != sstype.count(')'): # 0==0 can also happen
raise ValueError('Unmatched parentheses in secondary structure type {}'.format(sstype))
if sstype.startswith('[') and sstype.endswith(']'): # parse it as a list
sstype = sstype[1:-1].strip() # cut the brackets
# first find the parts in parentheses
parentheses = [(x.start(), x.end()) for x in paren_regex.finditer(sstype)]
items = []
def parse_outsideofparentheses_part(line: str):
# If "x" does not contain parentheses:
# 1. strip it
# 2. split it up at whitespaces
# 3. find the dihedrals in the SSDB
# print('Parsing outside parentheses: *{}*'.format(line))
return [SecondaryStructureDB.dihedrals(x) for x in line.strip().split()]
if not parentheses:
# parse the whole string as a list of entry names
# print('No parentheses found.')
items = parse_outsideofparentheses_part(sstype)
else:
# parse each pair of parentheses and the parts inbetween them.
for ip in range(len(parentheses)):
parenpart = sstype[parentheses[ip][0]:parentheses[ip][1]]
# print('This parenthesis pair: {} to {} = *{}*'.format(*parentheses[ip], parenpart))
if ip == 0:
# parse the part before the first pair of parentheses
items.extend(parse_outsideofparentheses_part(sstype[0:parentheses[0][0]]))
else:
# parse the part before this pair of parentheses and after the previous pair of parentheses
items.extend(parse_outsideofparentheses_part(sstype[parentheses[ip - 1][1]:parentheses[ip][0]]))
# now parse the parenthesized part
items.append(parse_parentuple(parenpart))
# parse the end of the string after the last pair of parentheses
items.extend(parse_outsideofparentheses_part(sstype[parentheses[-1][1]:]))
return items # list of tuples
elif '[' in sstype:
raise ValueError(
'Opening bracket must be the first character in the secondary structure type {}'.format(sstype))
elif sstype.startswith('(') and sstype.endswith(')'):
return itertools.cycle([parse_parentuple(sstype)]) # one-applies-for-all case
elif '(' in sstype:
raise ValueError(
'Opening parenthesis must be the first character in the secondary structure type {}'.format(sstype))
else:
# no [, no ], no ( and no ) in the string, it must be an entry in the secondary structure database
return itertools.cycle([SecondaryStructureDB.dihedrals(sstype)]) # one-applies-for-all case