@@ -105,19 +105,50 @@ def _template_residue_from_smiles(inmol: Molecule, nsres: str, smiles=None):
105
105
return mol
106
106
107
107
108
- def _get_idx (mol , name ):
109
- res = np .where (mol .name == name )
108
def _reorder_residue_atoms(mol, resid):
    """Reorder, in place, the atoms of one residue into AMBER backbone order.

    AMBER order is: N H CA HA [sidechain] C O. The H atom is not placed
    here; it will get added later. Atoms named N, CA and HA (when present)
    are moved to the front of the residue, atoms named C and O (when
    present) to its end, and every other atom of the residue keeps its
    relative order in between. Atoms that do not belong to ``resid`` stay
    at their current positions.

    Parameters
    ----------
    mol : Molecule
        Molecule to modify. Must expose ``resid``, ``name``, ``numAtoms``
        and ``reorderAtoms``.
    resid : int
        Residue id whose atoms should be reordered.

    Raises
    ------
    RuntimeError
        If no atom in ``mol`` has the requested ``resid``.
    """
    res_idx = np.where(mol.resid == resid)[0]
    if len(res_idx) == 0:
        raise RuntimeError(f"No atoms found with resid {resid}")

    leading = [_get_idx(mol, atom_name, resid) for atom_name in ("N", "CA", "HA")]
    leading = [idx for idx in leading if idx is not None]
    trailing = [_get_idx(mol, atom_name, resid) for atom_name in ("C", "O")]
    trailing = [idx for idx in trailing if idx is not None]
    middle = np.setdiff1d(res_idx, leading + trailing).tolist()

    # Start from the identity permutation and rewrite only the slots occupied
    # by this residue. For a contiguous residue this equals
    # "atoms before + reordered residue + atoms after"; for a non-contiguous
    # one it avoids dropping the atoms that lie in between.
    order = np.arange(mol.numAtoms)
    order[res_idx] = leading + middle + trailing
    mol.reorderAtoms(order.tolist())
129
+
130
+
131
def _get_idx(mol, name, resid=None):
    """Return the index of the single atom called ``name``, or ``None``.

    Parameters
    ----------
    mol : Molecule
        Object exposing per-atom ``name`` and ``resid`` arrays.
    name : str
        Atom name to look for.
    resid : int, optional
        When given, only atoms whose ``resid`` equals this value are
        considered.

    Returns
    -------
    idx : int or None
        Index of the matching atom, or ``None`` when no atom matches.

    Raises
    ------
    RuntimeError
        If more than one atom matches, because the lookup is then ambiguous.
    """
    sel = mol.name == name
    if resid is not None:
        sel = sel & (mol.resid == resid)
    hits = np.flatnonzero(sel)
    if len(hits) == 0:
        return None
    # Explicit check instead of ``assert`` so that it still runs under
    # ``python -O`` rather than silently returning the first match.
    if len(hits) > 1:
        where = "" if resid is None else f" in resid {resid}"
        raise RuntimeError(
            f"Expected a single atom named {name}{where}, found {len(hits)}"
        )
    return hits[0]
113
140
114
141
115
- def _process_custom_residue (mol : Molecule , resname : str ):
142
+ def _process_custom_residue (mol : Molecule , resid : int = None , align : bool = True ):
116
143
import networkx as nx
117
144
145
+ if resid is None :
146
+ resid = mol .resid [0 ]
147
+ resname = mol .resname [mol .resid == resid ][0 ]
148
+
118
149
gg = mol .toGraph ()
119
- n_idx = _get_idx (mol , "N" )
120
- c_idx = _get_idx (mol , "C" )
150
+ n_idx = _get_idx (mol , "N" , resid )
151
+ c_idx = _get_idx (mol , "C" , resid )
121
152
if n_idx is None or c_idx is None :
122
153
raise RuntimeError (
123
154
f"Residue { resname } does not contain N or C atoms. List of atoms: { mol .name } "
@@ -130,7 +161,7 @@ def _process_custom_residue(mol: Molecule, resname: str):
130
161
)
131
162
132
163
# Fix hydrogen names for CA / N
133
- ca_idx = _get_idx (mol , "CA" )
164
+ ca_idx = _get_idx (mol , "CA" , resid )
134
165
ca_hs = [nn for nn in gg .neighbors (ca_idx ) if gg .nodes [nn ]["element" ] == "H" ]
135
166
if len (ca_hs ) > 1 :
136
167
raise RuntimeError ("Found more than 1 hydrogen on CA atom!" )
@@ -139,7 +170,7 @@ def _process_custom_residue(mol: Molecule, resname: str):
139
170
140
171
# Remove all N terminal hydrogens
141
172
gg = mol .toGraph ()
142
- n_idx = _get_idx (mol , "N" )
173
+ n_idx = _get_idx (mol , "N" , resid )
143
174
n_neighbours = list (gg .neighbors (n_idx ))
144
175
n_hs = [nn for nn in n_neighbours if gg .nodes [nn ]["element" ] == "H" ]
145
176
n_heavy = len (n_neighbours ) - len (n_hs )
@@ -148,15 +179,15 @@ def _process_custom_residue(mol: Molecule, resname: str):
148
179
149
180
# Remove all hydrogens attached to terminal C
150
181
gg = mol .toGraph ()
151
- idx = _get_idx (mol , "C" )
182
+ idx = _get_idx (mol , "C" , resid )
152
183
neighbours = list (gg .neighbors (idx ))
153
184
hs = [nn for nn in neighbours if gg .nodes [nn ]["element" ] == "H" ]
154
185
if len (hs ):
155
186
mol .remove (f"index { ' ' .join (map (str , hs ))} " , _logger = False )
156
187
157
188
# Remove all hydrogens attached to C-terminal O
158
189
gg = mol .toGraph ()
159
- idx = _get_idx (mol , "O" )
190
+ idx = _get_idx (mol , "O" , resid )
160
191
neighbours = list (gg .neighbors (idx ))
161
192
hs = [nn for nn in neighbours if gg .nodes [nn ]["element" ] == "H" ]
162
193
if len (hs ):
@@ -166,40 +197,43 @@ def _process_custom_residue(mol: Molecule, resname: str):
166
197
hydr = mol .name == "X_H"
167
198
mol .name [hydr ] = [f"H{ i } " for i in range (10 , sum (hydr ) + 10 )]
168
199
169
- # Reorder atoms. AMBER order is: N H CA HA [sidechain] C O
170
- bbatoms = [x for x in ["N" , "H" , "CA" , "HA" , "C" , "O" ] if x in mol .name ]
171
- ordered_idx = [_get_idx (mol , nn ) for nn in bbatoms ]
172
- other_idx = np .setdiff1d (range (mol .numAtoms ), ordered_idx )
173
- mol .reorderAtoms (ordered_idx [:4 ] + other_idx .tolist () + ordered_idx [4 :])
200
+ _reorder_residue_atoms (mol , resid )
174
201
175
- # Align to reference BB for pdb2pqr
176
- mol .align ("name N CA C" , refmol = backbone )
202
+ if align :
203
+ # Align to reference BB for pdb2pqr
204
+ mol .align ("name N CA C" , refmol = backbone )
177
205
178
206
if n_heavy == 1 and "N" in mol .name :
179
207
# Add the H atom if N is only bonded to CA.
180
208
# This is necessary to add it in the right position for pdb2pqr
181
209
nmol = backbone .copy ()
210
+ if not align and resid is not None :
211
+ nmol .align (
212
+ "name N CA C" , refsel = f"name N CA C and resid { resid } " , refmol = mol
213
+ )
182
214
nmol .filter ("name H" , _logger = False )
183
- mol .insert (nmol , 1 )
184
- mol .bonds = np .vstack ((mol .bonds , [0 , 1 ]))
185
- mol .bondtype = np .hstack ((mol .bondtype , "1" ))
215
+ nmol .resname [:] = resname
216
+ nmol .resid [:] = resid
217
+ insert_idx = np .where (mol .resid == resid )[0 ][0 ] + 1
218
+ mol .insert (nmol , insert_idx )
219
+ mol .addBond (insert_idx - 1 , insert_idx , "1" )
186
220
187
- # Rename to correct resname
188
- mol .resname [:] = resname
189
221
return mol
190
222
191
223
192
- def _prepare_for_parameterize (mol ):
224
+ def _prepare_for_parameterize (mol , resid = None ):
193
225
# Add OXT HXT HN2 atoms to convert it to RCSB-like structures and pass it to parameterize
194
226
import networkx as nx
195
227
196
228
mol = mol .copy ()
197
- resname = mol .resname [0 ]
229
+ if resid is None :
230
+ resid = mol .resid [0 ]
231
+ resname = mol .resname [mol .resid == resid ][0 ]
198
232
199
233
gg = mol .toGraph ()
200
- bb = nx .shortest_path (gg , _get_idx (mol , "N" ), _get_idx (mol , "C" ))
234
+ bb = nx .shortest_path (gg , _get_idx (mol , "N" , resid ), _get_idx (mol , "C" , resid ))
201
235
202
- n_idx = _get_idx (mol , "N" )
236
+ n_idx = _get_idx (mol , "N" , resid )
203
237
mol .formalcharge [n_idx ] = 0
204
238
n_neighbours = list (gg .neighbors (n_idx ))
205
239
if len (n_neighbours ) == 2 :
@@ -208,15 +242,22 @@ def _prepare_for_parameterize(mol):
208
242
align_idx = [n_idx , bb [1 ], non_bb_idx [0 ]]
209
243
nterm = alanine .copy ()
210
244
nterm .align (
211
- [_get_idx (nterm , n ) for n in ("N" , "CA" , "H" )], refmol = mol , refsel = align_idx
245
+ [_get_idx (nterm , n ) for n in ("N" , "CA" , "H" )],
246
+ refmol = mol ,
247
+ refsel = align_idx ,
212
248
)
213
249
nterm .filter ("name H2" , _logger = False )
214
250
nterm .name [0 ] = "HN2"
215
- mol .append (nterm )
216
- mol .bonds = np .vstack ((mol .bonds , [n_idx , mol .numAtoms - 1 ]))
217
- mol .bondtype = np .hstack ((mol .bondtype , "1" ))
251
+ nterm .resname [:] = resname
252
+ nterm .resid [:] = resid
253
+ insert_idx = np .where (mol .resid == resid )[0 ][0 ] + 1 # Second position
254
+ mol .insert (nterm , insert_idx )
255
+ mol .addBond (n_idx , insert_idx , "1" )
218
256
219
- c_idx = _get_idx (mol , "C" )
257
+ gg = mol .toGraph ()
258
+ bb = nx .shortest_path (gg , _get_idx (mol , "N" , resid ), _get_idx (mol , "C" , resid ))
259
+
260
+ c_idx = _get_idx (mol , "C" , resid )
220
261
mol .formalcharge [c_idx ] = 0
221
262
c_neighbours = list (gg .neighbors (c_idx ))
222
263
if len (c_neighbours ) == 2 :
@@ -225,21 +266,19 @@ def _prepare_for_parameterize(mol):
225
266
align_idx = [bb [- 2 ], c_idx , non_bb_idx [0 ]]
226
267
cterm = alanine .copy ()
227
268
cterm .align (
228
- [_get_idx (cterm , n ) for n in ("CA" , "C" , "O" )], refmol = mol , refsel = align_idx
269
+ [_get_idx (cterm , n ) for n in ("CA" , "C" , "O" )],
270
+ refmol = mol ,
271
+ refsel = align_idx ,
229
272
)
230
273
cterm .filter ("name OXT HXT" , _logger = False )
231
- mol .append (cterm )
232
- mol .bonds = np .vstack ((mol .bonds , [c_idx , mol .numAtoms - 2 ]))
233
- mol .bondtype = np .hstack ((mol .bondtype , "1" ))
234
-
235
- # Rename to correct resname
236
- mol .resname [:] = resname
274
+ cterm .resname [:] = resname
275
+ cterm .resid [:] = resid
276
+ insert_idx = np .where (mol .resid == resid )[0 ][- 1 ] + 1 # End
277
+ mol .insert (cterm , insert_idx )
278
+ mol .addBond (c_idx , insert_idx , "1" )
237
279
238
280
# Reorder atoms. AMBER order is: N H CA HA [sidechain] C O
239
- bbatoms = [x for x in ["N" , "H" , "CA" , "HA" , "C" , "O" ] if x in mol .name ]
240
- ordered_idx = [_get_idx (mol , nn ) for nn in bbatoms ]
241
- other_idx = np .setdiff1d (range (mol .numAtoms ), ordered_idx )
242
- mol .reorderAtoms (ordered_idx [:4 ] + other_idx .tolist () + ordered_idx [4 :])
281
+ _reorder_residue_atoms (mol , resid )
243
282
244
283
return mol
245
284
@@ -280,7 +319,9 @@ def _convert_amber_prepi_to_pdb2pqr_residue(prepi, outdir, name=None):
280
319
)
281
320
mol .element [:] = sdf .element [:]
282
321
283
- pmol = _process_custom_residue (mol , name )
322
+ pmol = _process_custom_residue (mol )
323
+ # Rename to correct resname
324
+ pmol .resname [:] = name
284
325
285
326
_mol_to_xml_def (pmol , os .path .join (outdir , f"{ name } .xml" ))
286
327
_mol_to_dat_def (pmol , os .path .join (outdir , f"{ name } .dat" ))
0 commit comments