Skip to content

Commit ce61a77

Browse files
committed
updates for system preparation, generalizing code for other uses
1 parent 5360e6c commit ce61a77

File tree

5 files changed

+303
-251
lines changed

5 files changed

+303
-251
lines changed

moleculekit/tools/preparation.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,13 @@ def _generate_nonstandard_residues_ff(
147147
if residue_smiles is not None and res in residue_smiles:
148148
smiles = residue_smiles[res]
149149

150-
tmol = _template_residue_from_smiles(molc, res, smiles=smiles)
151-
cres = _process_custom_residue(tmol, res)
150+
if smiles is not None and os.path.isfile(smiles):
151+
tmol = Molecule(smiles)
152+
else:
153+
tmol = _template_residue_from_smiles(molc, res, smiles=smiles)
154+
cres = _process_custom_residue(tmol)
155+
# Rename to correct resname
156+
cres.resname[:] = res
152157

153158
_mol_to_xml_def(cres, os.path.join(tmpdir, f"{res}.xml"))
154159
_mol_to_dat_def(cres, os.path.join(tmpdir, f"{res}.dat"))
@@ -542,6 +547,7 @@ def systemPrepare(
542547
_molkit_ff=True,
543548
outdir=None,
544549
residue_smiles=None,
550+
ignore_ns=False,
545551
):
546552
"""Prepare molecular systems through protonation and h-bond optimization.
547553
@@ -732,15 +738,16 @@ def systemPrepare(
732738
_fix_protonation_resnames(mol_in)
733739

734740
definition, forcefield = _get_custom_ff(molkit_ff=_molkit_ff)
735-
definition, forcefield = _generate_nonstandard_residues_ff(
736-
mol_in,
737-
definition,
738-
forcefield,
739-
_molkit_ff,
740-
outdir,
741-
ignore_ns_errors=ignore_ns_errors,
742-
residue_smiles=residue_smiles,
743-
)
741+
if not ignore_ns:
742+
definition, forcefield = _generate_nonstandard_residues_ff(
743+
mol_in,
744+
definition,
745+
forcefield,
746+
_molkit_ff,
747+
outdir,
748+
ignore_ns_errors=ignore_ns_errors,
749+
residue_smiles=residue_smiles,
750+
)
744751

745752
nonpept = []
746753
if hold_nonpeptidic_bonds:

moleculekit/tools/preparation_customres.py

Lines changed: 83 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -105,19 +105,50 @@ def _template_residue_from_smiles(inmol: Molecule, nsres: str, smiles=None):
105105
return mol
106106

107107

108-
def _get_idx(mol, name):
109-
res = np.where(mol.name == name)
108+
def _reorder_residue_atoms(mol, resid):
109+
# Reorder atoms. AMBER order is: N H CA HA [sidechain] C O
110+
# the H atom will get added later
111+
first_bbatoms = [_get_idx(mol, x, resid) for x in ["N", "CA", "HA"]]
112+
first_bbatoms = [x for x in first_bbatoms if x is not None]
113+
last_bbatoms = [_get_idx(mol, x, resid) for x in ["C", "O"]]
114+
last_bbatoms = [x for x in last_bbatoms if x is not None]
115+
other_idx = np.setdiff1d(
116+
np.where(mol.resid == resid)[0], first_bbatoms + last_bbatoms
117+
).tolist()
118+
prev_res = np.where(mol.resid == resid)[0][0]
119+
if prev_res > 0:
120+
prev_res = list(range(prev_res))
121+
else:
122+
prev_res = []
123+
next_res = np.where(mol.resid == resid)[0][-1] + 1
124+
if next_res < mol.numAtoms:
125+
next_res = list(range(next_res, mol.numAtoms))
126+
else:
127+
next_res = []
128+
mol.reorderAtoms(prev_res + first_bbatoms + other_idx + last_bbatoms + next_res)
129+
130+
131+
def _get_idx(mol, name, resid=None):
132+
sel = mol.name == name
133+
if resid is not None:
134+
sel &= mol.resid == resid
135+
res = np.where(sel)
110136
if len(res) == 0 or len(res[0]) == 0:
111137
return None
138+
assert len(res[0]) == 1
112139
return res[0][0]
113140

114141

115-
def _process_custom_residue(mol: Molecule, resname: str):
142+
def _process_custom_residue(mol: Molecule, resid: int = None, align: bool = True):
116143
import networkx as nx
117144

145+
if resid is None:
146+
resid = mol.resid[0]
147+
resname = mol.resname[mol.resid == resid][0]
148+
118149
gg = mol.toGraph()
119-
n_idx = _get_idx(mol, "N")
120-
c_idx = _get_idx(mol, "C")
150+
n_idx = _get_idx(mol, "N", resid)
151+
c_idx = _get_idx(mol, "C", resid)
121152
if n_idx is None or c_idx is None:
122153
raise RuntimeError(
123154
f"Residue {resname} does not contain N or C atoms. List of atoms: {mol.name}"
@@ -130,7 +161,7 @@ def _process_custom_residue(mol: Molecule, resname: str):
130161
)
131162

132163
# Fix hydrogen names for CA / N
133-
ca_idx = _get_idx(mol, "CA")
164+
ca_idx = _get_idx(mol, "CA", resid)
134165
ca_hs = [nn for nn in gg.neighbors(ca_idx) if gg.nodes[nn]["element"] == "H"]
135166
if len(ca_hs) > 1:
136167
raise RuntimeError("Found more than 1 hydrogen on CA atom!")
@@ -139,7 +170,7 @@ def _process_custom_residue(mol: Molecule, resname: str):
139170

140171
# Remove all N terminal hydrogens
141172
gg = mol.toGraph()
142-
n_idx = _get_idx(mol, "N")
173+
n_idx = _get_idx(mol, "N", resid)
143174
n_neighbours = list(gg.neighbors(n_idx))
144175
n_hs = [nn for nn in n_neighbours if gg.nodes[nn]["element"] == "H"]
145176
n_heavy = len(n_neighbours) - len(n_hs)
@@ -148,15 +179,15 @@ def _process_custom_residue(mol: Molecule, resname: str):
148179

149180
# Remove all hydrogens attached to terminal C
150181
gg = mol.toGraph()
151-
idx = _get_idx(mol, "C")
182+
idx = _get_idx(mol, "C", resid)
152183
neighbours = list(gg.neighbors(idx))
153184
hs = [nn for nn in neighbours if gg.nodes[nn]["element"] == "H"]
154185
if len(hs):
155186
mol.remove(f"index {' '.join(map(str, hs))}", _logger=False)
156187

157188
# Remove all hydrogens attached to C-terminal O
158189
gg = mol.toGraph()
159-
idx = _get_idx(mol, "O")
190+
idx = _get_idx(mol, "O", resid)
160191
neighbours = list(gg.neighbors(idx))
161192
hs = [nn for nn in neighbours if gg.nodes[nn]["element"] == "H"]
162193
if len(hs):
@@ -166,40 +197,43 @@ def _process_custom_residue(mol: Molecule, resname: str):
166197
hydr = mol.name == "X_H"
167198
mol.name[hydr] = [f"H{i}" for i in range(10, sum(hydr) + 10)]
168199

169-
# Reorder atoms. AMBER order is: N H CA HA [sidechain] C O
170-
bbatoms = [x for x in ["N", "H", "CA", "HA", "C", "O"] if x in mol.name]
171-
ordered_idx = [_get_idx(mol, nn) for nn in bbatoms]
172-
other_idx = np.setdiff1d(range(mol.numAtoms), ordered_idx)
173-
mol.reorderAtoms(ordered_idx[:4] + other_idx.tolist() + ordered_idx[4:])
200+
_reorder_residue_atoms(mol, resid)
174201

175-
# Align to reference BB for pdb2pqr
176-
mol.align("name N CA C", refmol=backbone)
202+
if align:
203+
# Align to reference BB for pdb2pqr
204+
mol.align("name N CA C", refmol=backbone)
177205

178206
if n_heavy == 1 and "N" in mol.name:
179207
# Add the H atom if N is only bonded to CA.
180208
# This is necessary to add it in the right position for pdb2pqr
181209
nmol = backbone.copy()
210+
if not align and resid is not None:
211+
nmol.align(
212+
"name N CA C", refsel=f"name N CA C and resid {resid}", refmol=mol
213+
)
182214
nmol.filter("name H", _logger=False)
183-
mol.insert(nmol, 1)
184-
mol.bonds = np.vstack((mol.bonds, [0, 1]))
185-
mol.bondtype = np.hstack((mol.bondtype, "1"))
215+
nmol.resname[:] = resname
216+
nmol.resid[:] = resid
217+
insert_idx = np.where(mol.resid == resid)[0][0] + 1
218+
mol.insert(nmol, insert_idx)
219+
mol.addBond(insert_idx - 1, insert_idx, "1")
186220

187-
# Rename to correct resname
188-
mol.resname[:] = resname
189221
return mol
190222

191223

192-
def _prepare_for_parameterize(mol):
224+
def _prepare_for_parameterize(mol, resid=None):
193225
# Add OXT HXT HN2 atoms to convert it to RCSB-like structures and pass it to parameterize
194226
import networkx as nx
195227

196228
mol = mol.copy()
197-
resname = mol.resname[0]
229+
if resid is None:
230+
resid = mol.resid[0]
231+
resname = mol.resname[mol.resid == resid][0]
198232

199233
gg = mol.toGraph()
200-
bb = nx.shortest_path(gg, _get_idx(mol, "N"), _get_idx(mol, "C"))
234+
bb = nx.shortest_path(gg, _get_idx(mol, "N", resid), _get_idx(mol, "C", resid))
201235

202-
n_idx = _get_idx(mol, "N")
236+
n_idx = _get_idx(mol, "N", resid)
203237
mol.formalcharge[n_idx] = 0
204238
n_neighbours = list(gg.neighbors(n_idx))
205239
if len(n_neighbours) == 2:
@@ -208,15 +242,22 @@ def _prepare_for_parameterize(mol):
208242
align_idx = [n_idx, bb[1], non_bb_idx[0]]
209243
nterm = alanine.copy()
210244
nterm.align(
211-
[_get_idx(nterm, n) for n in ("N", "CA", "H")], refmol=mol, refsel=align_idx
245+
[_get_idx(nterm, n) for n in ("N", "CA", "H")],
246+
refmol=mol,
247+
refsel=align_idx,
212248
)
213249
nterm.filter("name H2", _logger=False)
214250
nterm.name[0] = "HN2"
215-
mol.append(nterm)
216-
mol.bonds = np.vstack((mol.bonds, [n_idx, mol.numAtoms - 1]))
217-
mol.bondtype = np.hstack((mol.bondtype, "1"))
251+
nterm.resname[:] = resname
252+
nterm.resid[:] = resid
253+
insert_idx = np.where(mol.resid == resid)[0][0] + 1 # Second position
254+
mol.insert(nterm, insert_idx)
255+
mol.addBond(n_idx, insert_idx, "1")
218256

219-
c_idx = _get_idx(mol, "C")
257+
gg = mol.toGraph()
258+
bb = nx.shortest_path(gg, _get_idx(mol, "N", resid), _get_idx(mol, "C", resid))
259+
260+
c_idx = _get_idx(mol, "C", resid)
220261
mol.formalcharge[c_idx] = 0
221262
c_neighbours = list(gg.neighbors(c_idx))
222263
if len(c_neighbours) == 2:
@@ -225,21 +266,19 @@ def _prepare_for_parameterize(mol):
225266
align_idx = [bb[-2], c_idx, non_bb_idx[0]]
226267
cterm = alanine.copy()
227268
cterm.align(
228-
[_get_idx(cterm, n) for n in ("CA", "C", "O")], refmol=mol, refsel=align_idx
269+
[_get_idx(cterm, n) for n in ("CA", "C", "O")],
270+
refmol=mol,
271+
refsel=align_idx,
229272
)
230273
cterm.filter("name OXT HXT", _logger=False)
231-
mol.append(cterm)
232-
mol.bonds = np.vstack((mol.bonds, [c_idx, mol.numAtoms - 2]))
233-
mol.bondtype = np.hstack((mol.bondtype, "1"))
234-
235-
# Rename to correct resname
236-
mol.resname[:] = resname
274+
cterm.resname[:] = resname
275+
cterm.resid[:] = resid
276+
insert_idx = np.where(mol.resid == resid)[0][-1] + 1 # End
277+
mol.insert(cterm, insert_idx)
278+
mol.addBond(c_idx, insert_idx, "1")
237279

238280
# Reorder atoms. AMBER order is: N H CA HA [sidechain] C O
239-
bbatoms = [x for x in ["N", "H", "CA", "HA", "C", "O"] if x in mol.name]
240-
ordered_idx = [_get_idx(mol, nn) for nn in bbatoms]
241-
other_idx = np.setdiff1d(range(mol.numAtoms), ordered_idx)
242-
mol.reorderAtoms(ordered_idx[:4] + other_idx.tolist() + ordered_idx[4:])
281+
_reorder_residue_atoms(mol, resid)
243282

244283
return mol
245284

@@ -280,7 +319,9 @@ def _convert_amber_prepi_to_pdb2pqr_residue(prepi, outdir, name=None):
280319
)
281320
mol.element[:] = sdf.element[:]
282321

283-
pmol = _process_custom_residue(mol, name)
322+
pmol = _process_custom_residue(mol)
323+
# Rename to correct resname
324+
pmol.resname[:] = name
284325

285326
_mol_to_xml_def(pmol, os.path.join(outdir, f"{name}.xml"))
286327
_mol_to_dat_def(pmol, os.path.join(outdir, f"{name}.dat"))

tests/test_systemprepare.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _compare_results(refpdb, refdf_f, pmol: Molecule, df):
7171
pmol.filter("not water", _logger=False)
7272
assert mol_equal(
7373
refmol, pmol, exceptFields=["serial"], fieldPrecision={"coords": 1e-3}
74-
)
74+
), f"Failed comparison of {refpdb} vs {pmol.fileloc}"
7575

7676

7777
@pytest.mark.parametrize("pdb", ["3PTB", "1A25", "1U5U", "1UNC", "6A5J"])
@@ -179,8 +179,8 @@ def _test_auto_freezing_and_force():
179179
("2QRV.pdb", "2QRV_prepared"),
180180
),
181181
)
182-
def _test_nonstandard_residues(files):
183-
from moleculekit.tools.preparation import autoSegment2
182+
def _test_nonstandard_residues(tmp_path, files):
183+
from moleculekit.tools.autosegment import autoSegment2
184184

185185
inf, outf = files
186186
test_home = os.path.join(
@@ -204,6 +204,8 @@ def _test_nonstandard_residues(files):
204204
hold_nonpeptidic_bonds=True,
205205
residue_smiles=res_smiles,
206206
)
207+
pmol.fileloc.append(os.path.join(tmp_path, "prepared.pdb"))
208+
pmol.write(pmol.fileloc[0])
207209

208210
_compare_results(
209211
os.path.join(test_home, f"{outf}.pdb"),
@@ -213,6 +215,8 @@ def _test_nonstandard_residues(files):
213215
)
214216

215217
pmol, df = systemPrepare(mol, return_details=True, hold_nonpeptidic_bonds=True)
218+
pmol.fileloc.append(os.path.join(tmp_path, "prepared.pdb"))
219+
pmol.write(pmol.fileloc[0])
216220

217221
_compare_results(
218222
os.path.join(test_home, f"{outf}.pdb"),
@@ -307,7 +311,7 @@ def _test_nucleiclike_ligand():
307311
test_home = os.path.join(curr_dir, "test_systemprepare", "3U5S")
308312
mol = Molecule(os.path.join(test_home, "3U5S.pdb"))
309313

310-
pmol, df = systemPrepare(mol, return_details=True, ignore_ns_errors=True)
314+
pmol, df = systemPrepare(mol, return_details=True, ignore_ns=True)
311315

312316
_compare_results(
313317
os.path.join(test_home, "3U5S_prepared.pdb"),

tests/test_systemprepare/test-nonstandard-residues/2QRV_prepared.csv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1819,7 +1819,7 @@ PHE,PHE,905,,X,P23,,
18191819
ALA,ALA,906,,X,P23,,
18201820
CYS,CYS,907,,X,P23,9.44095194923338,0.0
18211821
VAL,VAL,908,,X,P23,,
1822-
SAH,SAH,1,,Z,P25,,
1823-
SAH,SAH,4,,b,P27,,
1824-
SAH,SAH,5,,d,P29,,
1825-
SAH,SAH,8,,f,P31,,
1822+
SAH,SAH,1,,Y,P24,,
1823+
SAH,SAH,4,,Z,P25,,
1824+
SAH,SAH,5,,a,P26,,
1825+
SAH,SAH,8,,b,P27,,

0 commit comments

Comments
 (0)