Skip to content

Commit 0fc603d

Browse files
committed
remove aceprep from systemprepare and use Molecule.templateResidueFromSmiles method instead
1 parent b780abb commit 0fc603d

File tree

9 files changed

+1609
-1236
lines changed

9 files changed

+1609
-1236
lines changed

moleculekit/tools/preparation.py

Lines changed: 52 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -84,19 +84,12 @@ def _check_chain_and_segid(mol, verbose):
8484

8585

8686
def _generate_nonstandard_residues_ff(
87-
mol,
88-
definition,
89-
forcefield,
90-
_molkit_ff=True,
91-
outdir=None,
92-
ignore_ns_errors=False,
93-
residue_smiles=None,
87+
mol, definition, forcefield, _molkit_ff=True, outdir=None, residue_smiles=None
9488
):
9589
import tempfile
9690
from moleculekit.tools.preparation_customres import _get_custom_ff
9791
from moleculekit.tools.preparation_customres import (
9892
_process_custom_residue,
99-
_template_residue_from_smiles,
10093
_mol_to_dat_def,
10194
_mol_to_xml_def,
10295
_prepare_for_parameterize,
@@ -111,64 +104,61 @@ def _generate_nonstandard_residues_ff(
111104
if len(not_in_ff) == 0:
112105
return definition, forcefield
113106

114-
try:
115-
from aceprep.prepare import rdk_prepare
116-
except ImportError:
117-
if ignore_ns_errors:
118-
return definition, forcefield
107+
residue_smiles = residue_smiles or {}
108+
missing = np.setdiff1d(not_in_ff, list(residue_smiles.keys()))
109+
if len(missing):
119110
raise RuntimeError(
120-
"To protonate non-canonical aminoacids you need the aceprep library. Please contact Acellera info@acellera.com for more information or set ignore_ns_errors=True to ignore non-canonical residues in the protonation (this will leave the residues unprotonated)."
111+
f"Missing topology for residues {missing}. "
112+
"Please provide their SMILES in the residue_smiles dictionary or remove them from the input structure."
121113
)
122114

123115
with tempfile.TemporaryDirectory() as tmpdir:
124116
for res in not_in_ff:
125-
try:
126-
logger.info(f"Attempting to template non-canonical residue {res}...")
127-
# This removes the non-canonical hydrogens from the original mol object
128-
mol.remove((mol.resname == res) & (mol.element == "H"), _logger=False)
129-
molc = mol.copy()
130-
131-
# Hacky way of getting the first molecule, if there are copies
132-
molresn = molc.resname == res
133-
firstname = molc.name[molresn][0]
134-
lastname = molc.name[molresn][-1]
135-
start = np.where(molresn & (molc.name == firstname))[0][0]
136-
end = np.where(molresn & (molc.name == lastname))[0][0]
137-
# Remove all other stuff
138-
molc.filter(f"index {start} to {end}", _logger=False)
139-
molc.guessBonds()
140-
141-
if len(np.unique(molc.name)) != molc.numAtoms:
142-
raise RuntimeError(
143-
f"Residue {res} contains duplicate atom names. Please rename the atoms to have unique names."
144-
)
145-
146-
smiles = None
147-
if residue_smiles is not None and res in residue_smiles:
148-
smiles = residue_smiles[res]
149-
150-
if smiles is not None and os.path.isfile(smiles):
151-
tmol = Molecule(smiles)
152-
else:
153-
tmol = _template_residue_from_smiles(molc, res, smiles=smiles)
154-
cres = _process_custom_residue(tmol)
155-
# Rename to correct resname
156-
cres.resname[:] = res
157-
158-
_mol_to_xml_def(cres, os.path.join(tmpdir, f"{res}.xml"))
159-
_mol_to_dat_def(cres, os.path.join(tmpdir, f"{res}.dat"))
160-
if outdir is not None:
161-
os.makedirs(outdir, exist_ok=True)
162-
pres = _prepare_for_parameterize(cres)
163-
pres.write(os.path.join(outdir, f"{res}.cif"))
164-
logger.info(f"Succesfully templated non-canonical residue {res}.")
165-
except Exception as e:
166-
import traceback
117+
logger.info(f"Attempting to template non-canonical residue {res}...")
118+
# This removes the non-canonical hydrogens from the original mol object
119+
mol.remove((mol.resname == res) & (mol.element == "H"), _logger=False)
120+
molc = mol.copy()
121+
122+
# Hacky way of getting the first molecule, if there are copies
123+
molresn = molc.resname == res
124+
firstname = molc.name[molresn][0]
125+
lastname = molc.name[molresn][-1]
126+
start = np.where(molresn & (molc.name == firstname))[0][0]
127+
end = np.where(molresn & (molc.name == lastname))[0][0]
128+
# Remove all other stuff
129+
molc.filter(f"index {start} to {end}", _logger=False)
130+
molc.guessBonds()
131+
132+
if len(np.unique(molc.name)) != molc.numAtoms:
133+
raise RuntimeError(
134+
f"Residue {res} contains duplicate atom names. Please rename the atoms to have unique names."
135+
)
167136

168-
traceback.print_exc()
137+
smiles = None
138+
if residue_smiles is not None and res in residue_smiles:
139+
smiles = residue_smiles[res]
140+
if smiles is None:
169141
raise RuntimeError(
170-
f"Failed to protonate non-canonical residue {res}. Please remove it from the protein or mutate it to continue preparation. Detailed error message: {e}"
142+
f"Residue {res} is not in the residue_smiles dictionary. Please add it to the dictionary or remove it from the protein."
171143
)
144+
145+
if os.path.isfile(smiles):
146+
molc = Molecule(smiles)
147+
else:
148+
molc.templateResidueFromSmiles("all", smiles, addHs=True)
149+
150+
cres = _process_custom_residue(molc)
151+
# Rename to correct resname
152+
cres.resname[:] = res
153+
154+
_mol_to_xml_def(cres, os.path.join(tmpdir, f"{res}.xml"))
155+
_mol_to_dat_def(cres, os.path.join(tmpdir, f"{res}.dat"))
156+
if outdir is not None:
157+
os.makedirs(outdir, exist_ok=True)
158+
pres = _prepare_for_parameterize(cres)
159+
pres.write(os.path.join(outdir, f"{res}.cif"))
160+
logger.info(f"Succesfully templated non-canonical residue {res}.")
161+
172162
definition, forcefield = _get_custom_ff(user_ff=tmpdir, molkit_ff=_molkit_ff)
173163
return definition, forcefield
174164

@@ -542,7 +532,6 @@ def systemPrepare(
542532
return_details=False,
543533
hydrophobic_thickness=None,
544534
plot_pka=None,
545-
ignore_ns_errors=False,
546535
_logger_level="ERROR",
547536
_molkit_ff=True,
548537
outdir=None,
@@ -640,14 +629,14 @@ def systemPrepare(
640629
ignoring the covalent bond, meaning it may break the bonds or add hydrogen atoms between the bonds.
641630
plot_pka : str
642631
Provide a file path with .png extension to draw the titration diagram for the system residues.
643-
ignore_ns_errors : bool
644-
If False systemPrepare will issue an error when it fails to protonate non-canonical residues in the protein.
645-
If True it will ignore errors on non-canonical residues leaving them unprotonated.
646632
outdir : str
647633
A path where to save custom residue cif files used for building
648634
residue_smiles : dict
649635
A dictionary with keys being residue names and values being the SMILES string of the residue. This is used to
650-
create protonated versions of non-canonical residues with the help of the aceprep library.
636+
create protonated versions of non-canonical residues.
637+
ignore_ns : bool
638+
If False systemPrepare will issue an error when it fails to protonate non-canonical residues in the protein.
639+
If True it will leave non-canonical residues unprotonated.
651640
652641
Returns
653642
-------
@@ -745,7 +734,6 @@ def systemPrepare(
745734
forcefield,
746735
_molkit_ff,
747736
outdir,
748-
ignore_ns_errors=ignore_ns_errors,
749737
residue_smiles=residue_smiles,
750738
)
751739

moleculekit/tools/preparation_customres.py

Lines changed: 0 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -18,93 +18,6 @@
1818
alanine = Molecule(os.path.join(__share_dir, "ALA.cif"), zerowarning=False)
1919

2020

21-
# def _template_residue_from_mol(molc: Molecule, template: Molecule, res: str):
22-
# from moleculekit.tools.graphalignment import mcsAtomMatching
23-
24-
# if np.any(np.isin(template.bondtype, ("", "un"))):
25-
# raise RuntimeError(f"Residue template {res} must contain correct bond orders.")
26-
# if len(np.unique(molc.name)) != molc.numAtoms:
27-
# raise RuntimeError(
28-
# f"Residue {res} contains duplicate atom names. Please rename the atoms to have unique names."
29-
# )
30-
31-
# template.atomtype = template.element # Replace atomtypes for writing mol2
32-
# atm1, atm2 = mcsAtomMatching(molc, template, bondCompare="any", _logger=False)
33-
# heavy = molc.element != "H"
34-
# if len(atm2) != len(heavy):
35-
# raise RuntimeError(
36-
# f"Residue template {res} matched only {len(atm2)} out of {len(heavy)} heavy atoms in the input molecule"
37-
# )
38-
# for a1, a2 in zip(atm1, atm2): # Rename atoms in reference molecule
39-
# template.name[a2] = molc.name[a1]
40-
41-
# # TODO: Not sure this is a good idea in general
42-
# template.remove("name OXT HXT HN2", _logger=False)
43-
# return template
44-
45-
46-
def _template_residue_from_smiles(inmol: Molecule, nsres: str, smiles=None):
47-
from rdkit import Chem
48-
from aceprep.detector import template_ligand
49-
from aceprep.prepare import rdk_prepare
50-
from moleculekit.tools.graphalignment import makeMolGraph, compareGraphs
51-
import tempfile
52-
import logging
53-
54-
acepreplog = logging.getLogger("aceprep")
55-
oldlevel = acepreplog.getEffectiveLevel()
56-
acepreplog.setLevel("CRITICAL")
57-
58-
try:
59-
assert np.all(np.isin(["N", "CA", "C", "O"], inmol.name))
60-
61-
with tempfile.TemporaryDirectory() as outdir:
62-
resfile = os.path.join(outdir, f"residue_{nsres}.pdb")
63-
# Guess the bonds so that the rdkit templating will work
64-
inmol.guessBonds()
65-
inmol.write(resfile)
66-
67-
outsdf = os.path.join(outdir, f"residue_{nsres}_templated.sdf")
68-
new_mol = template_ligand(resfile, nsres, smiles=smiles)
69-
w = Chem.SDWriter(outsdf)
70-
w.write(new_mol)
71-
w.close()
72-
73-
outsdfh = outsdf.replace(".sdf", "_h.sdf")
74-
rdk_prepare(
75-
outsdf,
76-
outsdfh,
77-
os.path.join(outdir, "aceprep.log"),
78-
gen3d=False,
79-
canonicalize_tautomers=False,
80-
)
81-
82-
mol = Molecule(outsdfh)
83-
except Exception:
84-
acepreplog.setLevel(oldlevel)
85-
raise
86-
87-
acepreplog.setLevel(oldlevel)
88-
89-
fields = ("element",)
90-
g1 = makeMolGraph(mol, "all", fields)
91-
g2 = makeMolGraph(inmol, "all", fields)
92-
_, _, matching = compareGraphs(
93-
g1, g2, fields=fields, tolerance=0.5, returnmatching=True
94-
)
95-
for pp in matching: # Rename atoms in reference molecule
96-
mol.name[pp[0]] = inmol.name[pp[1]]
97-
98-
# Rename non-matched hydrogens to X_H to rename later
99-
matched = [pp[0] for pp in matching]
100-
for i in np.where(mol.element == "H")[0]:
101-
if i in matched:
102-
continue
103-
mol.name[i] = "X_H"
104-
105-
return mol
106-
107-
10821
def _reorder_residue_atoms(mol, resid):
10922
# Reorder atoms. AMBER order is: N H CA HA [sidechain] C O
11023
# the H atom will get added later

tests/test_systemprepare.py

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,6 @@
66

77
curr_dir = os.path.dirname(os.path.abspath(__file__))
88

9-
# The below is used for testing only
10-
try:
11-
from aceprep.prepare import rdk_prepare
12-
except ImportError:
13-
ACEPREP_EXISTS = False
14-
else:
15-
ACEPREP_EXISTS = True
16-
179

1810
def _compare_results(refpdb, refdf_f, pmol: Molecule, df):
1911
from moleculekit.util import tempname
@@ -170,7 +162,6 @@ def _test_auto_freezing_and_force():
170162
)
171163

172164

173-
@pytest.mark.skipif(not ACEPREP_EXISTS, reason="Can only run with aceprep installed")
174165
@pytest.mark.parametrize(
175166
"files",
176167
(
@@ -189,14 +180,15 @@ def _test_nonstandard_residues(tmp_path, files):
189180

190181
res_smiles = {
191182
"200": "c1cc(ccc1C[C@@H](C(=O)O)N)Cl",
192-
"HRG": "C(CCNC(=N)N)C[C@@H](C(=O)O)N",
193-
"OIC": "C1CC[C@H]2[C@@H](C1)C[C@H](N2)C(=O)O",
194-
"TYS": "c1cc(ccc1C[C@@H](C(=O)O)N)OS(=O)(=O)O",
183+
"HRG": "C(CCNC(=N)N)C[C@@H](CO)N",
184+
"OIC": "C1CC[C@H]2[C@@H](C1)C[C@H](N2)CO",
185+
"TYS": "c1cc(ccc1C[C@@H](CO)N)OS(=O)(=O)O",
195186
"SAH": "c1nc(c2c(n1)n(cn2)[C@H]3[C@@H]([C@@H]([C@H](O3)CSCC[C@@H](C(=O)O)N)O)O)N",
196187
}
197188
mol = Molecule(os.path.join(test_home, inf))
198189
if inf == "2QRV.pdb":
199190
mol = autoSegment2(mol, fields=("chain", "segid"))
191+
mol.set("chain", "W", sel="water")
200192

201193
pmol, df = systemPrepare(
202194
mol,
@@ -214,19 +206,7 @@ def _test_nonstandard_residues(tmp_path, files):
214206
df,
215207
)
216208

217-
pmol, df = systemPrepare(mol, return_details=True, hold_nonpeptidic_bonds=True)
218-
pmol.fileloc.append(os.path.join(tmp_path, "prepared.pdb"))
219-
pmol.write(pmol.fileloc[0])
220-
221-
_compare_results(
222-
os.path.join(test_home, f"{outf}.pdb"),
223-
os.path.join(test_home, f"{outf}.csv"),
224-
pmol,
225-
df,
226-
)
227-
228209

229-
@pytest.mark.skipif(ACEPREP_EXISTS, reason="Can only run WITHOUT aceprep installed")
230210
def _test_nonstandard_residue_hard_ignore_ns():
231211
test_home = os.path.join(
232212
curr_dir, "test_systemprepare", "test-nonstandard-residues"
@@ -238,7 +218,7 @@ def _test_nonstandard_residue_hard_ignore_ns():
238218
return_details=True,
239219
hold_nonpeptidic_bonds=True,
240220
_molkit_ff=False,
241-
ignore_ns_errors=True,
221+
ignore_ns=True,
242222
)
243223
_compare_results(
244224
os.path.join(test_home, "5VBL_prepared_ignore_ns.pdb"),
@@ -290,12 +270,17 @@ def _test_cyclic_peptides():
290270
)
291271

292272

293-
@pytest.mark.skipif(not ACEPREP_EXISTS, reason="Can only run with aceprep installed")
294273
def _test_cyclic_peptides_noncanonical():
295274
test_home = os.path.join(curr_dir, "test_systemprepare", "test-cyclic-peptides")
296275
mol = Molecule(os.path.join(test_home, "4TOT_E.pdb"))
297276

298-
pmol, df = systemPrepare(mol, return_details=True)
277+
smiles = {
278+
"33X": "CC(CO)NC",
279+
"34E": "CN[C@@H]([C@H](C)CN1CCN(CCOC)CC1)C(O)",
280+
"BMT": "C/C=C/C[C@@H](C)[C@H]([C@@H](CO)NC)O",
281+
"DAL": "C[C@H](CO)N",
282+
}
283+
pmol, df = systemPrepare(mol, return_details=True, residue_smiles=smiles)
299284

300285
_compare_results(
301286
os.path.join(test_home, "4TOT_E_prepared.pdb"),

0 commit comments

Comments
 (0)