Acellera
diff --git a/moleculekit/tools/preparation.py b/moleculekit/tools/preparation.py
Lines changed: 52 additions & 64 deletions
diff --git a/moleculekit/tools/preparation_customres.py b/moleculekit/tools/preparation_customres.py
Lines changed: 0 additions & 87 deletions
diff --git a/tests/test_systemprepare.py b/tests/test_systemprepare.py
Lines changed: 12 additions & 27 deletions
@@ -84,19 +84,12 @@ def _check_chain_and_segid(mol, verbose):
 
 
 def _generate_nonstandard_residues_ff(
-    mol,
-    definition,
-    forcefield,
-    _molkit_ff=True,
-    outdir=None,
-    ignore_ns_errors=False,
-    residue_smiles=None,
+    mol, definition, forcefield, _molkit_ff=True, outdir=None, residue_smiles=None
 ):
     import tempfile
     from moleculekit.tools.preparation_customres import _get_custom_ff
     from moleculekit.tools.preparation_customres import (
         _process_custom_residue,
-        _template_residue_from_smiles,
         _mol_to_dat_def,
         _mol_to_xml_def,
         _prepare_for_parameterize,
@@ -111,64 +104,61 @@ def _generate_nonstandard_residues_ff(
     if len(not_in_ff) == 0:
         return definition, forcefield
 
-    try:
-        from aceprep.prepare import rdk_prepare
-    except ImportError:
-        if ignore_ns_errors:
-            return definition, forcefield
+    residue_smiles = residue_smiles or {}
+    missing = np.setdiff1d(not_in_ff, list(residue_smiles.keys()))
+    if len(missing):
         raise RuntimeError(
-            "To protonate non-canonical aminoacids you need the aceprep library. Please contact Acellera info@acellera.com for more information or set ignore_ns_errors=True to ignore non-canonical residues in the protonation (this will leave the residues unprotonated)."
+            f"Missing topology for residues {missing}. "
+            "Please provide their SMILES in the residue_smiles dictionary or remove them from the input structure."
         )
 
     with tempfile.TemporaryDirectory() as tmpdir:
         for res in not_in_ff:
-            try:
-                logger.info(f"Attempting to template non-canonical residue {res}...")
-                # This removes the non-canonical hydrogens from the original mol object
-                mol.remove((mol.resname == res) & (mol.element == "H"), _logger=False)
-                molc = mol.copy()
-
-                # Hacky way of getting the first molecule, if there are copies
-                molresn = molc.resname == res
-                firstname = molc.name[molresn][0]
-                lastname = molc.name[molresn][-1]
-                start = np.where(molresn & (molc.name == firstname))[0][0]
-                end = np.where(molresn & (molc.name == lastname))[0][0]
-                # Remove all other stuff
-                molc.filter(f"index {start} to {end}", _logger=False)
-                molc.guessBonds()
-
-                if len(np.unique(molc.name)) != molc.numAtoms:
-                    raise RuntimeError(
-                        f"Residue {res} contains duplicate atom names. Please rename the atoms to have unique names."
-                    )
-
-                smiles = None
-                if residue_smiles is not None and res in residue_smiles:
-                    smiles = residue_smiles[res]
-
-                if smiles is not None and os.path.isfile(smiles):
-                    tmol = Molecule(smiles)
-                else:
-                    tmol = _template_residue_from_smiles(molc, res, smiles=smiles)
-                cres = _process_custom_residue(tmol)
-                # Rename to correct resname
-                cres.resname[:] = res
-
-                _mol_to_xml_def(cres, os.path.join(tmpdir, f"{res}.xml"))
-                _mol_to_dat_def(cres, os.path.join(tmpdir, f"{res}.dat"))
-                if outdir is not None:
-                    os.makedirs(outdir, exist_ok=True)
-                    pres = _prepare_for_parameterize(cres)
-                    pres.write(os.path.join(outdir, f"{res}.cif"))
-                logger.info(f"Succesfully templated non-canonical residue {res}.")
-            except Exception as e:
-                import traceback
+            logger.info(f"Attempting to template non-canonical residue {res}...")
+            # This removes the non-canonical hydrogens from the original mol object
+            mol.remove((mol.resname == res) & (mol.element == "H"), _logger=False)
+            molc = mol.copy()
+
+            # Hacky way of getting the first molecule, if there are copies
+            molresn = molc.resname == res
+            firstname = molc.name[molresn][0]
+            lastname = molc.name[molresn][-1]
+            start = np.where(molresn & (molc.name == firstname))[0][0]
+            end = np.where(molresn & (molc.name == lastname))[0][0]
+            # Remove all other stuff
+            molc.filter(f"index {start} to {end}", _logger=False)
+            molc.guessBonds()
+
+            if len(np.unique(molc.name)) != molc.numAtoms:
+                raise RuntimeError(
+                    f"Residue {res} contains duplicate atom names. Please rename the atoms to have unique names."
+                )
 
-                traceback.print_exc()
+            smiles = None
+            if residue_smiles is not None and res in residue_smiles:
+                smiles = residue_smiles[res]
+            if smiles is None:
                 raise RuntimeError(
-                    f"Failed to protonate non-canonical residue {res}. Please remove it from the protein or mutate it to continue preparation. Detailed error message: {e}"
+                    f"Residue {res} is not in the residue_smiles dictionary. Please add it to the dictionary or remove it from the protein."
                 )
+
+            if os.path.isfile(smiles):
+                molc = Molecule(smiles)
+            else:
+                molc.templateResidueFromSmiles("all", smiles, addHs=True)
+
+            cres = _process_custom_residue(molc)
+            # Rename to correct resname
+            cres.resname[:] = res
+
+            _mol_to_xml_def(cres, os.path.join(tmpdir, f"{res}.xml"))
+            _mol_to_dat_def(cres, os.path.join(tmpdir, f"{res}.dat"))
+            if outdir is not None:
+                os.makedirs(outdir, exist_ok=True)
+                pres = _prepare_for_parameterize(cres)
+                pres.write(os.path.join(outdir, f"{res}.cif"))
+            logger.info(f"Succesfully templated non-canonical residue {res}.")
+
         definition, forcefield = _get_custom_ff(user_ff=tmpdir, molkit_ff=_molkit_ff)
     return definition, forcefield
 
@@ -542,7 +532,6 @@ def systemPrepare(
     return_details=False,
     hydrophobic_thickness=None,
     plot_pka=None,
-    ignore_ns_errors=False,
     _logger_level="ERROR",
     _molkit_ff=True,
     outdir=None,
@@ -640,14 +629,14 @@ def systemPrepare(
         ignoring the covalent bond, meaning it may break the bonds or add hydrogen atoms between the bonds.
     plot_pka : str
         Provide a file path with .png extension to draw the titration diagram for the system residues.
-    ignore_ns_errors : bool
-        If False systemPrepare will issue an error when it fails to protonate non-canonical residues in the protein.
-        If True it will ignore errors on non-canonical residues leaving them unprotonated.
     outdir : str
         A path where to save custom residue cif files used for building
     residue_smiles : dict
         A dictionary with keys being residue names and values being the SMILES string of the residue. This is used to
-        create protonated versions of non-canonical residues with the help of the aceprep library.
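+        create protonated versions of non-canonical residues. A value may instead be the path to an existing structure file of the residue, which is then loaded directly.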
+    ignore_ns : bool
+        If False systemPrepare will issue an error when it fails to protonate non-canonical residues in the protein.
+        If True it will leave non-canonical residues unprotonated.
 
     Returns
     -------
@@ -745,7 +734,6 @@ def systemPrepare(
             forcefield,
             _molkit_ff,
             outdir,
-            ignore_ns_errors=ignore_ns_errors,
             residue_smiles=residue_smiles,
         )
 
 
@@ -18,93 +18,6 @@
 alanine = Molecule(os.path.join(__share_dir, "ALA.cif"), zerowarning=False)
 
 
-# def _template_residue_from_mol(molc: Molecule, template: Molecule, res: str):
-#     from moleculekit.tools.graphalignment import mcsAtomMatching
-
-#     if np.any(np.isin(template.bondtype, ("", "un"))):
-#         raise RuntimeError(f"Residue template {res} must contain correct bond orders.")
-#     if len(np.unique(molc.name)) != molc.numAtoms:
-#         raise RuntimeError(
-#             f"Residue {res} contains duplicate atom names. Please rename the atoms to have unique names."
-#         )
-
-#     template.atomtype = template.element  # Replace atomtypes for writing mol2
-#     atm1, atm2 = mcsAtomMatching(molc, template, bondCompare="any", _logger=False)
-#     heavy = molc.element != "H"
-#     if len(atm2) != len(heavy):
-#         raise RuntimeError(
-#             f"Residue template {res} matched only {len(atm2)} out of {len(heavy)} heavy atoms in the input molecule"
-#         )
-#     for a1, a2 in zip(atm1, atm2):  # Rename atoms in reference molecule
-#         template.name[a2] = molc.name[a1]
-
-#     # TODO: Not sure this is a good idea in general
-#     template.remove("name OXT HXT HN2", _logger=False)
-#     return template
-
-
-def _template_residue_from_smiles(inmol: Molecule, nsres: str, smiles=None):
-    from rdkit import Chem
-    from aceprep.detector import template_ligand
-    from aceprep.prepare import rdk_prepare
-    from moleculekit.tools.graphalignment import makeMolGraph, compareGraphs
-    import tempfile
-    import logging
-
-    acepreplog = logging.getLogger("aceprep")
-    oldlevel = acepreplog.getEffectiveLevel()
-    acepreplog.setLevel("CRITICAL")
-
-    try:
-        assert np.all(np.isin(["N", "CA", "C", "O"], inmol.name))
-
-        with tempfile.TemporaryDirectory() as outdir:
-            resfile = os.path.join(outdir, f"residue_{nsres}.pdb")
-            # Guess the bonds so that the rdkit templating will work
-            inmol.guessBonds()
-            inmol.write(resfile)
-
-            outsdf = os.path.join(outdir, f"residue_{nsres}_templated.sdf")
-            new_mol = template_ligand(resfile, nsres, smiles=smiles)
-            w = Chem.SDWriter(outsdf)
-            w.write(new_mol)
-            w.close()
-
-            outsdfh = outsdf.replace(".sdf", "_h.sdf")
-            rdk_prepare(
-                outsdf,
-                outsdfh,
-                os.path.join(outdir, "aceprep.log"),
-                gen3d=False,
-                canonicalize_tautomers=False,
-            )
-
-            mol = Molecule(outsdfh)
-    except Exception:
-        acepreplog.setLevel(oldlevel)
-        raise
-
-    acepreplog.setLevel(oldlevel)
-
-    fields = ("element",)
-    g1 = makeMolGraph(mol, "all", fields)
-    g2 = makeMolGraph(inmol, "all", fields)
-    _, _, matching = compareGraphs(
-        g1, g2, fields=fields, tolerance=0.5, returnmatching=True
-    )
-    for pp in matching:  # Rename atoms in reference molecule
-        mol.name[pp[0]] = inmol.name[pp[1]]
-
-    # Rename non-matched hydrogens to X_H to rename later
-    matched = [pp[0] for pp in matching]
-    for i in np.where(mol.element == "H")[0]:
-        if i in matched:
-            continue
-        mol.name[i] = "X_H"
-
-    return mol
-
-
 def _reorder_residue_atoms(mol, resid):
     # Reorder atoms. AMBER order is: N H CA HA [sidechain] C O
     # the H atom will get added later
 
@@ -6,14 +6,6 @@
 
 curr_dir = os.path.dirname(os.path.abspath(__file__))
 
-# The below is used for testing only
-try:
-    from aceprep.prepare import rdk_prepare
-except ImportError:
-    ACEPREP_EXISTS = False
-else:
-    ACEPREP_EXISTS = True
-
 
 def _compare_results(refpdb, refdf_f, pmol: Molecule, df):
     from moleculekit.util import tempname
@@ -170,7 +162,6 @@ def _test_auto_freezing_and_force():
     )
 
 
-@pytest.mark.skipif(not ACEPREP_EXISTS, reason="Can only run with aceprep installed")
 @pytest.mark.parametrize(
     "files",
     (
@@ -189,14 +180,15 @@ def _test_nonstandard_residues(tmp_path, files):
 
     res_smiles = {
         "200": "c1cc(ccc1C[C@@H](C(=O)O)N)Cl",
-        "HRG": "C(CCNC(=N)N)C[C@@H](C(=O)O)N",
-        "OIC": "C1CC[C@H]2[C@@H](C1)C[C@H](N2)C(=O)O",
-        "TYS": "c1cc(ccc1C[C@@H](C(=O)O)N)OS(=O)(=O)O",
+        "HRG": "C(CCNC(=N)N)C[C@@H](CO)N",
+        "OIC": "C1CC[C@H]2[C@@H](C1)C[C@H](N2)CO",
+        "TYS": "c1cc(ccc1C[C@@H](CO)N)OS(=O)(=O)O",
         "SAH": "c1nc(c2c(n1)n(cn2)[C@H]3[C@@H]([C@@H]([C@H](O3)CSCC[C@@H](C(=O)O)N)O)O)N",
     }
     mol = Molecule(os.path.join(test_home, inf))
     if inf == "2QRV.pdb":
         mol = autoSegment2(mol, fields=("chain", "segid"))
+    mol.set("chain", "W", sel="water")
 
     pmol, df = systemPrepare(
         mol,
@@ -214,19 +206,7 @@ def _test_nonstandard_residues(tmp_path, files):
         df,
     )
 
-    pmol, df = systemPrepare(mol, return_details=True, hold_nonpeptidic_bonds=True)
-    pmol.fileloc.append(os.path.join(tmp_path, "prepared.pdb"))
-    pmol.write(pmol.fileloc[0])
-
-    _compare_results(
-        os.path.join(test_home, f"{outf}.pdb"),
-        os.path.join(test_home, f"{outf}.csv"),
-        pmol,
-        df,
-    )
-
 
-@pytest.mark.skipif(ACEPREP_EXISTS, reason="Can only run WITHOUT aceprep installed")
 def _test_nonstandard_residue_hard_ignore_ns():
     test_home = os.path.join(
         curr_dir, "test_systemprepare", "test-nonstandard-residues"
@@ -238,7 +218,7 @@ def _test_nonstandard_residue_hard_ignore_ns():
         return_details=True,
         hold_nonpeptidic_bonds=True,
         _molkit_ff=False,
-        ignore_ns_errors=True,
+        ignore_ns=True,
     )
     _compare_results(
         os.path.join(test_home, "5VBL_prepared_ignore_ns.pdb"),
@@ -290,12 +270,17 @@ def _test_cyclic_peptides():
     )
 
 
-@pytest.mark.skipif(not ACEPREP_EXISTS, reason="Can only run with aceprep installed")
 def _test_cyclic_peptides_noncanonical():
     test_home = os.path.join(curr_dir, "test_systemprepare", "test-cyclic-peptides")
     mol = Molecule(os.path.join(test_home, "4TOT_E.pdb"))
 
-    pmol, df = systemPrepare(mol, return_details=True)
+    smiles = {
+        "33X": "CC(CO)NC",
+        "34E": "CN[C@@H]([C@H](C)CN1CCN(CCOC)CC1)C(O)",
+        "BMT": "C/C=C/C[C@@H](C)[C@H]([C@@H](CO)NC)O",
+        "DAL": "C[C@H](CO)N",
+    }
+    pmol, df = systemPrepare(mol, return_details=True, residue_smiles=smiles)
 
     _compare_results(
         os.path.join(test_home, "4TOT_E_prepared.pdb"),