From a5e361a523e0aa915514ab7bd40b8a8de614a7d9 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Thu, 20 Mar 2025 23:45:12 +0000
Subject: [PATCH 01/19] first stab at AdsorbML recipe; add ocp extra

---
 pyproject.toml                       |   1 +
 src/quacc/recipes/common/adsorbml.py | 363 +++++++++++++++++++++++++++
 2 files changed, 364 insertions(+)
 create mode 100644 src/quacc/recipes/common/adsorbml.py

diff --git a/pyproject.toml b/pyproject.toml
index fa87273af4..91b2f348cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,6 +50,7 @@ mlp1 = ["chgnet>=0.3.3", "torch-dftd>=0.4.0", "sevenn>=0.10.1",  "orb-models>=0.
 mlp2 = ["mace-torch>=0.3.3", "matgl>=1.1.2"]
 mp = ["atomate2>=0.0.14"]
 newtonnet = ["newtonnet>=1.1"]
+ocp = ["fairchem-data-oc>=0.2.0"]
 parsl = ["parsl[monitoring]>=2024.5.27; platform_system!='Windows'"]
 phonons = ["phonopy>=2.20.0", "seekpath>=2.1.0"]
 prefect = ["prefect[dask]>=2.19.0", "dask-jobqueue>=0.8.2"]
diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
new file mode 100644
index 0000000000..1b9ead18b3
--- /dev/null
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -0,0 +1,363 @@
+from __future__ import annotations
+
+import copy
+import logging
+from typing import TYPE_CHECKING, Any, Literal
+
+import numpy as np
+from ase.atoms import Atoms
+from ase.build import molecule
+from fairchem.data.oc.core.adsorbate import Adsorbate
+from fairchem.data.oc.core.bulk import Bulk
+from fairchem.data.oc.core.multi_adsorbate_slab_config import (
+    MultipleAdsorbateSlabConfig,
+)
+from fairchem.data.oc.core.slab import Slab, tile_and_tag_atoms
+from fairchem.data.oc.utils import DetectTrajAnomaly
+
+from quacc import Job, flow, job
+from quacc.utils.dicts import recursive_dict_merge
+from quacc.wflow_tools.customizers import customize_funcs, strip_decorator
+
+if TYPE_CHECKING:
+    from quacc.types import OptSchema, RunSchema
+
+logger = logging.getLogger(__name__)
+
+
+@job
+def ocp_surface_generator(bulk_atoms, max_miller: int = 1):
+    # This gets ocdata.core.slab.Slab objects which already know which atoms are the surface atoms!
+    return Slab.from_bulk_get_all_slabs(Bulk(bulk_atoms), max_miller)
+
+
+class CustomSlab(Slab):
+    # Custom slab object to ignore the composition of the bulk in the initialization
+    # and make sure that the user did the right things to have the surface tagged for
+    # adsorbate placement
+
+    def __init__(
+        self,
+        slab_atoms: Atoms,
+        bulk: Atoms | None = None,
+        millers: tuple | None = None,
+        shift: float | None = None,
+        top: bool | None = None,
+        min_ab: float = 0.8,
+    ):
+        self.bulk = bulk
+        self.slab_atoms = slab_atoms
+        self.millers = millers
+        self.shift = shift
+        self.top = top
+
+        assert np.linalg.norm(self.slab_atoms.cell[0]) >= min_ab, "Slab not tiled"
+        assert np.linalg.norm(self.slab_atoms.cell[1]) >= min_ab, "Slab not tiled"
+        assert self.has_surface_tagged(), "Slab not tagged"
+        assert len(self.slab_atoms.constraints) > 0, "Sub-surface atoms not constrained"
+
+
+# Not clear this actually needs to a job, maybe just a function if pretty fast?
+@job
+def ocp_adslab_generator(
+    slab: Slab | Atoms,
+    adsorbates_kwargs: list[dict[Any, Any]] | None = None,
+    multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None,
+) -> list[Atoms]:
+    if adsorbates_kwargs is None:
+        logger.warning("No adsorbate kwargs found, defaulting to a single *OH!")
+        adsorbates_kwargs = [{"adsorbate_smiles_from_db": "*OH"}]
+
+    adsorbates = [
+        Adsorbate(**adsorbate_kwargs) for adsorbate_kwargs in adsorbates_kwargs
+    ]
+
+    if isinstance(slab, Atoms):
+        try:
+            slab = CustomSlab(slab_atoms=slab)
+        except AssertionError:
+            slab = CustomSlab(slab_atoms=tile_and_tag_atoms(slab))
+            logger.warning(
+                "The slab was not tagged and/or tiled. "
+                "We did the best we could, but you should be careful and check the results!"
+            )
+
+    if multiple_adsorbate_slab_config_kwargs is None:
+        multiple_adsorbate_slab_config_kwargs = {}
+
+    adslabs = MultipleAdsorbateSlabConfig(
+        copy.deepcopy(slab), adsorbates, **multiple_adsorbate_slab_config_kwargs
+    )
+
+    return adslabs.atoms_list
+
+
+@flow
+def find_adslabs_each_slab(
+    slabs: list[Slab],
+    adsorbates_kwargs: dict[Any, Any],
+    multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None,
+):
+    return [
+        {
+            "slab": slab,
+            "adslabs": ocp_adslab_generator(
+                slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs
+            ),
+        }
+        for slab in slabs
+    ]
+
+
+def detect_anomaly(
+    initial_atoms: Atoms, final_atoms: Atoms
+) -> list[
+    Literal[
+        "adsorbate_dissociated",
+        "adsorbate_desorbed",
+        "surface_changed",
+        "adsorbate_intercalated",
+    ]
+]:
+    atom_tags = initial_atoms.get_tags()
+
+    detector = DetectTrajAnomaly(initial_atoms, final_atoms, atom_tags)
+    anomalies = []
+    if detector.is_adsorbate_dissociated():
+        anomalies.append("adsorbate_dissociated")
+    if detector.is_adsorbate_desorbed():
+        anomalies.append("adsorbate_desorbed")
+    if detector.has_surface_changed():
+        anomalies.append("surface_changed")
+    if detector.is_adsorbate_intercalated():
+        anomalies.append("adsorbate_intercalated")
+    return anomalies
+
+
+@job
+def filter_sort_select_adslabs(
+    adslab_results: list[RunSchema], adslab_anomalies_list: list[list[str]]
+):
+    for adslab_result, adslab_anomalies in zip(
+        adslab_results, adslab_anomalies_list, strict=True
+    ):
+        adslab_result["results"]["adslab_anomalies"] = adslab_anomalies
+
+    adslabs_no_anomalies = [
+        adslab_result
+        for adslab_result in adslab_results
+        if len(adslab_result["results"]["adslab_anomalies"]) == 0
+    ]
+
+    return sorted(adslabs_no_anomalies, key=lambda x: x["results"]["energy"])
+
+
+@flow
+def adsorb_ml_pipeline(
+    slab: Slab,
+    adsorbates_kwargs: dict[Any, Any],
+    multiple_adsorbate_slab_config_kwargs: dict[Any, Any],
+    ml_slab_adslab_relax_job: Job,
+    slab_validate_job: Job,
+    adslab_validate_job: Job,
+    gas_validate_job: Job,
+    num_to_validate_with_DFT: int = 0,
+    ml_gas_references: dict[str, RunSchema] | None = None,
+):
+    unrelaxed_adslab_configurations = ocp_adslab_generator(
+        slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs
+    )
+
+    ml_relaxed_configurations = [
+        ml_slab_adslab_relax_job(adslab_configuration)
+        for adslab_configuration in unrelaxed_adslab_configurations
+    ]
+
+    if ml_gas_references is None:
+        ml_gas_references = {
+            "N2": ml_slab_adslab_relax_job(
+                molecule_pbc("N2", vacuum=10), relax_cell=False
+            ),
+            "CO": ml_slab_adslab_relax_job(
+                molecule_pbc("CO", vacuum=10), relax_cell=False
+            ),
+            "H2": ml_slab_adslab_relax_job(
+                molecule_pbc("H2", vacuum=10), relax_cell=False
+            ),
+            "H2O": ml_slab_adslab_relax_job(
+                molecule_pbc("H2O", vacuum=10), relax_cell=False
+            ),
+        }
+
+    ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms)
+
+    ml_referenced_configurations = reference_adslab_energies(
+        ml_relaxed_configurations,
+        ml_relaxed_slab_result,
+        CO_result=ml_gas_references["CO"],
+        N2_result=ml_gas_references["N2"],
+        H2_result=ml_gas_references["H2"],
+        H2O_result=ml_gas_references["H2O"],
+    )
+
+    adslab_anomalies_list = [
+        job(detect_anomaly)(
+            relaxed_result["input_atoms"]["atoms"], relaxed_result["atoms"]
+        )
+        for relaxed_result in ml_relaxed_configurations
+    ]
+
+    top_candidates = filter_sort_select_adslabs(
+        adslab_results=ml_referenced_configurations,
+        adslab_anomalies_list=adslab_anomalies_list,
+    )
+
+    if num_to_validate_with_DFT == 0:
+        return {
+            "slab": slab,
+            "unrelaxed_configurations": unrelaxed_adslab_configurations,
+            "adslab_ml_relaxed_configurations": ml_relaxed_configurations,
+            "adslab_anomalies": adslab_anomalies_list,
+        }
+    else:
+        return {
+            "slab": slab,
+            "unrelaxed_configurations": unrelaxed_adslab_configurations,
+            "adslab_ml_relaxed_configurations": ml_relaxed_configurations,
+            "adslab_anomalies": adslab_anomalies_list,
+            "validated_structures": {
+                "validated_adslabs": [
+                    adslab_validate_job(top_candidates[i]["relaxed_configuration"])
+                    for i in range(num_to_validate_with_DFT)
+                ],
+                "slab_validated": slab_validate_job(slab.atoms, relax_cell=False),
+            },
+        }
+
+
+@job
+def reference_adslab_energies(
+    adslab_results: list[dict[str, Any]],
+    slab_result: RunSchema,
+    CO_result: OptSchema | RunSchema,
+    H2_result: OptSchema | RunSchema,
+    H2O_result: OptSchema | RunSchema,
+    N2_result: OptSchema | RunSchema,
+):
+    adslab_results = copy.deepcopy(adslab_results)
+    atomic_energies = {
+        "H": H2_result["results"]["energy"] / 2,
+        "N": N2_result["results"]["energy"] / 2,
+        "O": (H2O_result["results"]["energy"] - H2_result["results"]["energy"]),
+        "C": CO_result["results"]["energy"]
+        - (H2O_result["results"]["energy"] - H2_result["results"]["energy"]),
+    }
+
+    slab_energy = slab_result["results"]["energy"]
+
+    return [
+        recursive_dict_merge(
+            adslab_result,
+            {
+                "results_references": {"atomic_energies": atomic_energies},
+                "referenced_adsorption_energy": adslab_result["results"]["energy"]
+                - slab_energy
+                - sum(
+                    [
+                        atomic_energies[atom.symbol]
+                        for atom in adslab_result["atoms"][
+                            adslab_result["atoms"].get_tags() == 2
+                        ]  # all adsorbate tagged with tag=2!
+                    ]
+                ),
+            },
+        )
+        for adslab_result in adslab_results
+    ]
+
+
+def molecule_pbc(*args, **molecule_kwargs):
+    atoms = molecule(*args, **molecule_kwargs)
+    atoms.pbc = True
+    return atoms
+
+
+@flow
+def standard_ocp_dataset_enumeration(
+    bulk_atoms: Atoms,
+    adsorbates_kwargs: dict[str, Any],
+    multiple_adsorbate_slab_config_kwargs: dict[str, Any],
+    ml_relax_job: Job,
+    slab_validate_job: Job,
+    adslab_validate_job: Job,
+    gas_validate_job: Job,
+    bulk_relax_job: Job | None,
+    job_params: dict[str, dict[str, Any]] | None = None,
+    job_decorators: dict[str, dict[str, Any]] | None = None,
+    max_miller: int = 1,
+    num_to_validate_with_DFT: int = 0,
+):
+    (
+        bulk_relax_job_,
+        ml_slab_adslab_relax_job_,
+        slab_validate_job_,
+        adslab_validate_job_,
+        gas_validate_job_,
+    ) = customize_funcs(
+        [
+            "bulk_relax_job",
+            "ml_slab_adslab_relax_job",
+            "slab_validate_job",
+            "adslab_validate_job",
+            "gas_relax_job",
+        ],
+        [
+            bulk_relax_job,  # type: ignore
+            ml_relax_job,
+            slab_validate_job,
+            adslab_validate_job,
+            gas_validate_job,
+        ],
+        param_swaps=job_params,
+        decorators=job_decorators,  # type: ignore
+    )
+
+    if bulk_relax_job is not None:
+        bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"]
+
+    slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller)
+
+    ml_gas_references = {
+        "N2": ml_slab_adslab_relax_job_(
+            molecule_pbc("N2", vacuum=10), relax_cell=False
+        ),
+        "CO": ml_slab_adslab_relax_job_(
+            molecule_pbc("CO", vacuum=10), relax_cell=False
+        ),
+        "H2": ml_slab_adslab_relax_job_(
+            molecule_pbc("H2", vacuum=10), relax_cell=False
+        ),
+        "H2O": ml_slab_adslab_relax_job_(
+            molecule_pbc("H2O", vacuum=10), relax_cell=False
+        ),
+    }
+
+    @flow
+    def adsorbML_each_surface(slabs, **kwargs):
+        return [
+            # We strip the decorator here so it's a bunch of jobs, not subflows. Helpful for prefect!
+            strip_decorator(adsorb_ml_pipeline)(slab=slab, **kwargs)
+            for slab in slabs
+        ]
+
+    return adsorbML_each_surface(
+        slabs=slabs,
+        adsorbates_kwargs=adsorbates_kwargs,
+        multiple_adsorbate_slab_config_kwargs=multiple_adsorbate_slab_config_kwargs,
+        ml_slab_adslab_relax_job=ml_slab_adslab_relax_job_,
+        slab_validate_job=slab_validate_job_,
+        adslab_validate_job=adslab_validate_job_,
+        gas_validate_job=gas_validate_job_,
+        num_to_validate_with_DFT=num_to_validate_with_DFT,
+        ml_gas_references=ml_gas_references,
+    )

From 770c6da550b756cc32d79ef559be87684deed387 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Fri, 21 Mar 2025 00:44:24 +0000
Subject: [PATCH 02/19] minor fixes: optional gas referencing, relax_bulk flag

---
 src/quacc/recipes/common/adsorbml.py | 135 +++++++++++++++------------
 1 file changed, 74 insertions(+), 61 deletions(-)

diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index 1b9ead18b3..4fd4b32ed4 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -64,10 +64,6 @@ def ocp_adslab_generator(
     adsorbates_kwargs: list[dict[Any, Any]] | None = None,
     multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None,
 ) -> list[Atoms]:
-    if adsorbates_kwargs is None:
-        logger.warning("No adsorbate kwargs found, defaulting to a single *OH!")
-        adsorbates_kwargs = [{"adsorbate_smiles_from_db": "*OH"}]
-
     adsorbates = [
         Adsorbate(**adsorbate_kwargs) for adsorbate_kwargs in adsorbates_kwargs
     ]
@@ -163,6 +159,7 @@ def adsorb_ml_pipeline(
     gas_validate_job: Job,
     num_to_validate_with_DFT: int = 0,
     ml_gas_references: dict[str, RunSchema] | None = None,
+    reference_ml_energies_to_gas_phase: bool = False,
 ):
     unrelaxed_adslab_configurations = ocp_adslab_generator(
         slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs
@@ -173,32 +170,32 @@ def adsorb_ml_pipeline(
         for adslab_configuration in unrelaxed_adslab_configurations
     ]
 
-    if ml_gas_references is None:
-        ml_gas_references = {
-            "N2": ml_slab_adslab_relax_job(
-                molecule_pbc("N2", vacuum=10), relax_cell=False
-            ),
-            "CO": ml_slab_adslab_relax_job(
-                molecule_pbc("CO", vacuum=10), relax_cell=False
-            ),
-            "H2": ml_slab_adslab_relax_job(
-                molecule_pbc("H2", vacuum=10), relax_cell=False
-            ),
-            "H2O": ml_slab_adslab_relax_job(
-                molecule_pbc("H2O", vacuum=10), relax_cell=False
-            ),
-        }
-
     ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms)
 
-    ml_referenced_configurations = reference_adslab_energies(
-        ml_relaxed_configurations,
-        ml_relaxed_slab_result,
-        CO_result=ml_gas_references["CO"],
-        N2_result=ml_gas_references["N2"],
-        H2_result=ml_gas_references["H2"],
-        H2O_result=ml_gas_references["H2O"],
-    )
+    if reference_ml_energies_to_gas_phase:
+        if ml_gas_references is None:
+            ml_gas_references = {
+                "N2": ml_slab_adslab_relax_job(
+                    molecule_pbc("N2", vacuum=10), relax_cell=False
+                ),
+                "CO": ml_slab_adslab_relax_job(
+                    molecule_pbc("CO", vacuum=10), relax_cell=False
+                ),
+                "H2": ml_slab_adslab_relax_job(
+                    molecule_pbc("H2", vacuum=10), relax_cell=False
+                ),
+                "H2O": ml_slab_adslab_relax_job(
+                    molecule_pbc("H2O", vacuum=10), relax_cell=False
+                ),
+            }
+        ml_relaxed_configurations = reference_adslab_energies(
+            ml_relaxed_configurations,
+            ml_relaxed_slab_result,
+            CO_result=ml_gas_references["CO"],
+            N2_result=ml_gas_references["N2"],
+            H2_result=ml_gas_references["H2"],
+            H2O_result=ml_gas_references["H2O"],
+        )
 
     adslab_anomalies_list = [
         job(detect_anomaly)(
@@ -208,26 +205,24 @@ def adsorb_ml_pipeline(
     ]
 
     top_candidates = filter_sort_select_adslabs(
-        adslab_results=ml_referenced_configurations,
+        adslab_results=ml_relaxed_configurations,
         adslab_anomalies_list=adslab_anomalies_list,
     )
 
     if num_to_validate_with_DFT == 0:
         return {
             "slab": slab,
-            "unrelaxed_configurations": unrelaxed_adslab_configurations,
-            "adslab_ml_relaxed_configurations": ml_relaxed_configurations,
+            "adslab_ml_relaxed_configurations": top_candidates,
             "adslab_anomalies": adslab_anomalies_list,
         }
     else:
         return {
             "slab": slab,
-            "unrelaxed_configurations": unrelaxed_adslab_configurations,
-            "adslab_ml_relaxed_configurations": ml_relaxed_configurations,
+            "adslab_ml_relaxed_configurations": top_candidates,
             "adslab_anomalies": adslab_anomalies_list,
             "validated_structures": {
                 "validated_adslabs": [
-                    adslab_validate_job(top_candidates[i]["relaxed_configuration"])
+                    adslab_validate_job(top_candidates[i]["atoms"])
                     for i in range(num_to_validate_with_DFT)
                 ],
                 "slab_validated": slab_validate_job(slab.atoms, relax_cell=False),
@@ -259,17 +254,29 @@ def reference_adslab_energies(
         recursive_dict_merge(
             adslab_result,
             {
-                "results_references": {"atomic_energies": atomic_energies},
-                "referenced_adsorption_energy": adslab_result["results"]["energy"]
-                - slab_energy
-                - sum(
-                    [
-                        atomic_energies[atom.symbol]
-                        for atom in adslab_result["atoms"][
-                            adslab_result["atoms"].get_tags() == 2
-                        ]  # all adsorbate tagged with tag=2!
-                    ]
-                ),
+                "results": {
+                    "atomic_energies": atomic_energies,
+                    "slab_energy": slab_energy,
+                    "adslab_energy": adslab_result["results"]["energy"],
+                    "gas_reactant_energy": sum(
+                        [
+                            atomic_energies[atom.symbol]
+                            for atom in adslab_result["atoms"][
+                                adslab_result["atoms"].get_tags() == 2
+                            ]  # all adsorbate tagged with tag=2!
+                        ]
+                    ),
+                    "adsorption_energy": adslab_result["results"]["energy"]
+                    - slab_energy
+                    - sum(
+                        [
+                            atomic_energies[atom.symbol]
+                            for atom in adslab_result["atoms"][
+                                adslab_result["atoms"].get_tags() == 2
+                            ]  # all adsorbate tagged with tag=2!
+                        ]
+                    ),
+                }
             },
         )
         for adslab_result in adslab_results
@@ -296,6 +303,8 @@ def standard_ocp_dataset_enumeration(
     job_decorators: dict[str, dict[str, Any]] | None = None,
     max_miller: int = 1,
     num_to_validate_with_DFT: int = 0,
+    reference_ml_energies_to_gas_phase: bool = True,
+    relax_bulk: bool = True,
 ):
     (
         bulk_relax_job_,
@@ -309,7 +318,7 @@ def standard_ocp_dataset_enumeration(
             "ml_slab_adslab_relax_job",
             "slab_validate_job",
             "adslab_validate_job",
-            "gas_relax_job",
+            "gas_validate_job",
         ],
         [
             bulk_relax_job,  # type: ignore
@@ -322,25 +331,28 @@ def standard_ocp_dataset_enumeration(
         decorators=job_decorators,  # type: ignore
     )
 
-    if bulk_relax_job is not None:
+    if relax_bulk:
         bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"]
 
     slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller)
 
-    ml_gas_references = {
-        "N2": ml_slab_adslab_relax_job_(
-            molecule_pbc("N2", vacuum=10), relax_cell=False
-        ),
-        "CO": ml_slab_adslab_relax_job_(
-            molecule_pbc("CO", vacuum=10), relax_cell=False
-        ),
-        "H2": ml_slab_adslab_relax_job_(
-            molecule_pbc("H2", vacuum=10), relax_cell=False
-        ),
-        "H2O": ml_slab_adslab_relax_job_(
-            molecule_pbc("H2O", vacuum=10), relax_cell=False
-        ),
-    }
+    if reference_ml_energies_to_gas_phase:
+        ml_gas_references = {
+            "N2": ml_slab_adslab_relax_job_(
+                molecule_pbc("N2", vacuum=10), relax_cell=False
+            ),
+            "CO": ml_slab_adslab_relax_job_(
+                molecule_pbc("CO", vacuum=10), relax_cell=False
+            ),
+            "H2": ml_slab_adslab_relax_job_(
+                molecule_pbc("H2", vacuum=10), relax_cell=False
+            ),
+            "H2O": ml_slab_adslab_relax_job_(
+                molecule_pbc("H2O", vacuum=10), relax_cell=False
+            ),
+        }
+    else:
+        ml_gas_references = None
 
     @flow
     def adsorbML_each_surface(slabs, **kwargs):
@@ -360,4 +372,5 @@ def adsorbML_each_surface(slabs, **kwargs):
         gas_validate_job=gas_validate_job_,
         num_to_validate_with_DFT=num_to_validate_with_DFT,
         ml_gas_references=ml_gas_references,
+        reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase,
     )

From d0127267d3ad40be74a903c6285d4ac8b2203013 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Fri, 21 Mar 2025 18:04:49 +0000
Subject: [PATCH 03/19] clean up: add docstrings, TypedDicts, return types

---
 src/quacc/recipes/common/adsorbml.py | 409 +++++++++++++++++++++------
 1 file changed, 321 insertions(+), 88 deletions(-)

diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index 4fd4b32ed4..e415109c1a 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -2,7 +2,7 @@
 
 import copy
 import logging
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any, Literal, TypedDict
 
 import numpy as np
 from ase.atoms import Atoms
@@ -24,10 +24,44 @@
 
 logger = logging.getLogger(__name__)
 
+class AdsorbatesKwargs(TypedDict):
+    adsorbate_type: str
+    position: list[float]
+    orientation: list[float]
+
+class MultipleAdsorbateSlabConfigKwargs(TypedDict):
+    num_adsorbates: int
+    adsorbate_distance: float
+
+class MoleculeResults(TypedDict):
+    N2: RunSchema
+    CO: RunSchema
+    H2: RunSchema
+    H2O: RunSchema
+
+class AtomicReferenceEnergies(TypedDict):
+    H: float
+    N: float
+    O: float
+    C: float
 
 @job
-def ocp_surface_generator(bulk_atoms, max_miller: int = 1):
-    # This gets ocdata.core.slab.Slab objects which already know which atoms are the surface atoms!
+def ocp_surface_generator(bulk_atoms: Atoms, max_miller: int = 1) -> list[Slab]:
+    """
+    Generate surface slabs from bulk atoms.
+
+    Parameters
+    ----------
+    bulk_atoms : Atoms
+        The bulk atomic structure.
+    max_miller : int, optional
+        Maximum Miller index, by default 1.
+
+    Returns
+    -------
+    list[Slab]
+        List of generated surface slabs.
+    """
     return Slab.from_bulk_get_all_slabs(Bulk(bulk_atoms), max_miller)
 
 
@@ -45,6 +79,24 @@ def __init__(
         top: bool | None = None,
         min_ab: float = 0.8,
     ):
+        """
+        Initialize a CustomSlab object.
+
+        Parameters
+        ----------
+        slab_atoms : Atoms
+            The slab atomic structure.
+        bulk : Atoms, optional
+            The bulk atomic structure, by default None.
+        millers : tuple, optional
+            Miller indices, by default None.
+        shift : float, optional
+            Shift value, by default None.
+        top : bool, optional
+            Top value, by default None.
+        min_ab : float, optional
+            Minimum a and b lattice parameters, by default 0.8.
+        """
         self.bulk = bulk
         self.slab_atoms = slab_atoms
         self.millers = millers
@@ -57,13 +109,29 @@ def __init__(
         assert len(self.slab_atoms.constraints) > 0, "Sub-surface atoms not constrained"
 
 
-# Not clear this actually needs to a job, maybe just a function if pretty fast?
 @job
 def ocp_adslab_generator(
     slab: Slab | Atoms,
-    adsorbates_kwargs: list[dict[Any, Any]] | None = None,
-    multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None,
+    adsorbates_kwargs: list[AdsorbatesKwargs] | None = None,
+    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None,
 ) -> list[Atoms]:
+    """
+    Generate adsorbate-slab configurations.
+
+    Parameters
+    ----------
+    slab : Slab | Atoms
+        The slab structure.
+    adsorbates_kwargs : list[AdsorbatesKwargs], optional
+        List of keyword arguments for generating adsorbates, by default None.
+    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional
+        Keyword arguments for generating multiple adsorbate-slab configurations, by default None.
+
+    Returns
+    -------
+    list[Atoms]
+        List of generated adsorbate-slab configurations.
+    """
     adsorbates = [
         Adsorbate(**adsorbate_kwargs) for adsorbate_kwargs in adsorbates_kwargs
     ]
@@ -91,9 +159,26 @@ def ocp_adslab_generator(
 @flow
 def find_adslabs_each_slab(
     slabs: list[Slab],
-    adsorbates_kwargs: dict[Any, Any],
-    multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None,
-):
+    adsorbates_kwargs: AdsorbatesKwargs,
+    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None,
+) -> list[dict[str, Any]]:
+    """
+    Find adsorbate-slab configurations for each slab.
+
+    Parameters
+    ----------
+    slabs : list[Slab]
+        List of slabs.
+    adsorbates_kwargs : AdsorbatesKwargs
+        Keyword arguments for generating adsorbates.
+    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional
+        Keyword arguments for generating multiple adsorbate-slab configurations, by default None.
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        List of dictionaries containing slabs and their corresponding adsorbate-slab configurations.
+    """
     return [
         {
             "slab": slab,
@@ -115,6 +200,21 @@ def detect_anomaly(
         "adsorbate_intercalated",
     ]
 ]:
+    """
+    Detect anomalies between initial and final atomic structures.
+
+    Parameters
+    ----------
+    initial_atoms : Atoms
+        Initial atomic structure.
+    final_atoms : Atoms
+        Final atomic structure.
+
+    Returns
+    -------
+    list[Literal["adsorbate_dissociated", "adsorbate_desorbed", "surface_changed", "adsorbate_intercalated"]]
+        List of detected anomalies.
+    """
     atom_tags = initial_atoms.get_tags()
 
     detector = DetectTrajAnomaly(initial_atoms, final_atoms, atom_tags)
@@ -133,7 +233,22 @@ def detect_anomaly(
 @job
 def filter_sort_select_adslabs(
     adslab_results: list[RunSchema], adslab_anomalies_list: list[list[str]]
-):
+) -> list[RunSchema]:
+    """
+    Filter, sort, and select adsorbate-slab configurations based on anomalies and energy.
+
+    Parameters
+    ----------
+    adslab_results : list[RunSchema]
+        List of adsorbate-slab results.
+    adslab_anomalies_list : list[list[str]]
+        List of detected anomalies for each adsorbate-slab configuration.
+
+    Returns
+    -------
+    list[RunSchema]
+        Sorted list of adsorbate-slab configurations without anomalies.
+    """
     for adslab_result, adslab_anomalies in zip(
         adslab_results, adslab_anomalies_list, strict=True
     ):
@@ -151,16 +266,51 @@ def filter_sort_select_adslabs(
 @flow
 def adsorb_ml_pipeline(
     slab: Slab,
-    adsorbates_kwargs: dict[Any, Any],
-    multiple_adsorbate_slab_config_kwargs: dict[Any, Any],
+    adsorbates_kwargs: AdsorbatesKwargs,
+    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs,
     ml_slab_adslab_relax_job: Job,
     slab_validate_job: Job,
     adslab_validate_job: Job,
     gas_validate_job: Job,
     num_to_validate_with_DFT: int = 0,
-    ml_gas_references: dict[str, RunSchema] | None = None,
     reference_ml_energies_to_gas_phase: bool = False,
-):
+    molecule_results: MoleculeResults | None = None,
+    atomic_reference_energies: AtomicReferenceEnergies | None = None,
+) -> dict[str, Any]:
+    """
+    Run a machine learning-based pipeline for adsorbate-slab systems.
+
+    Parameters
+    ----------
+    slab : Slab
+        The slab structure to which adsorbates will be added.
+    adsorbates_kwargs : AdsorbatesKwargs
+        Keyword arguments for generating adsorbate configurations.
+    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs
+        Keyword arguments for generating multiple adsorbate-slab configurations.
+    ml_slab_adslab_relax_job : Job
+        Job for relaxing slab and adsorbate-slab configurations using ML.
+    slab_validate_job : Job
+        Job for validating the slab structure.
+    adslab_validate_job : Job
+        Job for validating the adsorbate-slab structures.
+    gas_validate_job : Job
+        Job for validating gas phase structures.
+    num_to_validate_with_DFT : int, optional
+        Number of top configurations to validate with DFT, by default 0.
+    reference_ml_energies_to_gas_phase : bool, optional
+        Whether to reference ML energies to gas phase, by default False.
+    molecule_results : MoleculeResults, optional
+        Precomputed molecule results for referencing, by default None.
+    atomic_reference_energies : AtomicReferenceEnergies, optional
+        Atomic reference energies for referencing, by default None.
+
+    Returns
+    -------
+    dict
+        Dictionary containing the slab, ML-relaxed adsorbate-slab configurations,
+        detected anomalies, and optionally DFT-validated structures.
+    """
     unrelaxed_adslab_configurations = ocp_adslab_generator(
         slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs
     )
@@ -173,28 +323,16 @@ def adsorb_ml_pipeline(
     ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms)
 
     if reference_ml_energies_to_gas_phase:
-        if ml_gas_references is None:
-            ml_gas_references = {
-                "N2": ml_slab_adslab_relax_job(
-                    molecule_pbc("N2", vacuum=10), relax_cell=False
-                ),
-                "CO": ml_slab_adslab_relax_job(
-                    molecule_pbc("CO", vacuum=10), relax_cell=False
-                ),
-                "H2": ml_slab_adslab_relax_job(
-                    molecule_pbc("H2", vacuum=10), relax_cell=False
-                ),
-                "H2O": ml_slab_adslab_relax_job(
-                    molecule_pbc("H2O", vacuum=10), relax_cell=False
-                ),
-            }
+        if atomic_reference_energies is None and molecule_results is None:
+            molecule_results = generate_molecule_reference_results(
+                ml_slab_adslab_relax_job
+            )
+
         ml_relaxed_configurations = reference_adslab_energies(
             ml_relaxed_configurations,
             ml_relaxed_slab_result,
-            CO_result=ml_gas_references["CO"],
-            N2_result=ml_gas_references["N2"],
-            H2_result=ml_gas_references["H2"],
-            H2O_result=ml_gas_references["H2O"],
+            atomic_energies=atomic_reference_energies,
+            molecule_results=molecule_results,
         )
 
     adslab_anomalies_list = [
@@ -234,19 +372,47 @@ def adsorb_ml_pipeline(
 def reference_adslab_energies(
     adslab_results: list[dict[str, Any]],
     slab_result: RunSchema,
-    CO_result: OptSchema | RunSchema,
-    H2_result: OptSchema | RunSchema,
-    H2O_result: OptSchema | RunSchema,
-    N2_result: OptSchema | RunSchema,
-):
+    atomic_energies: AtomicReferenceEnergies | None,
+    molecule_results: MoleculeResults | None,
+) -> list[dict[str, Any]]:
+    """
+    Reference adsorbate-slab energies to atomic and slab energies.
+
+    Parameters
+    ----------
+    adslab_results : list[dict[str, Any]]
+        List of adsorbate-slab results.
+    slab_result : RunSchema
+        Result of the slab calculation.
+    atomic_energies : AtomicReferenceEnergies | None
+        Dictionary of atomic energies.
+    molecule_results : MoleculeResults | None
+        Dictionary of molecule results.
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        List of adsorbate-slab results with referenced energies.
+    """
     adslab_results = copy.deepcopy(adslab_results)
-    atomic_energies = {
-        "H": H2_result["results"]["energy"] / 2,
-        "N": N2_result["results"]["energy"] / 2,
-        "O": (H2O_result["results"]["energy"] - H2_result["results"]["energy"]),
-        "C": CO_result["results"]["energy"]
-        - (H2O_result["results"]["energy"] - H2_result["results"]["energy"]),
-    }
+    if atomic_energies is None and molecule_results is not None:
+        atomic_energies = {
+            "H": molecule_results["H2"]["results"]["energy"] / 2,
+            "N": molecule_results["N2"]["results"]["energy"] / 2,
+            "O": (
+                molecule_results["H2O"]["results"]["energy"]
+                - molecule_results["H2"]["results"]["energy"]
+            ),
+            "C": molecule_results["CO"]["results"]["energy"]
+            - (
+                molecule_results["H2O"]["results"]["energy"]
+                - molecule_results["H2"]["results"]["energy"]
+            ),
+        }
+    else:
+        raise Exception(
+            "Missing atomic energies and gas phase energies; unable to continue!"
+        )
 
     slab_energy = slab_result["results"]["energy"]
 
@@ -255,27 +421,29 @@ def reference_adslab_energies(
             adslab_result,
             {
                 "results": {
-                    "atomic_energies": atomic_energies,
-                    "slab_energy": slab_energy,
-                    "adslab_energy": adslab_result["results"]["energy"],
-                    "gas_reactant_energy": sum(
-                        [
-                            atomic_energies[atom.symbol]
-                            for atom in adslab_result["atoms"][
-                                adslab_result["atoms"].get_tags() == 2
-                            ]  # all adsorbate tagged with tag=2!
-                        ]
-                    ),
-                    "adsorption_energy": adslab_result["results"]["energy"]
-                    - slab_energy
-                    - sum(
-                        [
-                            atomic_energies[atom.symbol]
-                            for atom in adslab_result["atoms"][
-                                adslab_result["atoms"].get_tags() == 2
-                            ]  # all adsorbate tagged with tag=2!
-                        ]
-                    ),
+                    "referenced_adsorption_energy": {
+                        "atomic_energies": atomic_energies,
+                        "slab_energy": slab_energy,
+                        "adslab_energy": adslab_result["results"]["energy"],
+                        "gas_reactant_energy": sum(
+                            [
+                                atomic_energies[atom.symbol]
+                                for atom in adslab_result["atoms"][
+                                    adslab_result["atoms"].get_tags() == 2
+                                ]  # all adsorbate tagged with tag=2!
+                            ]
+                        ),
+                        "adsorption_energy": adslab_result["results"]["energy"]
+                        - slab_energy
+                        - sum(
+                            [
+                                atomic_energies[atom.symbol]
+                                for atom in adslab_result["atoms"][
+                                    adslab_result["atoms"].get_tags() == 2
+                                ]  # all adsorbate tagged with tag=2!
+                            ]
+                        ),
+                    }
                 }
             },
         )
@@ -283,17 +451,54 @@ def reference_adslab_energies(
     ]
 
 
-def molecule_pbc(*args, **molecule_kwargs):
+def molecule_pbc(*args: Any, **molecule_kwargs: Any) -> Atoms:
+    """
+    Create a molecule with periodic boundary conditions.
+
+    Parameters
+    ----------
+    *args : Any
+        Positional arguments for the molecule function.
+    **molecule_kwargs : Any
+        Keyword arguments for the molecule function.
+
+    Returns
+    -------
+    Atoms
+        Atomic structure with periodic boundary conditions.
+    """
     atoms = molecule(*args, **molecule_kwargs)
     atoms.pbc = True
     return atoms
 
 
+def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults:
+    """
+    Generate reference results for molecules.
+
+    Parameters
+    ----------
+    relax_job : Job
+        Job for relaxing molecular structures.
+
+    Returns
+    -------
+    MoleculeResults
+        Dictionary of reference results for molecules.
+    """
+    return {
+        "N2": relax_job(molecule_pbc("N2", vacuum=10), relax_cell=False),
+        "CO": relax_job(molecule_pbc("CO", vacuum=10), relax_cell=False),
+        "H2": relax_job(molecule_pbc("H2", vacuum=10), relax_cell=False),
+        "H2O": relax_job(molecule_pbc("H2O", vacuum=10), relax_cell=False),
+    }
+
+
 @flow
-def standard_ocp_dataset_enumeration(
+def bulk_to_surfaces_to_adsorbml(
     bulk_atoms: Atoms,
-    adsorbates_kwargs: dict[str, Any],
-    multiple_adsorbate_slab_config_kwargs: dict[str, Any],
+    adsorbates_kwargs: AdsorbatesKwargs,
+    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs,
     ml_relax_job: Job,
     slab_validate_job: Job,
     adslab_validate_job: Job,
@@ -305,7 +510,46 @@ def standard_ocp_dataset_enumeration(
     num_to_validate_with_DFT: int = 0,
     reference_ml_energies_to_gas_phase: bool = True,
     relax_bulk: bool = True,
-):
+) -> list[dict[str, Any]]:
+    """
+    Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning.
+
+    Parameters
+    ----------
+    bulk_atoms : Atoms
+        The bulk atomic structure.
+    adsorbates_kwargs : AdsorbatesKwargs
+        Keyword arguments for generating adsorbate configurations.
+    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs
+        Keyword arguments for generating multiple adsorbate-slab configurations.
+    ml_relax_job : Job
+        Job for relaxing slab and adsorbate-slab configurations using ML.
+    slab_validate_job : Job
+        Job for validating the slab structure.
+    adslab_validate_job : Job
+        Job for validating the adsorbate-slab structures.
+    gas_validate_job : Job
+        Job for validating gas phase structures.
+    bulk_relax_job : Job | None
+        Job for relaxing the bulk structure, by default None.
+    job_params : dict[str, dict[str, Any]], optional
+        Parameters for customizing jobs, by default None.
+    job_decorators : dict[str, dict[str, Any]], optional
+        Decorators for customizing jobs, by default None.
+    max_miller : int, optional
+        Maximum Miller index, by default 1.
+    num_to_validate_with_DFT : int, optional
+        Number of top configurations to validate with DFT, by default 0.
+    reference_ml_energies_to_gas_phase : bool, optional
+        Whether to reference ML energies to gas phase, by default True.
+    relax_bulk : bool, optional
+        Whether to relax the bulk structure, by default True.
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        List of dictionaries containing the results of the pipeline for each slab.
+    """
     (
         bulk_relax_job_,
         ml_slab_adslab_relax_job_,
@@ -337,25 +581,14 @@ def standard_ocp_dataset_enumeration(
     slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller)
 
     if reference_ml_energies_to_gas_phase:
-        ml_gas_references = {
-            "N2": ml_slab_adslab_relax_job_(
-                molecule_pbc("N2", vacuum=10), relax_cell=False
-            ),
-            "CO": ml_slab_adslab_relax_job_(
-                molecule_pbc("CO", vacuum=10), relax_cell=False
-            ),
-            "H2": ml_slab_adslab_relax_job_(
-                molecule_pbc("H2", vacuum=10), relax_cell=False
-            ),
-            "H2O": ml_slab_adslab_relax_job_(
-                molecule_pbc("H2O", vacuum=10), relax_cell=False
-            ),
-        }
+        molecule_results = generate_molecule_reference_results(
+            ml_slab_adslab_relax_job_
+        )
     else:
-        ml_gas_references = None
+        molecule_results = None
 
     @flow
-    def adsorbML_each_surface(slabs, **kwargs):
+    def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, Any]]:
         return [
             # We strip the decorator here so it's a bunch of jobs, not subflows. Helpful for prefect!
             strip_decorator(adsorb_ml_pipeline)(slab=slab, **kwargs)
@@ -371,6 +604,6 @@ def adsorbML_each_surface(slabs, **kwargs):
         adslab_validate_job=adslab_validate_job_,
         gas_validate_job=gas_validate_job_,
         num_to_validate_with_DFT=num_to_validate_with_DFT,
-        ml_gas_references=ml_gas_references,
+        molecule_results=molecule_results,
         reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase,
     )

From 805874cea562b4e87fc48a8f9a1b17c38fb77ea1 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Fri, 21 Mar 2025 22:49:44 +0000
Subject: [PATCH 04/19] small fixes

---
 src/quacc/recipes/common/adsorbml.py | 48 +++++++++++++++++++---------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index e415109c1a..5ec21a6394 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -395,8 +395,9 @@ def reference_adslab_energies(
         List of adsorbate-slab results with referenced energies.
     """
     adslab_results = copy.deepcopy(adslab_results)
-    if atomic_energies is None and molecule_results is not None:
-        atomic_energies = {
+    if atomic_energies is None:
+        if molecule_results is not None:
+            atomic_energies = {
             "H": molecule_results["H2"]["results"]["energy"] / 2,
             "N": molecule_results["N2"]["results"]["energy"] / 2,
             "O": (
@@ -409,10 +410,11 @@ def reference_adslab_energies(
                 - molecule_results["H2"]["results"]["energy"]
             ),
         }
-    else:
-        raise Exception(
+        else:
+            raise Exception(
             "Missing atomic energies and gas phase energies; unable to continue!"
         )
+        
 
     slab_energy = slab_result["results"]["energy"]
 
@@ -503,18 +505,32 @@ def bulk_to_surfaces_to_adsorbml(
     slab_validate_job: Job,
     adslab_validate_job: Job,
     gas_validate_job: Job,
-    bulk_relax_job: Job | None,
+    max_miller: int = 1,
+    bulk_relax_job: Job | None = None,
     job_params: dict[str, dict[str, Any]] | None = None,
     job_decorators: dict[str, dict[str, Any]] | None = None,
-    max_miller: int = 1,
     num_to_validate_with_DFT: int = 0,
     reference_ml_energies_to_gas_phase: bool = True,
     relax_bulk: bool = True,
+    atomic_reference_energies: dict[str, float] | None = None,
 ) -> list[dict[str, Any]]:
     """
-    Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning.
-
-    Parameters
+    Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning!
+    For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486, 
+                                     https://www.nature.com/articles/s41524-023-01121-5).
+
+    1. Relax bulk structure if desired
+    2. Generate surface slabs from bulk atoms
+    3. Generate gas phase reference energies if needed
+
+    For each slab generated in (3):
+        1. Generate trial adsorbate-slab configurations
+        2. Relax slab and adsorbate-slab configurations using ML
+        3. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations))
+        4. Reference the energies to gas phase if needed (eg using a total energy ML model)
+        5. Optionally validate top K configurations with DFT single-points or relaxations
+
+   Parameters
     ----------
     bulk_atoms : Atoms
         The bulk atomic structure.
@@ -530,14 +546,14 @@ def bulk_to_surfaces_to_adsorbml(
         Job for validating the adsorbate-slab structures.
     gas_validate_job : Job
         Job for validating gas phase structures.
-    bulk_relax_job : Job | None
+    max_miller : int, optional
+        Maximum Miller index, by default 1.
+    bulk_relax_job : Job | None, optional
         Job for relaxing the bulk structure, by default None.
-    job_params : dict[str, dict[str, Any]], optional
+    job_params : dict[str, dict[str, Any]] | None, optional
         Parameters for customizing jobs, by default None.
-    job_decorators : dict[str, dict[str, Any]], optional
+    job_decorators : dict[str, dict[str, Any]] | None, optional
         Decorators for customizing jobs, by default None.
-    max_miller : int, optional
-        Maximum Miller index, by default 1.
     num_to_validate_with_DFT : int, optional
         Number of top configurations to validate with DFT, by default 0.
     reference_ml_energies_to_gas_phase : bool, optional
@@ -576,11 +592,12 @@ def bulk_to_surfaces_to_adsorbml(
     )
 
     if relax_bulk:
+        bulk_atoms
         bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"]
 
     slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller)
 
-    if reference_ml_energies_to_gas_phase:
+    if reference_ml_energies_to_gas_phase and atomic_reference_energies is not None:
         molecule_results = generate_molecule_reference_results(
             ml_slab_adslab_relax_job_
         )
@@ -606,4 +623,5 @@ def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, An
         num_to_validate_with_DFT=num_to_validate_with_DFT,
         molecule_results=molecule_results,
         reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase,
+        atomic_reference_energies=atomic_reference_energies
     )

From 3c3118e74ed3bc9606c39ca22cf4eed54de2a7f3 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Fri, 21 Mar 2025 23:38:32 +0000
Subject: [PATCH 05/19] small changes to the docstrings

---
 src/quacc/recipes/common/adsorbml.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index 5ec21a6394..e1fbd0712c 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -29,9 +29,7 @@ class AdsorbatesKwargs(TypedDict):
     position: list[float]
     orientation: list[float]
 
-class MultipleAdsorbateSlabConfigKwargs(TypedDict):
-    num_adsorbates: int
-    adsorbate_distance: float
+
 
 class MoleculeResults(TypedDict):
     N2: RunSchema
@@ -113,7 +111,7 @@ def __init__(
 def ocp_adslab_generator(
     slab: Slab | Atoms,
     adsorbates_kwargs: list[AdsorbatesKwargs] | None = None,
-    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None,
+    multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None,
 ) -> list[Atoms]:
     """
     Generate adsorbate-slab configurations.
@@ -124,7 +122,7 @@ def ocp_adslab_generator(
         The slab structure.
     adsorbates_kwargs : list[AdsorbatesKwargs], optional
         List of keyword arguments for generating adsorbates, by default None.
-    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional
+    multiple_adsorbate_slab_config_kwargs : dict[str,Any], optional
         Keyword arguments for generating multiple adsorbate-slab configurations, by default None.
 
     Returns
@@ -160,7 +158,7 @@ def ocp_adslab_generator(
 def find_adslabs_each_slab(
     slabs: list[Slab],
     adsorbates_kwargs: AdsorbatesKwargs,
-    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None,
+    multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None,
 ) -> list[dict[str, Any]]:
     """
     Find adsorbate-slab configurations for each slab.
@@ -171,7 +169,7 @@ def find_adslabs_each_slab(
         List of slabs.
     adsorbates_kwargs : AdsorbatesKwargs
         Keyword arguments for generating adsorbates.
-    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional
+    multiple_adsorbate_slab_config_kwargs : dict[str,Any], optional
         Keyword arguments for generating multiple adsorbate-slab configurations, by default None.
 
     Returns
@@ -267,7 +265,7 @@ def filter_sort_select_adslabs(
 def adsorb_ml_pipeline(
     slab: Slab,
     adsorbates_kwargs: AdsorbatesKwargs,
-    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs,
+    multiple_adsorbate_slab_config_kwargs: dict[str, Any],
     ml_slab_adslab_relax_job: Job,
     slab_validate_job: Job,
     adslab_validate_job: Job,
@@ -286,7 +284,7 @@ def adsorb_ml_pipeline(
         The slab structure to which adsorbates will be added.
     adsorbates_kwargs : AdsorbatesKwargs
         Keyword arguments for generating adsorbate configurations.
-    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs
+    multiple_adsorbate_slab_config_kwargs : dict[str, Any]
         Keyword arguments for generating multiple adsorbate-slab configurations.
     ml_slab_adslab_relax_job : Job
         Job for relaxing slab and adsorbate-slab configurations using ML.
@@ -500,7 +498,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults:
 def bulk_to_surfaces_to_adsorbml(
     bulk_atoms: Atoms,
     adsorbates_kwargs: AdsorbatesKwargs,
-    multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs,
+    multiple_adsorbate_slab_config_kwargs: dict[str, Any],
     ml_relax_job: Job,
     slab_validate_job: Job,
     adslab_validate_job: Job,
@@ -530,13 +528,13 @@ def bulk_to_surfaces_to_adsorbml(
         4. Reference the energies to gas phase if needed (eg using a total energy ML model)
         5. Optionally validate top K configurations with DFT single-points or relaxations
 
-   Parameters
+    Parameters
     ----------
     bulk_atoms : Atoms
         The bulk atomic structure.
     adsorbates_kwargs : AdsorbatesKwargs
         Keyword arguments for generating adsorbate configurations.
-    multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs
+    multiple_adsorbate_slab_config_kwargs : dict[str, Any]
         Keyword arguments for generating multiple adsorbate-slab configurations.
     ml_relax_job : Job
         Job for relaxing slab and adsorbate-slab configurations using ML.
@@ -560,6 +558,8 @@ def bulk_to_surfaces_to_adsorbml(
         Whether to reference ML energies to gas phase, by default True.
     relax_bulk : bool, optional
         Whether to relax the bulk structure, by default True.
+    atomic_reference_energies : dict[str, float] | None, optional
+        Atomic reference energies for referencing if known ahead of time, by default None.
 
     Returns
     -------

From f4b653f386eb3280d1072055ee0dc5867bbe3c34 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 00:27:34 +0000
Subject: [PATCH 06/19] small fixes, and move slab relaxation first

---
 src/quacc/recipes/common/adsorbml.py | 83 +++++++++++++++++++---------
 1 file changed, 56 insertions(+), 27 deletions(-)

diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index e1fbd0712c..f564b6e1b2 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -20,7 +20,7 @@
 from quacc.wflow_tools.customizers import customize_funcs, strip_decorator
 
 if TYPE_CHECKING:
-    from quacc.types import OptSchema, RunSchema
+    from quacc.types import RunSchema
 
 logger = logging.getLogger(__name__)
 
@@ -70,7 +70,7 @@ class CustomSlab(Slab):
 
     def __init__(
         self,
-        slab_atoms: Atoms,
+        atoms: Atoms,
         bulk: Atoms | None = None,
         millers: tuple | None = None,
         shift: float | None = None,
@@ -82,7 +82,7 @@ def __init__(
 
         Parameters
         ----------
-        slab_atoms : Atoms
+        atoms : Atoms
             The slab atomic structure.
         bulk : Atoms, optional
             The bulk atomic structure, by default None.
@@ -96,15 +96,15 @@ def __init__(
             Minimum a and b lattice parameters, by default 0.8.
         """
         self.bulk = bulk
-        self.slab_atoms = slab_atoms
+        self.atoms = atoms
         self.millers = millers
         self.shift = shift
         self.top = top
 
-        assert np.linalg.norm(self.slab_atoms.cell[0]) >= min_ab, "Slab not tiled"
-        assert np.linalg.norm(self.slab_atoms.cell[1]) >= min_ab, "Slab not tiled"
+        assert np.linalg.norm(self.atoms.cell[0]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}"
+        assert np.linalg.norm(self.atoms.cell[1]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}"
         assert self.has_surface_tagged(), "Slab not tagged"
-        assert len(self.slab_atoms.constraints) > 0, "Sub-surface atoms not constrained"
+        assert len(self.atoms.constraints) > 0, "Sub-surface atoms not constrained"
 
 
 @job
@@ -136,9 +136,9 @@ def ocp_adslab_generator(
 
     if isinstance(slab, Atoms):
         try:
-            slab = CustomSlab(slab_atoms=slab)
+            slab = CustomSlab(atoms=slab)
         except AssertionError:
-            slab = CustomSlab(slab_atoms=tile_and_tag_atoms(slab))
+            slab = CustomSlab(atoms=tile_and_tag_atoms(slab))
             logger.warning(
                 "The slab was not tagged and/or tiled. "
                 "We did the best we could, but you should be careful and check the results!"
@@ -151,6 +151,10 @@ def ocp_adslab_generator(
         copy.deepcopy(slab), adsorbates, **multiple_adsorbate_slab_config_kwargs
     )
 
+    atoms_list = adslabs.atoms_list
+    for atoms in atoms_list:
+        atoms.pbc = True
+
     return adslabs.atoms_list
 
 
@@ -278,6 +282,13 @@ def adsorb_ml_pipeline(
     """
     Run a machine learning-based pipeline for adsorbate-slab systems.
 
+    1. Relax slab using ML
+    2. Generate trial adsorbate-slab configurations for the relaxed slab
+    3. Relax adsorbate-slab configurations using ML
+    4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations))
+    5. Reference the energies to gas phase if needed (eg using a total energy ML model)
+    6. Optionally validate top K configurations with DFT single-points or relaxations
+
     Parameters
     ----------
     slab : Slab
@@ -309,8 +320,12 @@ def adsorb_ml_pipeline(
         Dictionary containing the slab, ML-relaxed adsorbate-slab configurations,
         detected anomalies, and optionally DFT-validated structures.
     """
+
+    slab.atoms.pbc=True
+    ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms)
+
     unrelaxed_adslab_configurations = ocp_adslab_generator(
-        slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs
+        ml_relaxed_slab_result["atoms"], adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs
     )
 
     ml_relaxed_configurations = [
@@ -318,7 +333,7 @@ def adsorb_ml_pipeline(
         for adslab_configuration in unrelaxed_adslab_configurations
     ]
 
-    ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms)
+
 
     if reference_ml_energies_to_gas_phase:
         if atomic_reference_energies is None and molecule_results is None:
@@ -352,18 +367,32 @@ def adsorb_ml_pipeline(
             "adslab_anomalies": adslab_anomalies_list,
         }
     else:
+        dft_validated_adslabs = [
+                    adslab_validate_job(top_candidates[i]["atoms"], relax_cell=False)
+                    for i in range(num_to_validate_with_DFT)
+                ]
+
+        dft_validated_slab = slab_validate_job(slab.atoms, relax_cell=False)
+
+        if reference_ml_energies_to_gas_phase:
+            if atomic_reference_energies is None and molecule_results is None:
+                molecule_results = generate_molecule_reference_results(
+                    gas_validate_job
+                )
+
+            dft_validated_adslabs = reference_adslab_energies(
+                dft_validated_adslabs,
+                dft_validated_slab,
+                atomic_energies=atomic_reference_energies,
+                molecule_results=molecule_results,
+            )
+
         return {
             "slab": slab,
             "adslab_ml_relaxed_configurations": top_candidates,
             "adslab_anomalies": adslab_anomalies_list,
-            "validated_structures": {
-                "validated_adslabs": [
-                    adslab_validate_job(top_candidates[i]["atoms"])
-                    for i in range(num_to_validate_with_DFT)
-                ],
-                "slab_validated": slab_validate_job(slab.atoms, relax_cell=False),
-            },
-        }
+            "validated_structures": {"slab": dft_validated_slab, "adslabs": dft_validated_adslabs}}
+
 
 
 @job
@@ -412,7 +441,7 @@ def reference_adslab_energies(
             raise Exception(
             "Missing atomic energies and gas phase energies; unable to continue!"
         )
-        
+
 
     slab_energy = slab_result["results"]["energy"]
 
@@ -514,7 +543,7 @@ def bulk_to_surfaces_to_adsorbml(
 ) -> list[dict[str, Any]]:
     """
     Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning!
-    For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486, 
+    For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486,
                                      https://www.nature.com/articles/s41524-023-01121-5).
 
     1. Relax bulk structure if desired
@@ -522,11 +551,12 @@ def bulk_to_surfaces_to_adsorbml(
     3. Generate gas phase reference energies if needed
 
     For each slab generated in (3):
-        1. Generate trial adsorbate-slab configurations
-        2. Relax slab and adsorbate-slab configurations using ML
-        3. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations))
-        4. Reference the energies to gas phase if needed (eg using a total energy ML model)
-        5. Optionally validate top K configurations with DFT single-points or relaxations
+        1. Relax slab using ML
+        2. Generate trial adsorbate-slab configurations for the relaxed slab
+        3. Relax adsorbate-slab configurations using ML
+        4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations))
+        5. Reference the energies to gas phase if needed (eg using a total energy ML model)
+        6. Optionally validate top K configurations with DFT single-points or relaxations
 
     Parameters
     ----------
@@ -592,7 +622,6 @@ def bulk_to_surfaces_to_adsorbml(
     )
 
     if relax_bulk:
-        bulk_atoms
         bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"]
 
     slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller)

From 04cb215c6e232a93774f4c0d7e52377f023dbd85 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 14:15:42 +0000
Subject: [PATCH 07/19] many fixes

---
 pyproject.toml                                |   2 +-
 src/quacc/recipes/common/adsorbml.py          | 124 ++++------
 src/quacc/recipes/mlp/adsorbml.py             | 234 ++++++++++++++++++
 src/quacc/types.py                            |  33 +++
 .../core/recipes/mlp_recipes/test_adsorbml.py | 146 +++++++++++
 .../recipes/mlp_recipes/test_core_recipes.py  |   6 +-
 .../mlp_recipes/test_elastic_recipes.py       |   2 +-
 tests/requirements-mlp1.txt                   |   1 +
 tests/requirements-mlp2.txt                   |   1 +
 9 files changed, 462 insertions(+), 87 deletions(-)
 create mode 100644 src/quacc/recipes/mlp/adsorbml.py
 create mode 100644 tests/core/recipes/mlp_recipes/test_adsorbml.py

diff --git a/pyproject.toml b/pyproject.toml
index 91b2f348cf..c16566fd01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ mlp1 = ["chgnet>=0.3.3", "torch-dftd>=0.4.0", "sevenn>=0.10.1",  "orb-models>=0.
 mlp2 = ["mace-torch>=0.3.3", "matgl>=1.1.2"]
 mp = ["atomate2>=0.0.14"]
 newtonnet = ["newtonnet>=1.1"]
-ocp = ["fairchem-data-oc>=0.2.0"]
+ocdata = ["fairchem-data-oc>=0.2.0"]
 parsl = ["parsl[monitoring]>=2024.5.27; platform_system!='Windows'"]
 phonons = ["phonopy>=2.20.0", "seekpath>=2.1.0"]
 prefect = ["prefect[dask]>=2.19.0", "dask-jobqueue>=0.8.2"]
diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index f564b6e1b2..c98dd92544 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -2,7 +2,7 @@
 
 import copy
 import logging
-from typing import TYPE_CHECKING, Any, Literal, TypedDict
+from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
 from ase.atoms import Atoms
@@ -17,31 +17,22 @@
 
 from quacc import Job, flow, job
 from quacc.utils.dicts import recursive_dict_merge
-from quacc.wflow_tools.customizers import customize_funcs, strip_decorator
+from quacc.wflow_tools.customizers import strip_decorator
 
 if TYPE_CHECKING:
-    from quacc.types import RunSchema
+    from quacc.types import (
+        AdsorbMLSchema,
+        AtomicReferenceEnergies,
+        MoleculeReferenceResults,
+        OptSchema,
+        RunSchema,
+    )
 
 logger = logging.getLogger(__name__)
 
-class AdsorbatesKwargs(TypedDict):
-    adsorbate_type: str
-    position: list[float]
-    orientation: list[float]
-
 
 
-class MoleculeResults(TypedDict):
-    N2: RunSchema
-    CO: RunSchema
-    H2: RunSchema
-    H2O: RunSchema
 
-class AtomicReferenceEnergies(TypedDict):
-    H: float
-    N: float
-    O: float
-    C: float
 
 @job
 def ocp_surface_generator(bulk_atoms: Atoms, max_miller: int = 1) -> list[Slab]:
@@ -110,7 +101,7 @@ def __init__(
 @job
 def ocp_adslab_generator(
     slab: Slab | Atoms,
-    adsorbates_kwargs: list[AdsorbatesKwargs] | None = None,
+    adsorbates_kwargs: list[dict[str,Any]] | None = None,
     multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None,
 ) -> list[Atoms]:
     """
@@ -120,7 +111,7 @@ def ocp_adslab_generator(
     ----------
     slab : Slab | Atoms
         The slab structure.
-    adsorbates_kwargs : list[AdsorbatesKwargs], optional
+    adsorbates_kwargs : list[dict[str,Any]], optional
         List of keyword arguments for generating adsorbates, by default None.
     multiple_adsorbate_slab_config_kwargs : dict[str,Any], optional
         Keyword arguments for generating multiple adsorbate-slab configurations, by default None.
@@ -161,9 +152,9 @@ def ocp_adslab_generator(
 @flow
 def find_adslabs_each_slab(
     slabs: list[Slab],
-    adsorbates_kwargs: AdsorbatesKwargs,
+    adsorbates_kwargs: dict[str,Any],
     multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None,
-) -> list[dict[str, Any]]:
+) -> list[dict[str, Slab | list[Atoms]]]:
     """
     Find adsorbate-slab configurations for each slab.
 
@@ -234,21 +225,21 @@ def detect_anomaly(
 
 @job
 def filter_sort_select_adslabs(
-    adslab_results: list[RunSchema], adslab_anomalies_list: list[list[str]]
-) -> list[RunSchema]:
+    adslab_results: list[OptSchema], adslab_anomalies_list: list[list[str]]
+) -> list[OptSchema]:
     """
     Filter, sort, and select adsorbate-slab configurations based on anomalies and energy.
 
     Parameters
     ----------
-    adslab_results : list[RunSchema]
+    adslab_results : list[OptSchema]
         List of adsorbate-slab results.
     adslab_anomalies_list : list[list[str]]
         List of detected anomalies for each adsorbate-slab configuration.
 
     Returns
     -------
-    list[RunSchema]
+    list[OptSchema]
         Sorted list of adsorbate-slab configurations without anomalies.
     """
     for adslab_result, adslab_anomalies in zip(
@@ -268,7 +259,7 @@ def filter_sort_select_adslabs(
 @flow
 def adsorb_ml_pipeline(
     slab: Slab,
-    adsorbates_kwargs: AdsorbatesKwargs,
+    adsorbates_kwargs: dict[str,Any],
     multiple_adsorbate_slab_config_kwargs: dict[str, Any],
     ml_slab_adslab_relax_job: Job,
     slab_validate_job: Job,
@@ -276,9 +267,9 @@ def adsorb_ml_pipeline(
     gas_validate_job: Job,
     num_to_validate_with_DFT: int = 0,
     reference_ml_energies_to_gas_phase: bool = False,
-    molecule_results: MoleculeResults | None = None,
+    molecule_results: MoleculeReferenceResults | None = None,
     atomic_reference_energies: AtomicReferenceEnergies | None = None,
-) -> dict[str, Any]:
+) -> AdsorbMLSchema:
     """
     Run a machine learning-based pipeline for adsorbate-slab systems.
 
@@ -293,7 +284,7 @@ def adsorb_ml_pipeline(
     ----------
     slab : Slab
         The slab structure to which adsorbates will be added.
-    adsorbates_kwargs : AdsorbatesKwargs
+    adsorbates_kwargs : dict[str,Any]
         Keyword arguments for generating adsorbate configurations.
     multiple_adsorbate_slab_config_kwargs : dict[str, Any]
         Keyword arguments for generating multiple adsorbate-slab configurations.
@@ -309,7 +300,7 @@ def adsorb_ml_pipeline(
         Number of top configurations to validate with DFT, by default 0.
     reference_ml_energies_to_gas_phase : bool, optional
         Whether to reference ML energies to gas phase, by default False.
-    molecule_results : MoleculeResults, optional
+    molecule_results : MoleculeReferenceResults, optional
         Precomputed molecule results for referencing, by default None.
     atomic_reference_energies : AtomicReferenceEnergies, optional
         Atomic reference energies for referencing, by default None.
@@ -333,8 +324,6 @@ def adsorb_ml_pipeline(
         for adslab_configuration in unrelaxed_adslab_configurations
     ]
 
-
-
     if reference_ml_energies_to_gas_phase:
         if atomic_reference_energies is None and molecule_results is None:
             molecule_results = generate_molecule_reference_results(
@@ -362,8 +351,8 @@ def adsorb_ml_pipeline(
 
     if num_to_validate_with_DFT == 0:
         return {
-            "slab": slab,
-            "adslab_ml_relaxed_configurations": top_candidates,
+            "slab": slab.get_metadata_dict(),
+            "adslabs": top_candidates,
             "adslab_anomalies": adslab_anomalies_list,
         }
     else:
@@ -388,8 +377,8 @@ def adsorb_ml_pipeline(
             )
 
         return {
-            "slab": slab,
-            "adslab_ml_relaxed_configurations": top_candidates,
+            "slab": slab.get_metadata_dict(),
+            "adslabs": top_candidates,
             "adslab_anomalies": adslab_anomalies_list,
             "validated_structures": {"slab": dft_validated_slab, "adslabs": dft_validated_adslabs}}
 
@@ -397,11 +386,11 @@ def adsorb_ml_pipeline(
 
 @job
 def reference_adslab_energies(
-    adslab_results: list[dict[str, Any]],
+    adslab_results: list[OptSchema],
     slab_result: RunSchema,
     atomic_energies: AtomicReferenceEnergies | None,
-    molecule_results: MoleculeResults | None,
-) -> list[dict[str, Any]]:
+    molecule_results: MoleculeReferenceResults | None,
+) -> list[OptSchema]:
     """
     Reference adsorbate-slab energies to atomic and slab energies.
 
@@ -413,7 +402,7 @@ def reference_adslab_energies(
         Result of the slab calculation.
     atomic_energies : AtomicReferenceEnergies | None
         Dictionary of atomic energies.
-    molecule_results : MoleculeResults | None
+    molecule_results : MoleculeReferenceResults | None
         Dictionary of molecule results.
 
     Returns
@@ -501,7 +490,7 @@ def molecule_pbc(*args: Any, **molecule_kwargs: Any) -> Atoms:
     return atoms
 
 
-def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults:
+def generate_molecule_reference_results(relax_job: Job) -> MoleculeReferenceResults:
     """
     Generate reference results for molecules.
 
@@ -512,7 +501,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults:
 
     Returns
     -------
-    MoleculeResults
+    MoleculeReferenceResults
         Dictionary of reference results for molecules.
     """
     return {
@@ -526,7 +515,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults:
 @flow
 def bulk_to_surfaces_to_adsorbml(
     bulk_atoms: Atoms,
-    adsorbates_kwargs: AdsorbatesKwargs,
+    adsorbates_kwargs: dict[str,Any],
     multiple_adsorbate_slab_config_kwargs: dict[str, Any],
     ml_relax_job: Job,
     slab_validate_job: Job,
@@ -534,8 +523,7 @@ def bulk_to_surfaces_to_adsorbml(
     gas_validate_job: Job,
     max_miller: int = 1,
     bulk_relax_job: Job | None = None,
-    job_params: dict[str, dict[str, Any]] | None = None,
-    job_decorators: dict[str, dict[str, Any]] | None = None,
+
     num_to_validate_with_DFT: int = 0,
     reference_ml_energies_to_gas_phase: bool = True,
     relax_bulk: bool = True,
@@ -578,10 +566,6 @@ def bulk_to_surfaces_to_adsorbml(
         Maximum Miller index, by default 1.
     bulk_relax_job : Job | None, optional
         Job for relaxing the bulk structure, by default None.
-    job_params : dict[str, dict[str, Any]] | None, optional
-        Parameters for customizing jobs, by default None.
-    job_decorators : dict[str, dict[str, Any]] | None, optional
-        Decorators for customizing jobs, by default None.
     num_to_validate_with_DFT : int, optional
         Number of top configurations to validate with DFT, by default 0.
     reference_ml_energies_to_gas_phase : bool, optional
@@ -593,42 +577,18 @@ def bulk_to_surfaces_to_adsorbml(
 
     Returns
     -------
-    list[dict[str, Any]]
-        List of dictionaries containing the results of the pipeline for each slab.
+    list[AdsorbMLSchema]
+        List of AdsorbML results for each slab
     """
-    (
-        bulk_relax_job_,
-        ml_slab_adslab_relax_job_,
-        slab_validate_job_,
-        adslab_validate_job_,
-        gas_validate_job_,
-    ) = customize_funcs(
-        [
-            "bulk_relax_job",
-            "ml_slab_adslab_relax_job",
-            "slab_validate_job",
-            "adslab_validate_job",
-            "gas_validate_job",
-        ],
-        [
-            bulk_relax_job,  # type: ignore
-            ml_relax_job,
-            slab_validate_job,
-            adslab_validate_job,
-            gas_validate_job,
-        ],
-        param_swaps=job_params,
-        decorators=job_decorators,  # type: ignore
-    )
 
     if relax_bulk:
-        bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"]
+        bulk_atoms = bulk_relax_job(bulk_atoms, relax_cell=True)["atoms"]
 
     slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller)
 
     if reference_ml_energies_to_gas_phase and atomic_reference_energies is not None:
         molecule_results = generate_molecule_reference_results(
-            ml_slab_adslab_relax_job_
+            ml_relax_job
         )
     else:
         molecule_results = None
@@ -645,10 +605,10 @@ def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, An
         slabs=slabs,
         adsorbates_kwargs=adsorbates_kwargs,
         multiple_adsorbate_slab_config_kwargs=multiple_adsorbate_slab_config_kwargs,
-        ml_slab_adslab_relax_job=ml_slab_adslab_relax_job_,
-        slab_validate_job=slab_validate_job_,
-        adslab_validate_job=adslab_validate_job_,
-        gas_validate_job=gas_validate_job_,
+        ml_slab_adslab_relax_job=ml_relax_job,
+        slab_validate_job=slab_validate_job,
+        adslab_validate_job=adslab_validate_job,
+        gas_validate_job=gas_validate_job,
         num_to_validate_with_DFT=num_to_validate_with_DFT,
         molecule_results=molecule_results,
         reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase,
diff --git a/src/quacc/recipes/mlp/adsorbml.py b/src/quacc/recipes/mlp/adsorbml.py
new file mode 100644
index 0000000000..83bf378685
--- /dev/null
+++ b/src/quacc/recipes/mlp/adsorbml.py
@@ -0,0 +1,234 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+from quacc import Job, flow
+from quacc.recipes.common.adsorbml import (
+    adsorb_ml_pipeline as adsorb_ml_pipeline_,
+    bulk_to_surfaces_to_adsorbml as bulk_to_surfaces_to_adsorbml_,
+)
+from quacc.wflow_tools.customizers import customize_funcs
+
+if TYPE_CHECKING:
+    from ase.atoms import Atoms
+    from fairchem.data.oc.core.slab import Slab
+
+    from quacc.types import (
+        AdsorbMLSchema,
+        AtomicReferenceEnergies,
+        MoleculeReferenceResults,
+    )
+
+
+
+logger = logging.getLogger(__name__)
+
+
+@flow
+def adsorb_ml_pipeline(
+    slab: Slab,
+    adsorbates_kwargs: dict[str, Any],
+    multiple_adsorbate_slab_config_kwargs: dict[str, Any],
+    ml_slab_adslab_relax_job: Job,
+    slab_validate_job: Job,
+    adslab_validate_job: Job,
+    gas_validate_job: Job,
+    num_to_validate_with_DFT: int = 0,
+    reference_ml_energies_to_gas_phase: bool = False,
+    molecule_results: MoleculeReferenceResults | None = None,
+    atomic_reference_energies: AtomicReferenceEnergies | None = None,
+    job_params: dict[str, dict[str, Any]] | None = None,
+    job_decorators: dict[str, dict[str, Any]] | None = None,
+) -> AdsorbMLSchema:
+    """
+    Run a machine learning-based pipeline for adsorbate-slab systems.
+
+    1. Relax slab using ML
+    2. Generate trial adsorbate-slab configurations for the relaxed slab
+    3. Relax adsorbate-slab configurations using ML
+    4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)
+    5. Reference the energies to gas phase if needed (e.g., using a total-energy ML model)
+    6. Optionally validate top K configurations with DFT single-points or relaxations
+
+    Parameters
+    ----------
+    slab : Slab
+        The slab structure to which adsorbates will be added.
+    adsorbates_kwargs : dict[str, Any]
+        Keyword arguments for generating adsorbate configurations.
+    multiple_adsorbate_slab_config_kwargs : dict[str, Any]
+        Keyword arguments for generating multiple adsorbate-slab configurations.
+    ml_slab_adslab_relax_job : Job
+        Job for relaxing slab and adsorbate-slab configurations using ML.
+    slab_validate_job : Job
+        Job for validating the slab structure.
+    adslab_validate_job : Job
+        Job for validating the adsorbate-slab structures.
+    gas_validate_job : Job
+        Job for validating gas phase structures.
+    num_to_validate_with_DFT : int, optional
+        Number of top configurations to validate with DFT, by default 0.
+    reference_ml_energies_to_gas_phase : bool, optional
+        Whether to reference ML energies to gas phase, by default False.
+    molecule_results : MoleculeReferenceResults, optional
+        Precomputed molecule results for referencing, by default None.
+    atomic_reference_energies : AtomicReferenceEnergies, optional
+        Atomic reference energies for referencing, by default None.
+
+    Returns
+    -------
+    AdsorbMLSchema
+        Dictionary containing the slab, ML-relaxed adsorbate-slab configurations,
+        detected anomalies, and optionally DFT-validated structures.
+    """
+
+    (
+        ml_slab_adslab_relax_job_,
+        slab_validate_job_,
+        adslab_validate_job_,
+        gas_validate_job_,
+    ) = customize_funcs(
+        [
+            "ml_slab_adslab_relax_job",
+            "slab_validate_job",
+            "adslab_validate_job",
+            "gas_validate_job",
+        ],
+        [
+            ml_slab_adslab_relax_job,
+            slab_validate_job,
+            adslab_validate_job,
+            gas_validate_job,
+        ],
+        param_swaps=job_params,
+        decorators=job_decorators,  # type: ignore
+    )
+
+    return adsorb_ml_pipeline_(
+        slab,
+        adsorbates_kwargs,
+        multiple_adsorbate_slab_config_kwargs,
+        ml_slab_adslab_relax_job_,
+        slab_validate_job_,
+        adslab_validate_job_,
+        gas_validate_job_,
+        num_to_validate_with_DFT,
+        reference_ml_energies_to_gas_phase,
+        molecule_results,
+        atomic_reference_energies,
+    )
+
+
+@flow
+def bulk_to_surfaces_to_adsorbml(
+    bulk_atoms: Atoms,
+    adsorbates_kwargs: dict[str,Any],
+    multiple_adsorbate_slab_config_kwargs: dict[str, Any],
+    ml_relax_job: Job,
+    slab_validate_job: Job,
+    adslab_validate_job: Job,
+    gas_validate_job: Job,
+    max_miller: int = 1,
+    bulk_relax_job: Job | None = None,
+    job_params: dict[str, dict[str, Any]] | None = None,
+    job_decorators: dict[str, dict[str, Any]] | None = None,
+    num_to_validate_with_DFT: int = 0,
+    reference_ml_energies_to_gas_phase: bool = True,
+    relax_bulk: bool = True,
+    atomic_reference_energies: dict[str, float] | None = None,
+) -> list[dict[str, Any]]:
+    """
+    Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning!
+    For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486,
+                                     https://www.nature.com/articles/s41524-023-01121-5).
+
+    1. Relax bulk structure if desired
+    2. Generate surface slabs from bulk atoms
+    3. Generate gas phase reference energies if needed
+
+    For each slab generated in (2):
+        1. Relax slab using ML
+        2. Generate trial adsorbate-slab configurations for the relaxed slab
+        3. Relax adsorbate-slab configurations using ML
+        4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)
+        5. Reference the energies to gas phase if needed (e.g., using a total-energy ML model)
+        6. Optionally validate top K configurations with DFT single-points or relaxations
+
+    Parameters
+    ----------
+    bulk_atoms : Atoms
+        The bulk atomic structure.
+    adsorbates_kwargs : dict[str, Any]
+        Keyword arguments for generating adsorbate configurations.
+    multiple_adsorbate_slab_config_kwargs : dict[str, Any]
+        Keyword arguments for generating multiple adsorbate-slab configurations.
+    ml_relax_job : Job
+        Job for relaxing slab and adsorbate-slab configurations using ML.
+    slab_validate_job : Job
+        Job for validating the slab structure.
+    adslab_validate_job : Job
+        Job for validating the adsorbate-slab structures.
+    gas_validate_job : Job
+        Job for validating gas phase structures.
+    max_miller : int, optional
+        Maximum Miller index, by default 1.
+    bulk_relax_job : Job | None, optional
+        Job for relaxing the bulk structure, by default None.
+    job_params : dict[str, dict[str, Any]] | None, optional
+        Parameters for customizing jobs, by default None.
+    job_decorators : dict[str, dict[str, Any]] | None, optional
+        Decorators for customizing jobs, by default None.
+    num_to_validate_with_DFT : int, optional
+        Number of top configurations to validate with DFT, by default 0.
+    reference_ml_energies_to_gas_phase : bool, optional
+        Whether to reference ML energies to gas phase, by default True.
+    relax_bulk : bool, optional
+        Whether to relax the bulk structure, by default True.
+    atomic_reference_energies : dict[str, float] | None, optional
+        Atomic reference energies for referencing if known ahead of time, by default None.
+
+    Returns
+    -------
+    list[AdsorbMLSchema]
+        List of AdsorbML results for each slab
+    """
+    (
+        bulk_relax_job_,
+        ml_relax_job_,
+        slab_validate_job_,
+        adslab_validate_job_,
+        gas_validate_job_,
+    ) = customize_funcs(
+        [
+            "bulk_relax_job",
+            "ml_relax_job",
+            "slab_validate_job",
+            "adslab_validate_job",
+            "gas_validate_job",
+        ],
+        [
+            bulk_relax_job,  # type: ignore
+            ml_relax_job,
+            slab_validate_job,
+            adslab_validate_job,
+            gas_validate_job,
+        ],
+        param_swaps=job_params,
+        decorators=job_decorators,  # type: ignore
+    )
+    return bulk_to_surfaces_to_adsorbml_(
+        bulk_atoms,
+        adsorbates_kwargs,
+        multiple_adsorbate_slab_config_kwargs,
+        ml_relax_job_,
+        slab_validate_job_,
+        adslab_validate_job_,
+        gas_validate_job_,
+        max_miller,
+        bulk_relax_job_,
+        num_to_validate_with_DFT,
+        reference_ml_energies_to_gas_phase,
+        relax_bulk,
+        atomic_reference_energies,
+    )
diff --git a/src/quacc/types.py b/src/quacc/types.py
index 335b176bfc..cd06566a6c 100644
--- a/src/quacc/types.py
+++ b/src/quacc/types.py
@@ -539,6 +539,39 @@ class ElasticSchema(TypedDict):
         undeformed_result: RunSchema | OptSchema
         elasticity_doc: ElasticityDoc
 
+    class SlabMetadata(TypedDict):
+        millers: tuple[int, int, int]
+        shift: tuple[float, float, float]
+        vacuum: float
+        surface_area: float
+        volume: float
+
+    class Slab(TypedDict):
+        slab_atomsobject: Atoms
+        slab_metadata: SlabMetadata
+
+    class AdsorbMLSchema(TypedDict):
+        slab: Slab
+        adslabs: list[OptSchema]
+        adslab_anomalies: list[Literal[
+            "adsorbate_dissociated",
+            "adsorbate_desorbed",
+            "surface_changed",
+            "adsorbate_intercalated",]]
+        validated_structures: NotRequired[dict[str, Any]]
+
+    class MoleculeReferenceResults(TypedDict):
+        N2: RunSchema
+        CO: RunSchema
+        H2: RunSchema
+        H2O: RunSchema
+
+    class AtomicReferenceEnergies(TypedDict):
+        H: float
+        N: float
+        O: float
+        C: float
+
     class VibThermoSchema(VibSchema, ThermoSchema):
         """Combined Vibrations and Thermo schema"""
 
diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
new file mode 100644
index 0000000000..9b16e07cd8
--- /dev/null
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -0,0 +1,146 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+from ase.build import bulk
+from quacc.recipes.mlp.core import relax_job
+
+from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
+
+torch = pytest.importorskip("torch")
+
+from importlib.util import find_spec
+
+methods = []
+if has_mace := find_spec("mace"):
+    methods.append("mace-mp-0")
+
+if has_matgl := find_spec("matgl"):
+    methods.append("m3gnet")
+
+if has_chgnet := find_spec("chgnet"):
+    methods.append("chgnet")
+
+if has_sevennet := find_spec("sevenn"):
+    methods.append("sevennet")
+
+if has_orb := find_spec("orb_models"):
+    methods.append("orb")
+
+if find_spec("fairchem"):
+    from huggingface_hub.utils._auth import get_token
+
+    if get_token():
+        methods.append("fairchem")
+
+
+def _set_dtype(size, type_="float"):
+    globals()[f"{type_}_th"] = getattr(torch, f"{type_}{size}")
+    globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}")
+    torch.set_default_dtype(getattr(torch, f"float{size}"))
+
+
+@pytest.mark.parametrize("method", methods)
+def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
+    monkeypatch.chdir(tmp_path)
+
+    if method == "mace-mp-0":
+        _set_dtype(64)
+    else:
+        _set_dtype(32)
+
+    if method == "fairchem":
+        calc_kwargs = {
+            "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex",
+            "local_cache": "./fairchem_checkpoint_cache/",
+            "seed": 42,
+        }
+    else:
+        calc_kwargs = {}
+
+    ref_CO_Cu111_adsorption_energy = {
+        "chgnet": -1.5,
+        "m3gnet": -0.5,
+        "mace-mp-0": -0.5,
+        "sevennet": -0.98,
+        "orb": -0.5,
+        "fairchem": -0.5,
+    }
+
+    bulk_atoms = bulk("Cu")
+
+    outputs =  bulk_to_surfaces_to_adsorbml(
+    bulk_atoms,
+    adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}],
+    multiple_adsorbate_slab_config_kwargs={"num_configurations": 1},
+    ml_relax_job=relax_job,
+    slab_validate_job=relax_job,
+    adslab_validate_job=relax_job,
+    gas_validate_job=relax_job,
+    bulk_relax_job=relax_job,
+    job_params={
+        "all": dict(
+            method=method,
+            **calc_kwargs,
+            opt_params={"max_steps": 500},
+        ),
+    },
+    max_miller=1,
+    num_to_validate_with_DFT=1,
+    reference_ml_energies_to_gas_phase=True,
+    relax_bulk=True,
+)
+    index_111_facet = np.where([result["slab"]["slab_metadata"]['millers'] == (1, 1, 1) for result in outputs])[0][0]
+
+    assert len(outputs) > 0
+    assert outputs[index_111_facet]["adslabs"][0]["results"]["referenced_adsorption_energy"][
+        "adsorption_energy"
+    ]  == pytest.approx(
+        ref_CO_Cu111_adsorption_energy[method], abs=0.1
+    )
+    
+
+
+@pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
+def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):
+    monkeypatch.chdir(tmp_path)
+
+    calc_kwargs = {
+        "model_name": fairchem_checkpoint,
+        "local_cache": "./fairchem_checkpoint_cache/",
+        "seed": 42,
+    }
+
+    ref_CO_Cu111_adsorption_energy = {
+        "EquiformerV2-31M-S2EF-OC20-All+MD": -0.5,
+    }
+
+    bulk_atoms = bulk("Cu")
+
+    outputs = bulk_to_surfaces_to_adsorbml(
+        bulk_atoms,
+        adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}],
+        multiple_adsorbate_slab_config_kwargs={"num_configurations": 1},
+        ml_relax_job=relax_job,
+        slab_validate_job=relax_job,
+        adslab_validate_job=relax_job,
+        gas_validate_job=relax_job,
+        bulk_relax_job=relax_job,
+        job_params={
+            "all": dict(
+                method="fairchem",
+                **calc_kwargs,
+            ),
+        },
+        max_miller=1,
+        num_to_validate_with_DFT=1,
+        reference_ml_energies_to_gas_phase=False,
+        relax_bulk=False,
+    )
+    index_111_facet = np.where([result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs])[0][0]
+
+    assert len(outputs) > 0
+    assert outputs[index_111_facet]["adslabs"][0]["results"]["energy"] == pytest.approx(
+        ref_CO_Cu111_adsorption_energy[fairchem_checkpoint], abs=0.1
+    )
+    
\ No newline at end of file
diff --git a/tests/core/recipes/mlp_recipes/test_core_recipes.py b/tests/core/recipes/mlp_recipes/test_core_recipes.py
index 669c68cb8e..6d9d5b2879 100644
--- a/tests/core/recipes/mlp_recipes/test_core_recipes.py
+++ b/tests/core/recipes/mlp_recipes/test_core_recipes.py
@@ -59,7 +59,7 @@ def test_static_job(tmp_path, monkeypatch, method):
     if method == "fairchem":
         # Note that for this to work, you need HF_TOKEN env variable set!
         calc_kwargs = {
-            "model_name": "EquiformerV2-31M-OMAT24-mp-salex",
+            "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex",
             "local_cache": "./fairchem_checkpoint_cache/",
         }
     else:
@@ -117,7 +117,7 @@ def test_relax_job(tmp_path, monkeypatch, method):
     if method == "fairchem":
         # Note that for this to work, you need HF_TOKEN env variable set!
         calc_kwargs = {
-            "model_name": "EquiformerV2-31M-OMAT24-mp-salex",
+            "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex",
             "local_cache": "./fairchem_checkpoint_cache/",
         }
     else:
@@ -169,7 +169,7 @@ def test_relax_cell_job(tmp_path, monkeypatch, method):
     if method == "fairchem":
         # Note that for this to work, you need HF_TOKEN env variable set!
         calc_kwargs = {
-            "model_name": "EquiformerV2-31M-OMAT24-mp-salex",
+            "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex",
             "local_cache": "./fairchem_checkpoint_cache/",
         }
     else:
diff --git a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py
index a1fb3c32eb..23509fee09 100644
--- a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py
+++ b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py
@@ -50,7 +50,7 @@ def test_elastic_jobs(tmp_path, monkeypatch, method):
 
     if method == "fairchem":
         calc_kwargs = {
-            "model_name": "EquiformerV2-31M-OMAT24-mp-salex",
+            "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex",
             "local_cache": "./fairchem_checkpoint_cache/",
         }
     else:
diff --git a/tests/requirements-mlp1.txt b/tests/requirements-mlp1.txt
index 44dae38751..8ebcb8ab2a 100644
--- a/tests/requirements-mlp1.txt
+++ b/tests/requirements-mlp1.txt
@@ -10,3 +10,4 @@ torch_geometric==2.6.1
 torch_scatter==2.1.2
 torch_sparse==0.6.18
 torch_cluster==1.6.3
+fairchem-data-oc==0.2.1
\ No newline at end of file
diff --git a/tests/requirements-mlp2.txt b/tests/requirements-mlp2.txt
index aa31143880..f12449c333 100644
--- a/tests/requirements-mlp2.txt
+++ b/tests/requirements-mlp2.txt
@@ -1,2 +1,3 @@
 mace-torch==0.3.12
 matgl @ git+https://github.com/materialsvirtuallab/matgl.git
+fairchem-data-oc==0.2.1
\ No newline at end of file

From 7c35be671afa48f74b3bc59acc5d8d0b7604d7f0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 14:24:58 +0000
Subject: [PATCH 08/19] pre-commit auto-fixes

---
 src/quacc/recipes/common/adsorbml.py          | 82 +++++++++----------
 src/quacc/recipes/mlp/adsorbml.py             |  5 +-
 src/quacc/types.py                            | 13 +--
 .../core/recipes/mlp_recipes/test_adsorbml.py | 67 +++++++--------
 tests/requirements-mlp1.txt                   |  2 +-
 tests/requirements-mlp2.txt                   |  2 +-
 6 files changed, 82 insertions(+), 89 deletions(-)

diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index c98dd92544..5115686a15 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -31,9 +31,6 @@
 logger = logging.getLogger(__name__)
 
 
-
-
-
 @job
 def ocp_surface_generator(bulk_atoms: Atoms, max_miller: int = 1) -> list[Slab]:
     """
@@ -92,8 +89,12 @@ def __init__(
         self.shift = shift
         self.top = top
 
-        assert np.linalg.norm(self.atoms.cell[0]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}"
-        assert np.linalg.norm(self.atoms.cell[1]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}"
+        assert np.linalg.norm(self.atoms.cell[0]) >= min_ab, (
+            f"Slab not tiled, you need to repeat it to at least {min_ab}"
+        )
+        assert np.linalg.norm(self.atoms.cell[1]) >= min_ab, (
+            f"Slab not tiled, you need to repeat it to at least {min_ab}"
+        )
         assert self.has_surface_tagged(), "Slab not tagged"
         assert len(self.atoms.constraints) > 0, "Sub-surface atoms not constrained"
 
@@ -101,8 +102,8 @@ def __init__(
 @job
 def ocp_adslab_generator(
     slab: Slab | Atoms,
-    adsorbates_kwargs: list[dict[str,Any]] | None = None,
-    multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None,
+    adsorbates_kwargs: list[dict[str, Any]] | None = None,
+    multiple_adsorbate_slab_config_kwargs: dict[str, Any] | None = None,
 ) -> list[Atoms]:
     """
     Generate adsorbate-slab configurations.
@@ -152,8 +153,8 @@ def ocp_adslab_generator(
 @flow
 def find_adslabs_each_slab(
     slabs: list[Slab],
-    adsorbates_kwargs: dict[str,Any],
-    multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None,
+    adsorbates_kwargs: dict[str, Any],
+    multiple_adsorbate_slab_config_kwargs: dict[str, Any] | None = None,
 ) -> list[dict[str, Slab | list[Atoms]]]:
     """
     Find adsorbate-slab configurations for each slab.
@@ -259,7 +260,7 @@ def filter_sort_select_adslabs(
 @flow
 def adsorb_ml_pipeline(
     slab: Slab,
-    adsorbates_kwargs: dict[str,Any],
+    adsorbates_kwargs: dict[str, Any],
     multiple_adsorbate_slab_config_kwargs: dict[str, Any],
     ml_slab_adslab_relax_job: Job,
     slab_validate_job: Job,
@@ -312,11 +313,13 @@ def adsorb_ml_pipeline(
         detected anomalies, and optionally DFT-validated structures.
     """
 
-    slab.atoms.pbc=True
+    slab.atoms.pbc = True
     ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms)
 
     unrelaxed_adslab_configurations = ocp_adslab_generator(
-        ml_relaxed_slab_result["atoms"], adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs
+        ml_relaxed_slab_result["atoms"],
+        adsorbates_kwargs,
+        multiple_adsorbate_slab_config_kwargs,
     )
 
     ml_relaxed_configurations = [
@@ -357,17 +360,15 @@ def adsorb_ml_pipeline(
         }
     else:
         dft_validated_adslabs = [
-                    adslab_validate_job(top_candidates[i]["atoms"], relax_cell=False)
-                    for i in range(num_to_validate_with_DFT)
-                ]
+            adslab_validate_job(top_candidates[i]["atoms"], relax_cell=False)
+            for i in range(num_to_validate_with_DFT)
+        ]
 
         dft_validated_slab = slab_validate_job(slab.atoms, relax_cell=False)
 
         if reference_ml_energies_to_gas_phase:
             if atomic_reference_energies is None and molecule_results is None:
-                molecule_results = generate_molecule_reference_results(
-                    gas_validate_job
-                )
+                molecule_results = generate_molecule_reference_results(gas_validate_job)
 
             dft_validated_adslabs = reference_adslab_energies(
                 dft_validated_adslabs,
@@ -380,8 +381,11 @@ def adsorb_ml_pipeline(
             "slab": slab.get_metadata_dict(),
             "adslabs": top_candidates,
             "adslab_anomalies": adslab_anomalies_list,
-            "validated_structures": {"slab": dft_validated_slab, "adslabs": dft_validated_adslabs}}
-
+            "validated_structures": {
+                "slab": dft_validated_slab,
+                "adslabs": dft_validated_adslabs,
+            },
+        }
 
 
 @job
@@ -414,23 +418,22 @@ def reference_adslab_energies(
     if atomic_energies is None:
         if molecule_results is not None:
             atomic_energies = {
-            "H": molecule_results["H2"]["results"]["energy"] / 2,
-            "N": molecule_results["N2"]["results"]["energy"] / 2,
-            "O": (
-                molecule_results["H2O"]["results"]["energy"]
-                - molecule_results["H2"]["results"]["energy"]
-            ),
-            "C": molecule_results["CO"]["results"]["energy"]
-            - (
-                molecule_results["H2O"]["results"]["energy"]
-                - molecule_results["H2"]["results"]["energy"]
-            ),
-        }
+                "H": molecule_results["H2"]["results"]["energy"] / 2,
+                "N": molecule_results["N2"]["results"]["energy"] / 2,
+                "O": (
+                    molecule_results["H2O"]["results"]["energy"]
+                    - molecule_results["H2"]["results"]["energy"]
+                ),
+                "C": molecule_results["CO"]["results"]["energy"]
+                - (
+                    molecule_results["H2O"]["results"]["energy"]
+                    - molecule_results["H2"]["results"]["energy"]
+                ),
+            }
         else:
             raise Exception(
-            "Missing atomic energies and gas phase energies; unable to continue!"
-        )
-
+                "Missing atomic energies and gas phase energies; unable to continue!"
+            )
 
     slab_energy = slab_result["results"]["energy"]
 
@@ -515,7 +518,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeReferenceResu
 @flow
 def bulk_to_surfaces_to_adsorbml(
     bulk_atoms: Atoms,
-    adsorbates_kwargs: dict[str,Any],
+    adsorbates_kwargs: dict[str, Any],
     multiple_adsorbate_slab_config_kwargs: dict[str, Any],
     ml_relax_job: Job,
     slab_validate_job: Job,
@@ -523,7 +526,6 @@ def bulk_to_surfaces_to_adsorbml(
     gas_validate_job: Job,
     max_miller: int = 1,
     bulk_relax_job: Job | None = None,
-
     num_to_validate_with_DFT: int = 0,
     reference_ml_energies_to_gas_phase: bool = True,
     relax_bulk: bool = True,
@@ -587,9 +589,7 @@ def bulk_to_surfaces_to_adsorbml(
     slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller)
 
     if reference_ml_energies_to_gas_phase and atomic_reference_energies is not None:
-        molecule_results = generate_molecule_reference_results(
-            ml_relax_job
-        )
+        molecule_results = generate_molecule_reference_results(ml_relax_job)
     else:
         molecule_results = None
 
@@ -612,5 +612,5 @@ def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, An
         num_to_validate_with_DFT=num_to_validate_with_DFT,
         molecule_results=molecule_results,
         reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase,
-        atomic_reference_energies=atomic_reference_energies
+        atomic_reference_energies=atomic_reference_energies,
     )
diff --git a/src/quacc/recipes/mlp/adsorbml.py b/src/quacc/recipes/mlp/adsorbml.py
index 83bf378685..29a6955c5e 100644
--- a/src/quacc/recipes/mlp/adsorbml.py
+++ b/src/quacc/recipes/mlp/adsorbml.py
@@ -4,8 +4,8 @@
 from typing import TYPE_CHECKING, Any
 
 from quacc import Job, flow
+from quacc.recipes.common.adsorbml import adsorb_ml_pipeline as adsorb_ml_pipeline_
 from quacc.recipes.common.adsorbml import (
-    adsorb_ml_pipeline as adsorb_ml_pipeline_,
     bulk_to_surfaces_to_adsorbml as bulk_to_surfaces_to_adsorbml_,
 )
 from quacc.wflow_tools.customizers import customize_funcs
@@ -21,7 +21,6 @@
     )
 
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -123,7 +122,7 @@ def adsorb_ml_pipeline(
 @flow
 def bulk_to_surfaces_to_adsorbml(
     bulk_atoms: Atoms,
-    adsorbates_kwargs: dict[str,Any],
+    adsorbates_kwargs: dict[str, Any],
     multiple_adsorbate_slab_config_kwargs: dict[str, Any],
     ml_relax_job: Job,
     slab_validate_job: Job,
diff --git a/src/quacc/types.py b/src/quacc/types.py
index cd06566a6c..460519478b 100644
--- a/src/quacc/types.py
+++ b/src/quacc/types.py
@@ -553,11 +553,14 @@ class Slab(TypedDict):
     class AdsorbMLSchema(TypedDict):
         slab: Slab
         adslabs: list[OptSchema]
-        adslab_anomalies: list[Literal[
-            "adsorbate_dissociated",
-            "adsorbate_desorbed",
-            "surface_changed",
-            "adsorbate_intercalated",]]
+        adslab_anomalies: list[
+            Literal[
+                "adsorbate_dissociated",
+                "adsorbate_desorbed",
+                "surface_changed",
+                "adsorbate_intercalated",
+            ]
+        ]
         validated_structures: NotRequired[dict[str, Any]]
 
     class MoleculeReferenceResults(TypedDict):
diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
index 9b16e07cd8..a1df515c78 100644
--- a/tests/core/recipes/mlp_recipes/test_adsorbml.py
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -3,9 +3,9 @@
 import numpy as np
 import pytest
 from ase.build import bulk
-from quacc.recipes.mlp.core import relax_job
 
 from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
+from quacc.recipes.mlp.core import relax_job
 
 torch = pytest.importorskip("torch")
 
@@ -69,36 +69,33 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
 
     bulk_atoms = bulk("Cu")
 
-    outputs =  bulk_to_surfaces_to_adsorbml(
-    bulk_atoms,
-    adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}],
-    multiple_adsorbate_slab_config_kwargs={"num_configurations": 1},
-    ml_relax_job=relax_job,
-    slab_validate_job=relax_job,
-    adslab_validate_job=relax_job,
-    gas_validate_job=relax_job,
-    bulk_relax_job=relax_job,
-    job_params={
-        "all": dict(
-            method=method,
-            **calc_kwargs,
-            opt_params={"max_steps": 500},
-        ),
-    },
-    max_miller=1,
-    num_to_validate_with_DFT=1,
-    reference_ml_energies_to_gas_phase=True,
-    relax_bulk=True,
-)
-    index_111_facet = np.where([result["slab"]["slab_metadata"]['millers'] == (1, 1, 1) for result in outputs])[0][0]
+    outputs = bulk_to_surfaces_to_adsorbml(
+        bulk_atoms,
+        adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}],
+        multiple_adsorbate_slab_config_kwargs={"num_configurations": 1},
+        ml_relax_job=relax_job,
+        slab_validate_job=relax_job,
+        adslab_validate_job=relax_job,
+        gas_validate_job=relax_job,
+        bulk_relax_job=relax_job,
+        job_params={
+            "all": dict(method=method, **calc_kwargs, opt_params={"max_steps": 500})
+        },
+        max_miller=1,
+        num_to_validate_with_DFT=1,
+        reference_ml_energies_to_gas_phase=True,
+        relax_bulk=True,
+    )
+    index_111_facet = np.where(
+        [result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs]
+    )[0][0]
 
     assert len(outputs) > 0
-    assert outputs[index_111_facet]["adslabs"][0]["results"]["referenced_adsorption_energy"][
-        "adsorption_energy"
-    ]  == pytest.approx(
+    assert outputs[index_111_facet]["adslabs"][0]["results"][
+        "referenced_adsorption_energy"
+    ]["adsorption_energy"] == pytest.approx(
         ref_CO_Cu111_adsorption_energy[method], abs=0.1
     )
-    
 
 
 @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
@@ -111,9 +108,7 @@ def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):
         "seed": 42,
     }
 
-    ref_CO_Cu111_adsorption_energy = {
-        "EquiformerV2-31M-S2EF-OC20-All+MD": -0.5,
-    }
+    ref_CO_Cu111_adsorption_energy = {"EquiformerV2-31M-S2EF-OC20-All+MD": -0.5}
 
     bulk_atoms = bulk("Cu")
 
@@ -126,21 +121,17 @@ def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):
         adslab_validate_job=relax_job,
         gas_validate_job=relax_job,
         bulk_relax_job=relax_job,
-        job_params={
-            "all": dict(
-                method="fairchem",
-                **calc_kwargs,
-            ),
-        },
+        job_params={"all": dict(method="fairchem", **calc_kwargs)},
         max_miller=1,
         num_to_validate_with_DFT=1,
         reference_ml_energies_to_gas_phase=False,
         relax_bulk=False,
     )
-    index_111_facet = np.where([result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs])[0][0]
+    index_111_facet = np.where(
+        [result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs]
+    )[0][0]
 
     assert len(outputs) > 0
     assert outputs[index_111_facet]["adslabs"][0]["results"]["energy"] == pytest.approx(
         ref_CO_Cu111_adsorption_energy[fairchem_checkpoint], abs=0.1
     )
-    
\ No newline at end of file
diff --git a/tests/requirements-mlp1.txt b/tests/requirements-mlp1.txt
index 8ebcb8ab2a..108e0fe762 100644
--- a/tests/requirements-mlp1.txt
+++ b/tests/requirements-mlp1.txt
@@ -10,4 +10,4 @@ torch_geometric==2.6.1
 torch_scatter==2.1.2
 torch_sparse==0.6.18
 torch_cluster==1.6.3
-fairchem-data-oc==0.2.1
\ No newline at end of file
+fairchem-data-oc==0.2.1
diff --git a/tests/requirements-mlp2.txt b/tests/requirements-mlp2.txt
index f12449c333..0623938ec3 100644
--- a/tests/requirements-mlp2.txt
+++ b/tests/requirements-mlp2.txt
@@ -1,3 +1,3 @@
 mace-torch==0.3.12
 matgl @ git+https://github.com/materialsvirtuallab/matgl.git
-fairchem-data-oc==0.2.1
\ No newline at end of file
+fairchem-data-oc==0.2.1

From ddd17010bbc6bf17816635d628ae899507491965 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 14:27:55 +0000
Subject: [PATCH 09/19] add ignore for O energy

---
 src/quacc/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/quacc/types.py b/src/quacc/types.py
index 460519478b..eaf1de4fa4 100644
--- a/src/quacc/types.py
+++ b/src/quacc/types.py
@@ -572,7 +572,7 @@ class MoleculeReferenceResults(TypedDict):
     class AtomicReferenceEnergies(TypedDict):
         H: float
         N: float
-        O: float
+        O: float # noqa: E741
         C: float
 
     class VibThermoSchema(VibSchema, ThermoSchema):

From 8c1fa59876664a7e1604b2fd63338b1a8748a14c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 14:28:08 +0000
Subject: [PATCH 10/19] pre-commit auto-fixes

---
 src/quacc/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/quacc/types.py b/src/quacc/types.py
index eaf1de4fa4..58c4b37070 100644
--- a/src/quacc/types.py
+++ b/src/quacc/types.py
@@ -572,7 +572,7 @@ class MoleculeReferenceResults(TypedDict):
     class AtomicReferenceEnergies(TypedDict):
         H: float
         N: float
-        O: float # noqa: E741
+        O: float  # noqa: E741
         C: float
 
     class VibThermoSchema(VibSchema, ThermoSchema):

From 9b72e61be95a4ae965fb42b9e6a721542f282321 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 14:30:43 +0000
Subject: [PATCH 11/19] back off fairchem-data-oc test requirement pin to 0.2.0

---
 tests/requirements-mlp1.txt | 2 +-
 tests/requirements-mlp2.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/requirements-mlp1.txt b/tests/requirements-mlp1.txt
index 0c20c6a56d..5bebe4089f 100644
--- a/tests/requirements-mlp1.txt
+++ b/tests/requirements-mlp1.txt
@@ -10,4 +10,4 @@ torch_geometric==2.6.1
 torch_scatter==2.1.2
 torch_sparse==0.6.18
 torch_cluster==1.6.3
-fairchem-data-oc==0.2.1
+fairchem-data-oc==0.2.0
diff --git a/tests/requirements-mlp2.txt b/tests/requirements-mlp2.txt
index 0623938ec3..98d4fecb3a 100644
--- a/tests/requirements-mlp2.txt
+++ b/tests/requirements-mlp2.txt
@@ -1,3 +1,3 @@
 mace-torch==0.3.12
 matgl @ git+https://github.com/materialsvirtuallab/matgl.git
-fairchem-data-oc==0.2.1
+fairchem-data-oc==0.2.0

From 719376ce7fca574fb6b0f6c6032b214a6c1f04bb Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 14:38:17 +0000
Subject: [PATCH 12/19] skip adsorbml tests if fairchem-data-oc not installed

---
 pyproject.toml                                |  2 +-
 .../core/recipes/mlp_recipes/test_adsorbml.py | 19 +++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c16566fd01..69ae9da93b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ mlp1 = ["chgnet>=0.3.3", "torch-dftd>=0.4.0", "sevenn>=0.10.1",  "orb-models>=0.
 mlp2 = ["mace-torch>=0.3.3", "matgl>=1.1.2"]
 mp = ["atomate2>=0.0.14"]
 newtonnet = ["newtonnet>=1.1"]
-ocdata = ["fairchem-data-oc>=0.2.0"]
+adsorbml = ["fairchem-data-oc>=0.2.0"]
 parsl = ["parsl[monitoring]>=2024.5.27; platform_system!='Windows'"]
 phonons = ["phonopy>=2.20.0", "seekpath>=2.1.0"]
 prefect = ["prefect[dask]>=2.19.0", "dask-jobqueue>=0.8.2"]
diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
index a1df515c78..acae09dcdc 100644
--- a/tests/core/recipes/mlp_recipes/test_adsorbml.py
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -4,7 +4,6 @@
 import pytest
 from ase.build import bulk
 
-from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
 from quacc.recipes.mlp.core import relax_job
 
 torch = pytest.importorskip("torch")
@@ -27,7 +26,7 @@
 if has_orb := find_spec("orb_models"):
     methods.append("orb")
 
-if find_spec("fairchem"):
+if find_spec("fairchem.core"):
     from huggingface_hub.utils._auth import get_token
 
     if get_token():
@@ -39,9 +38,15 @@ def _set_dtype(size, type_="float"):
     globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}")
     torch.set_default_dtype(getattr(torch, f"float{size}"))
 
-
+@pytest.mark.skipif(
+    not has_fairchem_data_oc,
+    reason="fairchem-data-oc python package not available",
+)
 @pytest.mark.parametrize("method", methods)
 def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
+
+    from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
+
     monkeypatch.chdir(tmp_path)
 
     if method == "mace-mp-0":
@@ -97,9 +102,15 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
         ref_CO_Cu111_adsorption_energy[method], abs=0.1
     )
 
-
+@pytest.mark.skipif(
+    not has_fairchem_data_oc,
+    reason="fairchem-data-oc python package not available",
+)
 @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
 def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):
+
+    from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
+
     monkeypatch.chdir(tmp_path)
 
     calc_kwargs = {

From c1798cbb787087cfadad6af2f51aa05c9734183c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 14:38:31 +0000
Subject: [PATCH 13/19] pre-commit auto-fixes

---
 tests/core/recipes/mlp_recipes/test_adsorbml.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
index acae09dcdc..a61bb7d0bf 100644
--- a/tests/core/recipes/mlp_recipes/test_adsorbml.py
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -38,13 +38,12 @@ def _set_dtype(size, type_="float"):
     globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}")
     torch.set_default_dtype(getattr(torch, f"float{size}"))
 
+
 @pytest.mark.skipif(
-    not has_fairchem_data_oc,
-    reason="fairchem-data-oc python package not available",
+    not has_fairchem_data_oc, reason="fairchem-data-oc python package not available"
 )
 @pytest.mark.parametrize("method", methods)
 def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
-
     from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
 
     monkeypatch.chdir(tmp_path)
@@ -102,13 +101,12 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
         ref_CO_Cu111_adsorption_energy[method], abs=0.1
     )
 
+
 @pytest.mark.skipif(
-    not has_fairchem_data_oc,
-    reason="fairchem-data-oc python package not available",
+    not has_fairchem_data_oc, reason="fairchem-data-oc python package not available"
 )
 @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
 def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):
-
     from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
 
     monkeypatch.chdir(tmp_path)

From 238a57ba95d78344d93eaf1943238a7c3e863f1b Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 14:40:25 +0000
Subject: [PATCH 14/19] fix test skip: define has_fairchem_data_oc via find_spec

---
 tests/core/recipes/mlp_recipes/test_adsorbml.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
index a61bb7d0bf..757980a5af 100644
--- a/tests/core/recipes/mlp_recipes/test_adsorbml.py
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -32,18 +32,20 @@
     if get_token():
         methods.append("fairchem")
 
+has_fairchem_data_oc = find_spec("fairchem.data.oc")
 
 def _set_dtype(size, type_="float"):
     globals()[f"{type_}_th"] = getattr(torch, f"{type_}{size}")
     globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}")
     torch.set_default_dtype(getattr(torch, f"float{size}"))
 
-
 @pytest.mark.skipif(
-    not has_fairchem_data_oc, reason="fairchem-data-oc python package not available"
+    not has_fairchem_data_oc,
+    reason="fairchem-data-oc python package not available",
 )
 @pytest.mark.parametrize("method", methods)
 def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
+
     from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
 
     monkeypatch.chdir(tmp_path)
@@ -101,14 +103,15 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
         ref_CO_Cu111_adsorption_energy[method], abs=0.1
     )
 
-
 @pytest.mark.skipif(
-    not has_fairchem_data_oc, reason="fairchem-data-oc python package not available"
+    not has_fairchem_data_oc,
+    reason="fairchem-data-oc python package not available",
 )
 @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
 def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):
-    from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
 
+    from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
+    
     monkeypatch.chdir(tmp_path)
 
     calc_kwargs = {

From d8bae84613b06a0a842aa2ad8daa82e26f1a06fc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 14:43:33 +0000
Subject: [PATCH 15/19] pre-commit auto-fixes

---
 tests/core/recipes/mlp_recipes/test_adsorbml.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
index 757980a5af..34af40da2f 100644
--- a/tests/core/recipes/mlp_recipes/test_adsorbml.py
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -34,18 +34,18 @@
 
 has_fairchem_data_oc = find_spec("fairchem.data.oc")
 
+
 def _set_dtype(size, type_="float"):
     globals()[f"{type_}_th"] = getattr(torch, f"{type_}{size}")
     globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}")
     torch.set_default_dtype(getattr(torch, f"float{size}"))
 
+
 @pytest.mark.skipif(
-    not has_fairchem_data_oc,
-    reason="fairchem-data-oc python package not available",
+    not has_fairchem_data_oc, reason="fairchem-data-oc python package not available"
 )
 @pytest.mark.parametrize("method", methods)
 def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
-
     from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
 
     monkeypatch.chdir(tmp_path)
@@ -103,15 +103,14 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
         ref_CO_Cu111_adsorption_energy[method], abs=0.1
     )
 
+
 @pytest.mark.skipif(
-    not has_fairchem_data_oc,
-    reason="fairchem-data-oc python package not available",
+    not has_fairchem_data_oc, reason="fairchem-data-oc python package not available"
 )
 @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
 def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):
-
     from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml
-    
+
     monkeypatch.chdir(tmp_path)
 
     calc_kwargs = {

From 3688f16fcf50c5d3478b13f2364395bb200ab529 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 14:43:46 +0000
Subject: [PATCH 16/19] fix tests: detect fairchem via find_spec("fairchem.core")

---
 tests/core/recipes/mlp_recipes/test_core_recipes.py    | 2 +-
 tests/core/recipes/mlp_recipes/test_elastic_recipes.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/core/recipes/mlp_recipes/test_core_recipes.py b/tests/core/recipes/mlp_recipes/test_core_recipes.py
index 6d9d5b2879..0de3717cf4 100644
--- a/tests/core/recipes/mlp_recipes/test_core_recipes.py
+++ b/tests/core/recipes/mlp_recipes/test_core_recipes.py
@@ -27,7 +27,7 @@
 if has_orb := find_spec("orb_models"):
     methods.append("orb")
 
-if find_spec("fairchem"):
+if find_spec("fairchem.core"):
     from huggingface_hub.utils._auth import get_token
 
     if get_token():
diff --git a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py
index 23509fee09..4d638b4298 100644
--- a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py
+++ b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py
@@ -26,7 +26,7 @@
 if has_orb := find_spec("orb_models"):
     methods.append("orb")
 
-if find_spec("fairchem"):
+if find_spec("fairchem.core"):
     from huggingface_hub.utils._auth import get_token
 
     if get_token():

From f694404ad856bf07f5cdbb92523f0b1ae5d4efc7 Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 15:59:20 +0000
Subject: [PATCH 17/19] small test fixes

---
 .../core/recipes/mlp_recipes/test_adsorbml.py | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
index 34af40da2f..1e6d0fb81a 100644
--- a/tests/core/recipes/mlp_recipes/test_adsorbml.py
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -26,7 +26,7 @@
 if has_orb := find_spec("orb_models"):
     methods.append("orb")
 
-if find_spec("fairchem.core"):
+if has_fairchem := find_spec("fairchem.core"):
     from huggingface_hub.utils._auth import get_token
 
     if get_token():
@@ -57,19 +57,34 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
 
     if method == "fairchem":
         calc_kwargs = {
-            "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex",
+            "model_name": "EquiformerV2-31M-OMAT24",
             "local_cache": "./fairchem_checkpoint_cache/",
             "seed": 42,
         }
+        # Computed from DFT since we only need this once; this also serves as a test
+        # that the reference energies are being used correctly
+        OMAT_energies = {
+            "H2": -6.77226619,
+            "H2O": -14.23238513,
+            "N2": -16.65252624,
+            "CO": -14.79264030,
+        }
+        atomic_reference_energies = {
+            "H": OMAT_energies["H2"] / 2,
+            "N": OMAT_energies["N2"] / 2,
+            "O": (OMAT_energies["H2O"] - OMAT_energies["H2"]),
+            "C": OMAT_energies["CO"] - (OMAT_energies["H2O"] - OMAT_energies["H2"]),
+        }
     else:
         calc_kwargs = {}
+        atomic_reference_energies = None
 
     ref_CO_Cu111_adsorption_energy = {
         "chgnet": -1.5,
         "m3gnet": -0.5,
         "mace-mp-0": -0.5,
         "sevennet": -0.98,
-        "orb": -0.5,
+        "orb": -1.04,
         "fairchem": -0.5,
     }
 
@@ -89,6 +104,7 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
         },
         max_miller=1,
         num_to_validate_with_DFT=1,
+        atomic_reference_energies=atomic_reference_energies,
         reference_ml_energies_to_gas_phase=True,
         relax_bulk=True,
     )
@@ -105,7 +121,7 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
 
 
 @pytest.mark.skipif(
-    not has_fairchem_data_oc, reason="fairchem-data-oc python package not available"
+    not (has_fairchem_data_oc and has_fairchem), reason="fairchem-data-oc python package not available"
 )
 @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
 def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):

From 99b0857a115d88db9ec039317ec959abacdddb05 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 16:00:48 +0000
Subject: [PATCH 18/19] pre-commit auto-fixes

---
 tests/core/recipes/mlp_recipes/test_adsorbml.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py
index 1e6d0fb81a..5dae8e9520 100644
--- a/tests/core/recipes/mlp_recipes/test_adsorbml.py
+++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py
@@ -121,7 +121,8 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method):
 
 
 @pytest.mark.skipif(
-    not (has_fairchem_data_oc and has_fairchem), reason="fairchem-data-oc python package not available"
+    not (has_fairchem_data_oc and has_fairchem),
+    reason="fairchem-data-oc python package not available",
 )
 @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"])
 def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint):

From 67c02968cc5539515675e38a5f66fca0a177e53d Mon Sep 17 00:00:00 2001
From: Zack Ulissi <zulissi@meta.com>
Date: Wed, 26 Mar 2025 20:21:41 +0000
Subject: [PATCH 19/19] always reference DFT validations

---
 src/quacc/recipes/common/adsorbml.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py
index 5115686a15..79c9bd0c97 100644
--- a/src/quacc/recipes/common/adsorbml.py
+++ b/src/quacc/recipes/common/adsorbml.py
@@ -366,16 +366,16 @@ def adsorb_ml_pipeline(
 
         dft_validated_slab = slab_validate_job(slab.atoms, relax_cell=False)
 
-        if reference_ml_energies_to_gas_phase:
-            if atomic_reference_energies is None and molecule_results is None:
-                molecule_results = generate_molecule_reference_results(gas_validate_job)
-
-            dft_validated_adslabs = reference_adslab_energies(
-                dft_validated_adslabs,
-                dft_validated_slab,
-                atomic_energies=atomic_reference_energies,
-                molecule_results=molecule_results,
-            )
+        # If we're running DFT calculations, we always need to reference energies!
+        if atomic_reference_energies is None and molecule_results is None:
+            molecule_results = generate_molecule_reference_results(gas_validate_job)
+
+        dft_validated_adslabs = reference_adslab_energies(
+            dft_validated_adslabs,
+            dft_validated_slab,
+            atomic_energies=atomic_reference_energies,
+            molecule_results=molecule_results,
+        )
 
         return {
             "slab": slab.get_metadata_dict(),