From a5e361a523e0aa915514ab7bd40b8a8de614a7d9 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Thu, 20 Mar 2025 23:45:12 +0000 Subject: [PATCH 01/19] first stab --- pyproject.toml | 1 + src/quacc/recipes/common/adsorbml.py | 363 +++++++++++++++++++++++++++ 2 files changed, 364 insertions(+) create mode 100644 src/quacc/recipes/common/adsorbml.py diff --git a/pyproject.toml b/pyproject.toml index fa87273af4..91b2f348cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ mlp1 = ["chgnet>=0.3.3", "torch-dftd>=0.4.0", "sevenn>=0.10.1", "orb-models>=0. mlp2 = ["mace-torch>=0.3.3", "matgl>=1.1.2"] mp = ["atomate2>=0.0.14"] newtonnet = ["newtonnet>=1.1"] +ocp = ["fairchem-data-oc>=0.2.0"] parsl = ["parsl[monitoring]>=2024.5.27; platform_system!='Windows'"] phonons = ["phonopy>=2.20.0", "seekpath>=2.1.0"] prefect = ["prefect[dask]>=2.19.0", "dask-jobqueue>=0.8.2"] diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py new file mode 100644 index 0000000000..1b9ead18b3 --- /dev/null +++ b/src/quacc/recipes/common/adsorbml.py @@ -0,0 +1,363 @@ +from __future__ import annotations + +import copy +import logging +from typing import TYPE_CHECKING, Any, Literal + +import numpy as np +from ase.atoms import Atoms +from ase.build import molecule +from fairchem.data.oc.core.adsorbate import Adsorbate +from fairchem.data.oc.core.bulk import Bulk +from fairchem.data.oc.core.multi_adsorbate_slab_config import ( + MultipleAdsorbateSlabConfig, +) +from fairchem.data.oc.core.slab import Slab, tile_and_tag_atoms +from fairchem.data.oc.utils import DetectTrajAnomaly + +from quacc import Job, flow, job +from quacc.utils.dicts import recursive_dict_merge +from quacc.wflow_tools.customizers import customize_funcs, strip_decorator + +if TYPE_CHECKING: + from quacc.types import OptSchema, RunSchema + +logger = logging.getLogger(__name__) + + +@job +def ocp_surface_generator(bulk_atoms, max_miller: int = 1): + # This gets ocdata.core.slab.Slab objects which already know which atoms are the surface atoms! + return Slab.from_bulk_get_all_slabs(Bulk(bulk_atoms), max_miller) + + +class CustomSlab(Slab): + # Custom slab object to ignore the composition of the bulk in the initialization + # and make sure that the user did the right things to have the surface tagged for + # adsorbate placement + + def __init__( + self, + slab_atoms: Atoms, + bulk: Atoms | None = None, + millers: tuple | None = None, + shift: float | None = None, + top: bool | None = None, + min_ab: float = 0.8, + ): + self.bulk = bulk + self.slab_atoms = slab_atoms + self.millers = millers + self.shift = shift + self.top = top + + assert np.linalg.norm(self.slab_atoms.cell[0]) >= min_ab, "Slab not tiled" + assert np.linalg.norm(self.slab_atoms.cell[1]) >= min_ab, "Slab not tiled" + assert self.has_surface_tagged(), "Slab not tagged" + assert len(self.slab_atoms.constraints) > 0, "Sub-surface atoms not constrained" + + +# Not clear this actually needs to a job, maybe just a function if pretty fast? +@job +def ocp_adslab_generator( + slab: Slab | Atoms, + adsorbates_kwargs: list[dict[Any, Any]] | None = None, + multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None, +) -> list[Atoms]: + if adsorbates_kwargs is None: + logger.warning("No adsorbate kwargs found, defaulting to a single *OH!") + adsorbates_kwargs = [{"adsorbate_smiles_from_db": "*OH"}] + + adsorbates = [ + Adsorbate(**adsorbate_kwargs) for adsorbate_kwargs in adsorbates_kwargs + ] + + if isinstance(slab, Atoms): + try: + slab = CustomSlab(slab_atoms=slab) + except AssertionError: + slab = CustomSlab(slab_atoms=tile_and_tag_atoms(slab)) + logger.warning( + "The slab was not tagged and/or tiled. " + "We did the best we could, but you should be careful and check the results!" + ) + + if multiple_adsorbate_slab_config_kwargs is None: + multiple_adsorbate_slab_config_kwargs = {} + + adslabs = MultipleAdsorbateSlabConfig( + copy.deepcopy(slab), adsorbates, **multiple_adsorbate_slab_config_kwargs + ) + + return adslabs.atoms_list + + +@flow +def find_adslabs_each_slab( + slabs: list[Slab], + adsorbates_kwargs: dict[Any, Any], + multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None, +): + return [ + { + "slab": slab, + "adslabs": ocp_adslab_generator( + slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs + ), + } + for slab in slabs + ] + + +def detect_anomaly( + initial_atoms: Atoms, final_atoms: Atoms +) -> list[ + Literal[ + "adsorbate_dissociated", + "adsorbate_desorbed", + "surface_changed", + "adsorbate_intercalated", + ] +]: + atom_tags = initial_atoms.get_tags() + + detector = DetectTrajAnomaly(initial_atoms, final_atoms, atom_tags) + anomalies = [] + if detector.is_adsorbate_dissociated(): + anomalies.append("adsorbate_dissociated") + if detector.is_adsorbate_desorbed(): + anomalies.append("adsorbate_desorbed") + if detector.has_surface_changed(): + anomalies.append("surface_changed") + if detector.is_adsorbate_intercalated(): + anomalies.append("adsorbate_intercalated") + return anomalies + + +@job +def filter_sort_select_adslabs( + adslab_results: list[RunSchema], adslab_anomalies_list: list[list[str]] +): + for adslab_result, adslab_anomalies in zip( + adslab_results, adslab_anomalies_list, strict=True + ): + adslab_result["results"]["adslab_anomalies"] = adslab_anomalies + + adslabs_no_anomalies = [ + adslab_result + for adslab_result in adslab_results + if len(adslab_result["results"]["adslab_anomalies"]) == 0 + ] + + return sorted(adslabs_no_anomalies, key=lambda x: x["results"]["energy"]) + + +@flow +def adsorb_ml_pipeline( + slab: Slab, + adsorbates_kwargs: dict[Any, Any], + multiple_adsorbate_slab_config_kwargs: dict[Any, Any], + ml_slab_adslab_relax_job: Job, + slab_validate_job: Job, + adslab_validate_job: Job, + gas_validate_job: Job, + num_to_validate_with_DFT: int = 0, + ml_gas_references: dict[str, RunSchema] | None = None, +): + unrelaxed_adslab_configurations = ocp_adslab_generator( + slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs + ) + + ml_relaxed_configurations = [ + ml_slab_adslab_relax_job(adslab_configuration) + for adslab_configuration in unrelaxed_adslab_configurations + ] + + if ml_gas_references is None: + ml_gas_references = { + "N2": ml_slab_adslab_relax_job( + molecule_pbc("N2", vacuum=10), relax_cell=False + ), + "CO": ml_slab_adslab_relax_job( + molecule_pbc("CO", vacuum=10), relax_cell=False + ), + "H2": ml_slab_adslab_relax_job( + molecule_pbc("H2", vacuum=10), relax_cell=False + ), + "H2O": ml_slab_adslab_relax_job( + molecule_pbc("H2O", vacuum=10), relax_cell=False + ), + } + + ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms) + + ml_referenced_configurations = reference_adslab_energies( + ml_relaxed_configurations, + ml_relaxed_slab_result, + CO_result=ml_gas_references["CO"], + N2_result=ml_gas_references["N2"], + H2_result=ml_gas_references["H2"], + H2O_result=ml_gas_references["H2O"], + ) + + adslab_anomalies_list = [ + job(detect_anomaly)( + relaxed_result["input_atoms"]["atoms"], relaxed_result["atoms"] + ) + for relaxed_result in ml_relaxed_configurations + ] + + top_candidates = filter_sort_select_adslabs( + adslab_results=ml_referenced_configurations, + adslab_anomalies_list=adslab_anomalies_list, + ) + + if num_to_validate_with_DFT == 0: + return { + "slab": slab, + "unrelaxed_configurations": unrelaxed_adslab_configurations, + "adslab_ml_relaxed_configurations": ml_relaxed_configurations, + "adslab_anomalies": adslab_anomalies_list, + } + else: + return { + "slab": slab, + "unrelaxed_configurations": unrelaxed_adslab_configurations, + "adslab_ml_relaxed_configurations": ml_relaxed_configurations, + "adslab_anomalies": adslab_anomalies_list, + "validated_structures": { + "validated_adslabs": [ + adslab_validate_job(top_candidates[i]["relaxed_configuration"]) + for i in range(num_to_validate_with_DFT) + ], + "slab_validated": slab_validate_job(slab.atoms, relax_cell=False), + }, + } + + +@job +def reference_adslab_energies( + adslab_results: list[dict[str, Any]], + slab_result: RunSchema, + CO_result: OptSchema | RunSchema, + H2_result: OptSchema | RunSchema, + H2O_result: OptSchema | RunSchema, + N2_result: OptSchema | RunSchema, +): + adslab_results = copy.deepcopy(adslab_results) + atomic_energies = { + "H": H2_result["results"]["energy"] / 2, + "N": N2_result["results"]["energy"] / 2, + "O": (H2O_result["results"]["energy"] - H2_result["results"]["energy"]), + "C": CO_result["results"]["energy"] + - (H2O_result["results"]["energy"] - H2_result["results"]["energy"]), + } + + slab_energy = slab_result["results"]["energy"] + + return [ + recursive_dict_merge( + adslab_result, + { + "results_references": {"atomic_energies": atomic_energies}, + "referenced_adsorption_energy": adslab_result["results"]["energy"] + - slab_energy + - sum( + [ + atomic_energies[atom.symbol] + for atom in adslab_result["atoms"][ + adslab_result["atoms"].get_tags() == 2 + ] # all adsorbate tagged with tag=2! + ] + ), + }, + ) + for adslab_result in adslab_results + ] + + +def molecule_pbc(*args, **molecule_kwargs): + atoms = molecule(*args, **molecule_kwargs) + atoms.pbc = True + return atoms + + +@flow +def standard_ocp_dataset_enumeration( + bulk_atoms: Atoms, + adsorbates_kwargs: dict[str, Any], + multiple_adsorbate_slab_config_kwargs: dict[str, Any], + ml_relax_job: Job, + slab_validate_job: Job, + adslab_validate_job: Job, + gas_validate_job: Job, + bulk_relax_job: Job | None, + job_params: dict[str, dict[str, Any]] | None = None, + job_decorators: dict[str, dict[str, Any]] | None = None, + max_miller: int = 1, + num_to_validate_with_DFT: int = 0, +): + ( + bulk_relax_job_, + ml_slab_adslab_relax_job_, + slab_validate_job_, + adslab_validate_job_, + gas_validate_job_, + ) = customize_funcs( + [ + "bulk_relax_job", + "ml_slab_adslab_relax_job", + "slab_validate_job", + "adslab_validate_job", + "gas_relax_job", + ], + [ + bulk_relax_job, # type: ignore + ml_relax_job, + slab_validate_job, + adslab_validate_job, + gas_validate_job, + ], + param_swaps=job_params, + decorators=job_decorators, # type: ignore + ) + + if bulk_relax_job is not None: + bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"] + + slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller) + + ml_gas_references = { + "N2": ml_slab_adslab_relax_job_( + molecule_pbc("N2", vacuum=10), relax_cell=False + ), + "CO": ml_slab_adslab_relax_job_( + molecule_pbc("CO", vacuum=10), relax_cell=False + ), + "H2": ml_slab_adslab_relax_job_( + molecule_pbc("H2", vacuum=10), relax_cell=False + ), + "H2O": ml_slab_adslab_relax_job_( + molecule_pbc("H2O", vacuum=10), relax_cell=False + ), + } + + @flow + def adsorbML_each_surface(slabs, **kwargs): + return [ + # We strip the decorator here so it's a bunch of jobs, not subflows. Helpful for prefect! + strip_decorator(adsorb_ml_pipeline)(slab=slab, **kwargs) + for slab in slabs + ] + + return adsorbML_each_surface( + slabs=slabs, + adsorbates_kwargs=adsorbates_kwargs, + multiple_adsorbate_slab_config_kwargs=multiple_adsorbate_slab_config_kwargs, + ml_slab_adslab_relax_job=ml_slab_adslab_relax_job_, + slab_validate_job=slab_validate_job_, + adslab_validate_job=adslab_validate_job_, + gas_validate_job=gas_validate_job_, + num_to_validate_with_DFT=num_to_validate_with_DFT, + ml_gas_references=ml_gas_references, + ) From 770c6da550b756cc32d79ef559be87684deed387 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Fri, 21 Mar 2025 00:44:24 +0000 Subject: [PATCH 02/19] minor fixes --- src/quacc/recipes/common/adsorbml.py | 135 +++++++++++++++------------ 1 file changed, 74 insertions(+), 61 deletions(-) diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index 1b9ead18b3..4fd4b32ed4 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -64,10 +64,6 @@ def ocp_adslab_generator( adsorbates_kwargs: list[dict[Any, Any]] | None = None, multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None, ) -> list[Atoms]: - if adsorbates_kwargs is None: - logger.warning("No adsorbate kwargs found, defaulting to a single *OH!") - adsorbates_kwargs = [{"adsorbate_smiles_from_db": "*OH"}] - adsorbates = [ Adsorbate(**adsorbate_kwargs) for adsorbate_kwargs in adsorbates_kwargs ] @@ -163,6 +159,7 @@ def adsorb_ml_pipeline( gas_validate_job: Job, num_to_validate_with_DFT: int = 0, ml_gas_references: dict[str, RunSchema] | None = None, + reference_ml_energies_to_gas_phase: bool = False, ): unrelaxed_adslab_configurations = ocp_adslab_generator( slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs @@ -173,32 +170,32 @@ def adsorb_ml_pipeline( for adslab_configuration in unrelaxed_adslab_configurations ] - if ml_gas_references is None: - ml_gas_references = { - "N2": ml_slab_adslab_relax_job( - molecule_pbc("N2", vacuum=10), relax_cell=False - ), - "CO": ml_slab_adslab_relax_job( - molecule_pbc("CO", vacuum=10), relax_cell=False - ), - "H2": ml_slab_adslab_relax_job( - molecule_pbc("H2", vacuum=10), relax_cell=False - ), - "H2O": ml_slab_adslab_relax_job( - molecule_pbc("H2O", vacuum=10), relax_cell=False - ), - } - ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms) - ml_referenced_configurations = reference_adslab_energies( - ml_relaxed_configurations, - ml_relaxed_slab_result, - CO_result=ml_gas_references["CO"], - N2_result=ml_gas_references["N2"], - H2_result=ml_gas_references["H2"], - H2O_result=ml_gas_references["H2O"], - ) + if reference_ml_energies_to_gas_phase: + if ml_gas_references is None: + ml_gas_references = { + "N2": ml_slab_adslab_relax_job( + molecule_pbc("N2", vacuum=10), relax_cell=False + ), + "CO": ml_slab_adslab_relax_job( + molecule_pbc("CO", vacuum=10), relax_cell=False + ), + "H2": ml_slab_adslab_relax_job( + molecule_pbc("H2", vacuum=10), relax_cell=False + ), + "H2O": ml_slab_adslab_relax_job( + molecule_pbc("H2O", vacuum=10), relax_cell=False + ), + } + ml_relaxed_configurations = reference_adslab_energies( + ml_relaxed_configurations, + ml_relaxed_slab_result, + CO_result=ml_gas_references["CO"], + N2_result=ml_gas_references["N2"], + H2_result=ml_gas_references["H2"], + H2O_result=ml_gas_references["H2O"], + ) adslab_anomalies_list = [ job(detect_anomaly)( @@ -208,26 +205,24 @@ def adsorb_ml_pipeline( ] top_candidates = filter_sort_select_adslabs( - adslab_results=ml_referenced_configurations, + adslab_results=ml_relaxed_configurations, adslab_anomalies_list=adslab_anomalies_list, ) if num_to_validate_with_DFT == 0: return { "slab": slab, - "unrelaxed_configurations": unrelaxed_adslab_configurations, - "adslab_ml_relaxed_configurations": ml_relaxed_configurations, + "adslab_ml_relaxed_configurations": top_candidates, "adslab_anomalies": adslab_anomalies_list, } else: return { "slab": slab, - "unrelaxed_configurations": unrelaxed_adslab_configurations, - "adslab_ml_relaxed_configurations": ml_relaxed_configurations, + "adslab_ml_relaxed_configurations": top_candidates, "adslab_anomalies": adslab_anomalies_list, "validated_structures": { "validated_adslabs": [ - adslab_validate_job(top_candidates[i]["relaxed_configuration"]) + adslab_validate_job(top_candidates[i]["atoms"]) for i in range(num_to_validate_with_DFT) ], "slab_validated": slab_validate_job(slab.atoms, relax_cell=False), @@ -259,17 +254,29 @@ def reference_adslab_energies( recursive_dict_merge( adslab_result, { - "results_references": {"atomic_energies": atomic_energies}, - "referenced_adsorption_energy": adslab_result["results"]["energy"] - - slab_energy - - sum( - [ - atomic_energies[atom.symbol] - for atom in adslab_result["atoms"][ - adslab_result["atoms"].get_tags() == 2 - ] # all adsorbate tagged with tag=2! - ] - ), + "results": { + "atomic_energies": atomic_energies, + "slab_energy": slab_energy, + "adslab_energy": adslab_result["results"]["energy"], + "gas_reactant_energy": sum( + [ + atomic_energies[atom.symbol] + for atom in adslab_result["atoms"][ + adslab_result["atoms"].get_tags() == 2 + ] # all adsorbate tagged with tag=2! + ] + ), + "adsorption_energy": adslab_result["results"]["energy"] + - slab_energy + - sum( + [ + atomic_energies[atom.symbol] + for atom in adslab_result["atoms"][ + adslab_result["atoms"].get_tags() == 2 + ] # all adsorbate tagged with tag=2! + ] + ), + } }, ) for adslab_result in adslab_results @@ -296,6 +303,8 @@ def standard_ocp_dataset_enumeration( job_decorators: dict[str, dict[str, Any]] | None = None, max_miller: int = 1, num_to_validate_with_DFT: int = 0, + reference_ml_energies_to_gas_phase: bool = True, + relax_bulk: bool = True, ): ( bulk_relax_job_, @@ -309,7 +318,7 @@ def standard_ocp_dataset_enumeration( "ml_slab_adslab_relax_job", "slab_validate_job", "adslab_validate_job", - "gas_relax_job", + "gas_validate_job", ], [ bulk_relax_job, # type: ignore @@ -322,25 +331,28 @@ def standard_ocp_dataset_enumeration( decorators=job_decorators, # type: ignore ) - if bulk_relax_job is not None: + if relax_bulk: bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"] slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller) - ml_gas_references = { - "N2": ml_slab_adslab_relax_job_( - molecule_pbc("N2", vacuum=10), relax_cell=False - ), - "CO": ml_slab_adslab_relax_job_( - molecule_pbc("CO", vacuum=10), relax_cell=False - ), - "H2": ml_slab_adslab_relax_job_( - molecule_pbc("H2", vacuum=10), relax_cell=False - ), - "H2O": ml_slab_adslab_relax_job_( - molecule_pbc("H2O", vacuum=10), relax_cell=False - ), - } + if reference_ml_energies_to_gas_phase: + ml_gas_references = { + "N2": ml_slab_adslab_relax_job_( + molecule_pbc("N2", vacuum=10), relax_cell=False + ), + "CO": ml_slab_adslab_relax_job_( + molecule_pbc("CO", vacuum=10), relax_cell=False + ), + "H2": ml_slab_adslab_relax_job_( + molecule_pbc("H2", vacuum=10), relax_cell=False + ), + "H2O": ml_slab_adslab_relax_job_( + molecule_pbc("H2O", vacuum=10), relax_cell=False + ), + } + else: + ml_gas_references = None @flow def adsorbML_each_surface(slabs, **kwargs): @@ -360,4 +372,5 @@ def adsorbML_each_surface(slabs, **kwargs): gas_validate_job=gas_validate_job_, num_to_validate_with_DFT=num_to_validate_with_DFT, ml_gas_references=ml_gas_references, + reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase, ) From d0127267d3ad40be74a903c6285d4ac8b2203013 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Fri, 21 Mar 2025 18:04:49 +0000 Subject: [PATCH 03/19] clean up --- src/quacc/recipes/common/adsorbml.py | 409 +++++++++++++++++++++------ 1 file changed, 321 insertions(+), 88 deletions(-) diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index 4fd4b32ed4..e415109c1a 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -2,7 +2,7 @@ import copy import logging -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, TypedDict import numpy as np from ase.atoms import Atoms @@ -24,10 +24,44 @@ logger = logging.getLogger(__name__) +class AdsorbatesKwargs(TypedDict): + adsorbate_type: str + position: list[float] + orientation: list[float] + +class MultipleAdsorbateSlabConfigKwargs(TypedDict): + num_adsorbates: int + adsorbate_distance: float + +class MoleculeResults(TypedDict): + N2: RunSchema + CO: RunSchema + H2: RunSchema + H2O: RunSchema + +class AtomicReferenceEnergies(TypedDict): + H: float + N: float + O: float + C: float @job -def ocp_surface_generator(bulk_atoms, max_miller: int = 1): - # This gets ocdata.core.slab.Slab objects which already know which atoms are the surface atoms! +def ocp_surface_generator(bulk_atoms: Atoms, max_miller: int = 1) -> list[Slab]: + """ + Generate surface slabs from bulk atoms. + + Parameters + ---------- + bulk_atoms : Atoms + The bulk atomic structure. + max_miller : int, optional + Maximum Miller index, by default 1. + + Returns + ------- + list[Slab] + List of generated surface slabs. + """ return Slab.from_bulk_get_all_slabs(Bulk(bulk_atoms), max_miller) @@ -45,6 +79,24 @@ def __init__( top: bool | None = None, min_ab: float = 0.8, ): + """ + Initialize a CustomSlab object. + + Parameters + ---------- + slab_atoms : Atoms + The slab atomic structure. + bulk : Atoms, optional + The bulk atomic structure, by default None. + millers : tuple, optional + Miller indices, by default None. + shift : float, optional + Shift value, by default None. + top : bool, optional + Top value, by default None. + min_ab : float, optional + Minimum a and b lattice parameters, by default 0.8. + """ self.bulk = bulk self.slab_atoms = slab_atoms self.millers = millers @@ -57,13 +109,29 @@ def __init__( assert len(self.slab_atoms.constraints) > 0, "Sub-surface atoms not constrained" -# Not clear this actually needs to a job, maybe just a function if pretty fast? @job def ocp_adslab_generator( slab: Slab | Atoms, - adsorbates_kwargs: list[dict[Any, Any]] | None = None, - multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None, + adsorbates_kwargs: list[AdsorbatesKwargs] | None = None, + multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None, ) -> list[Atoms]: + """ + Generate adsorbate-slab configurations. + + Parameters + ---------- + slab : Slab | Atoms + The slab structure. + adsorbates_kwargs : list[AdsorbatesKwargs], optional + List of keyword arguments for generating adsorbates, by default None. + multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional + Keyword arguments for generating multiple adsorbate-slab configurations, by default None. + + Returns + ------- + list[Atoms] + List of generated adsorbate-slab configurations. + """ adsorbates = [ Adsorbate(**adsorbate_kwargs) for adsorbate_kwargs in adsorbates_kwargs ] @@ -91,9 +159,26 @@ def ocp_adslab_generator( @flow def find_adslabs_each_slab( slabs: list[Slab], - adsorbates_kwargs: dict[Any, Any], - multiple_adsorbate_slab_config_kwargs: dict[Any, Any] | None = None, -): + adsorbates_kwargs: AdsorbatesKwargs, + multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None, +) -> list[dict[str, Any]]: + """ + Find adsorbate-slab configurations for each slab. + + Parameters + ---------- + slabs : list[Slab] + List of slabs. + adsorbates_kwargs : AdsorbatesKwargs + Keyword arguments for generating adsorbates. + multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional + Keyword arguments for generating multiple adsorbate-slab configurations, by default None. + + Returns + ------- + list[dict[str, Any]] + List of dictionaries containing slabs and their corresponding adsorbate-slab configurations. + """ return [ { "slab": slab, @@ -115,6 +200,21 @@ def detect_anomaly( "adsorbate_intercalated", ] ]: + """ + Detect anomalies between initial and final atomic structures. + + Parameters + ---------- + initial_atoms : Atoms + Initial atomic structure. + final_atoms : Atoms + Final atomic structure. + + Returns + ------- + list[Literal["adsorbate_dissociated", "adsorbate_desorbed", "surface_changed", "adsorbate_intercalated"]] + List of detected anomalies. + """ atom_tags = initial_atoms.get_tags() detector = DetectTrajAnomaly(initial_atoms, final_atoms, atom_tags) @@ -133,7 +233,22 @@ def detect_anomaly( @job def filter_sort_select_adslabs( adslab_results: list[RunSchema], adslab_anomalies_list: list[list[str]] -): +) -> list[RunSchema]: + """ + Filter, sort, and select adsorbate-slab configurations based on anomalies and energy. + + Parameters + ---------- + adslab_results : list[RunSchema] + List of adsorbate-slab results. + adslab_anomalies_list : list[list[str]] + List of detected anomalies for each adsorbate-slab configuration. + + Returns + ------- + list[RunSchema] + Sorted list of adsorbate-slab configurations without anomalies. + """ for adslab_result, adslab_anomalies in zip( adslab_results, adslab_anomalies_list, strict=True ): @@ -151,16 +266,51 @@ def filter_sort_select_adslabs( @flow def adsorb_ml_pipeline( slab: Slab, - adsorbates_kwargs: dict[Any, Any], - multiple_adsorbate_slab_config_kwargs: dict[Any, Any], + adsorbates_kwargs: AdsorbatesKwargs, + multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs, ml_slab_adslab_relax_job: Job, slab_validate_job: Job, adslab_validate_job: Job, gas_validate_job: Job, num_to_validate_with_DFT: int = 0, - ml_gas_references: dict[str, RunSchema] | None = None, reference_ml_energies_to_gas_phase: bool = False, -): + molecule_results: MoleculeResults | None = None, + atomic_reference_energies: AtomicReferenceEnergies | None = None, +) -> dict[str, Any]: + """ + Run a machine learning-based pipeline for adsorbate-slab systems. + + Parameters + ---------- + slab : Slab + The slab structure to which adsorbates will be added. + adsorbates_kwargs : AdsorbatesKwargs + Keyword arguments for generating adsorbate configurations. + multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs + Keyword arguments for generating multiple adsorbate-slab configurations. + ml_slab_adslab_relax_job : Job + Job for relaxing slab and adsorbate-slab configurations using ML. + slab_validate_job : Job + Job for validating the slab structure. + adslab_validate_job : Job + Job for validating the adsorbate-slab structures. + gas_validate_job : Job + Job for validating gas phase structures. + num_to_validate_with_DFT : int, optional + Number of top configurations to validate with DFT, by default 0. + reference_ml_energies_to_gas_phase : bool, optional + Whether to reference ML energies to gas phase, by default False. + molecule_results : MoleculeResults, optional + Precomputed molecule results for referencing, by default None. + atomic_reference_energies : AtomicReferenceEnergies, optional + Atomic reference energies for referencing, by default None. + + Returns + ------- + dict + Dictionary containing the slab, ML-relaxed adsorbate-slab configurations, + detected anomalies, and optionally DFT-validated structures. + """ unrelaxed_adslab_configurations = ocp_adslab_generator( slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs ) @@ -173,28 +323,16 @@ def adsorb_ml_pipeline( ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms) if reference_ml_energies_to_gas_phase: - if ml_gas_references is None: - ml_gas_references = { - "N2": ml_slab_adslab_relax_job( - molecule_pbc("N2", vacuum=10), relax_cell=False - ), - "CO": ml_slab_adslab_relax_job( - molecule_pbc("CO", vacuum=10), relax_cell=False - ), - "H2": ml_slab_adslab_relax_job( - molecule_pbc("H2", vacuum=10), relax_cell=False - ), - "H2O": ml_slab_adslab_relax_job( - molecule_pbc("H2O", vacuum=10), relax_cell=False - ), - } + if atomic_reference_energies is None and molecule_results is None: + molecule_results = generate_molecule_reference_results( + ml_slab_adslab_relax_job + ) + ml_relaxed_configurations = reference_adslab_energies( ml_relaxed_configurations, ml_relaxed_slab_result, - CO_result=ml_gas_references["CO"], - N2_result=ml_gas_references["N2"], - H2_result=ml_gas_references["H2"], - H2O_result=ml_gas_references["H2O"], + atomic_energies=atomic_reference_energies, + molecule_results=molecule_results, ) adslab_anomalies_list = [ @@ -234,19 +372,47 @@ def adsorb_ml_pipeline( def reference_adslab_energies( adslab_results: list[dict[str, Any]], slab_result: RunSchema, - CO_result: OptSchema | RunSchema, - H2_result: OptSchema | RunSchema, - H2O_result: OptSchema | RunSchema, - N2_result: OptSchema | RunSchema, -): + atomic_energies: AtomicReferenceEnergies | None, + molecule_results: MoleculeResults | None, +) -> list[dict[str, Any]]: + """ + Reference adsorbate-slab energies to atomic and slab energies. + + Parameters + ---------- + adslab_results : list[dict[str, Any]] + List of adsorbate-slab results. + slab_result : RunSchema + Result of the slab calculation. + atomic_energies : AtomicReferenceEnergies | None + Dictionary of atomic energies. + molecule_results : MoleculeResults | None + Dictionary of molecule results. + + Returns + ------- + list[dict[str, Any]] + List of adsorbate-slab results with referenced energies. + """ adslab_results = copy.deepcopy(adslab_results) - atomic_energies = { - "H": H2_result["results"]["energy"] / 2, - "N": N2_result["results"]["energy"] / 2, - "O": (H2O_result["results"]["energy"] - H2_result["results"]["energy"]), - "C": CO_result["results"]["energy"] - - (H2O_result["results"]["energy"] - H2_result["results"]["energy"]), - } + if atomic_energies is None and molecule_results is not None: + atomic_energies = { + "H": molecule_results["H2"]["results"]["energy"] / 2, + "N": molecule_results["N2"]["results"]["energy"] / 2, + "O": ( + molecule_results["H2O"]["results"]["energy"] + - molecule_results["H2"]["results"]["energy"] + ), + "C": molecule_results["CO"]["results"]["energy"] + - ( + molecule_results["H2O"]["results"]["energy"] + - molecule_results["H2"]["results"]["energy"] + ), + } + else: + raise Exception( + "Missing atomic energies and gas phase energies; unable to continue!" + ) slab_energy = slab_result["results"]["energy"] @@ -255,27 +421,29 @@ def reference_adslab_energies( adslab_result, { "results": { - "atomic_energies": atomic_energies, - "slab_energy": slab_energy, - "adslab_energy": adslab_result["results"]["energy"], - "gas_reactant_energy": sum( - [ - atomic_energies[atom.symbol] - for atom in adslab_result["atoms"][ - adslab_result["atoms"].get_tags() == 2 - ] # all adsorbate tagged with tag=2! - ] - ), - "adsorption_energy": adslab_result["results"]["energy"] - - slab_energy - - sum( - [ - atomic_energies[atom.symbol] - for atom in adslab_result["atoms"][ - adslab_result["atoms"].get_tags() == 2 - ] # all adsorbate tagged with tag=2! - ] - ), + "referenced_adsorption_energy": { + "atomic_energies": atomic_energies, + "slab_energy": slab_energy, + "adslab_energy": adslab_result["results"]["energy"], + "gas_reactant_energy": sum( + [ + atomic_energies[atom.symbol] + for atom in adslab_result["atoms"][ + adslab_result["atoms"].get_tags() == 2 + ] # all adsorbate tagged with tag=2! + ] + ), + "adsorption_energy": adslab_result["results"]["energy"] + - slab_energy + - sum( + [ + atomic_energies[atom.symbol] + for atom in adslab_result["atoms"][ + adslab_result["atoms"].get_tags() == 2 + ] # all adsorbate tagged with tag=2! + ] + ), + } } }, ) @@ -283,17 +451,54 @@ def reference_adslab_energies( ] -def molecule_pbc(*args, **molecule_kwargs): +def molecule_pbc(*args: Any, **molecule_kwargs: Any) -> Atoms: + """ + Create a molecule with periodic boundary conditions. + + Parameters + ---------- + *args : Any + Positional arguments for the molecule function. + **molecule_kwargs : Any + Keyword arguments for the molecule function. + + Returns + ------- + Atoms + Atomic structure with periodic boundary conditions. + """ atoms = molecule(*args, **molecule_kwargs) atoms.pbc = True return atoms +def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults: + """ + Generate reference results for molecules. + + Parameters + ---------- + relax_job : Job + Job for relaxing molecular structures. + + Returns + ------- + MoleculeResults + Dictionary of reference results for molecules. + """ + return { + "N2": relax_job(molecule_pbc("N2", vacuum=10), relax_cell=False), + "CO": relax_job(molecule_pbc("CO", vacuum=10), relax_cell=False), + "H2": relax_job(molecule_pbc("H2", vacuum=10), relax_cell=False), + "H2O": relax_job(molecule_pbc("H2O", vacuum=10), relax_cell=False), + } + + @flow -def standard_ocp_dataset_enumeration( +def bulk_to_surfaces_to_adsorbml( bulk_atoms: Atoms, - adsorbates_kwargs: dict[str, Any], - multiple_adsorbate_slab_config_kwargs: dict[str, Any], + adsorbates_kwargs: AdsorbatesKwargs, + multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs, ml_relax_job: Job, slab_validate_job: Job, adslab_validate_job: Job, @@ -305,7 +510,46 @@ def standard_ocp_dataset_enumeration( num_to_validate_with_DFT: int = 0, reference_ml_energies_to_gas_phase: bool = True, relax_bulk: bool = True, -): +) -> list[dict[str, Any]]: + """ + Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning. + + Parameters + ---------- + bulk_atoms : Atoms + The bulk atomic structure. + adsorbates_kwargs : AdsorbatesKwargs + Keyword arguments for generating adsorbate configurations. + multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs + Keyword arguments for generating multiple adsorbate-slab configurations. + ml_relax_job : Job + Job for relaxing slab and adsorbate-slab configurations using ML. + slab_validate_job : Job + Job for validating the slab structure. + adslab_validate_job : Job + Job for validating the adsorbate-slab structures. + gas_validate_job : Job + Job for validating gas phase structures. + bulk_relax_job : Job | None + Job for relaxing the bulk structure, by default None. + job_params : dict[str, dict[str, Any]], optional + Parameters for customizing jobs, by default None. + job_decorators : dict[str, dict[str, Any]], optional + Decorators for customizing jobs, by default None. + max_miller : int, optional + Maximum Miller index, by default 1. + num_to_validate_with_DFT : int, optional + Number of top configurations to validate with DFT, by default 0. + reference_ml_energies_to_gas_phase : bool, optional + Whether to reference ML energies to gas phase, by default True. + relax_bulk : bool, optional + Whether to relax the bulk structure, by default True. + + Returns + ------- + list[dict[str, Any]] + List of dictionaries containing the results of the pipeline for each slab. + """ ( bulk_relax_job_, ml_slab_adslab_relax_job_, @@ -337,25 +581,14 @@ def standard_ocp_dataset_enumeration( slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller) if reference_ml_energies_to_gas_phase: - ml_gas_references = { - "N2": ml_slab_adslab_relax_job_( - molecule_pbc("N2", vacuum=10), relax_cell=False - ), - "CO": ml_slab_adslab_relax_job_( - molecule_pbc("CO", vacuum=10), relax_cell=False - ), - "H2": ml_slab_adslab_relax_job_( - molecule_pbc("H2", vacuum=10), relax_cell=False - ), - "H2O": ml_slab_adslab_relax_job_( - molecule_pbc("H2O", vacuum=10), relax_cell=False - ), - } + molecule_results = generate_molecule_reference_results( + ml_slab_adslab_relax_job_ + ) else: - ml_gas_references = None + molecule_results = None @flow - def adsorbML_each_surface(slabs, **kwargs): + def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, Any]]: return [ # We strip the decorator here so it's a bunch of jobs, not subflows. Helpful for prefect! strip_decorator(adsorb_ml_pipeline)(slab=slab, **kwargs) @@ -371,6 +604,6 @@ def adsorbML_each_surface(slabs, **kwargs): adslab_validate_job=adslab_validate_job_, gas_validate_job=gas_validate_job_, num_to_validate_with_DFT=num_to_validate_with_DFT, - ml_gas_references=ml_gas_references, + molecule_results=molecule_results, reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase, ) From 805874cea562b4e87fc48a8f9a1b17c38fb77ea1 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Fri, 21 Mar 2025 22:49:44 +0000 Subject: [PATCH 04/19] small fixes --- src/quacc/recipes/common/adsorbml.py | 48 +++++++++++++++++++--------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index e415109c1a..5ec21a6394 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -395,8 +395,9 @@ def reference_adslab_energies( List of adsorbate-slab results with referenced energies. """ adslab_results = copy.deepcopy(adslab_results) - if atomic_energies is None and molecule_results is not None: - atomic_energies = { + if atomic_energies is None: + if molecule_results is not None: + atomic_energies = { "H": molecule_results["H2"]["results"]["energy"] / 2, "N": molecule_results["N2"]["results"]["energy"] / 2, "O": ( @@ -409,10 +410,11 @@ def reference_adslab_energies( - molecule_results["H2"]["results"]["energy"] ), } - else: - raise Exception( + else: + raise Exception( "Missing atomic energies and gas phase energies; unable to continue!" ) + slab_energy = slab_result["results"]["energy"] @@ -503,18 +505,32 @@ def bulk_to_surfaces_to_adsorbml( slab_validate_job: Job, adslab_validate_job: Job, gas_validate_job: Job, - bulk_relax_job: Job | None, + max_miller: int = 1, + bulk_relax_job: Job | None = None, job_params: dict[str, dict[str, Any]] | None = None, job_decorators: dict[str, dict[str, Any]] | None = None, - max_miller: int = 1, num_to_validate_with_DFT: int = 0, reference_ml_energies_to_gas_phase: bool = True, relax_bulk: bool = True, + atomic_reference_energies: dict[str, float] | None = None, ) -> list[dict[str, Any]]: """ - Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning. - - Parameters + Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning! + For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486, + https://www.nature.com/articles/s41524-023-01121-5). + + 1. Relax bulk structure if desired + 2. Generate surface slabs from bulk atoms + 3. Generate gas phase reference energies if needed + + For each slab generated in (3): + 1. Generate trial adsorbate-slab configurations + 2. Relax slab and adsorbate-slab configurations using ML + 3. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)) + 4. Reference the energies to gas phase if needed (eg using a total energy ML model) + 5. Optionally validate top K configurations with DFT single-points or relaxations + + Parameters ---------- bulk_atoms : Atoms The bulk atomic structure. @@ -530,14 +546,14 @@ def bulk_to_surfaces_to_adsorbml( Job for validating the adsorbate-slab structures. gas_validate_job : Job Job for validating gas phase structures. - bulk_relax_job : Job | None + max_miller : int, optional + Maximum Miller index, by default 1. + bulk_relax_job : Job | None, optional Job for relaxing the bulk structure, by default None. - job_params : dict[str, dict[str, Any]], optional + job_params : dict[str, dict[str, Any]] | None, optional Parameters for customizing jobs, by default None. - job_decorators : dict[str, dict[str, Any]], optional + job_decorators : dict[str, dict[str, Any]] | None, optional Decorators for customizing jobs, by default None. - max_miller : int, optional - Maximum Miller index, by default 1. num_to_validate_with_DFT : int, optional Number of top configurations to validate with DFT, by default 0. reference_ml_energies_to_gas_phase : bool, optional @@ -576,11 +592,12 @@ def bulk_to_surfaces_to_adsorbml( ) if relax_bulk: + bulk_atoms bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"] slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller) - if reference_ml_energies_to_gas_phase: + if reference_ml_energies_to_gas_phase and atomic_reference_energies is not None: molecule_results = generate_molecule_reference_results( ml_slab_adslab_relax_job_ ) @@ -606,4 +623,5 @@ def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, An num_to_validate_with_DFT=num_to_validate_with_DFT, molecule_results=molecule_results, reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase, + atomic_reference_energies=atomic_reference_energies ) From 3c3118e74ed3bc9606c39ca22cf4eed54de2a7f3 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Fri, 21 Mar 2025 23:38:32 +0000 Subject: [PATCH 05/19] small changes to the docstrings --- src/quacc/recipes/common/adsorbml.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index 5ec21a6394..e1fbd0712c 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -29,9 +29,7 @@ class AdsorbatesKwargs(TypedDict): position: list[float] orientation: list[float] -class MultipleAdsorbateSlabConfigKwargs(TypedDict): - num_adsorbates: int - adsorbate_distance: float + class MoleculeResults(TypedDict): N2: RunSchema @@ -113,7 +111,7 @@ def __init__( def ocp_adslab_generator( slab: Slab | Atoms, adsorbates_kwargs: list[AdsorbatesKwargs] | None = None, - multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None, + multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None, ) -> list[Atoms]: """ Generate adsorbate-slab configurations. @@ -124,7 +122,7 @@ def ocp_adslab_generator( The slab structure. adsorbates_kwargs : list[AdsorbatesKwargs], optional List of keyword arguments for generating adsorbates, by default None. - multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional + multiple_adsorbate_slab_config_kwargs : dict[str,Any], optional Keyword arguments for generating multiple adsorbate-slab configurations, by default None. Returns @@ -160,7 +158,7 @@ def ocp_adslab_generator( def find_adslabs_each_slab( slabs: list[Slab], adsorbates_kwargs: AdsorbatesKwargs, - multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs | None = None, + multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None, ) -> list[dict[str, Any]]: """ Find adsorbate-slab configurations for each slab. @@ -171,7 +169,7 @@ def find_adslabs_each_slab( List of slabs. adsorbates_kwargs : AdsorbatesKwargs Keyword arguments for generating adsorbates. - multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs, optional + multiple_adsorbate_slab_config_kwargs : dict[str,Any], optional Keyword arguments for generating multiple adsorbate-slab configurations, by default None. Returns @@ -267,7 +265,7 @@ def filter_sort_select_adslabs( def adsorb_ml_pipeline( slab: Slab, adsorbates_kwargs: AdsorbatesKwargs, - multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs, + multiple_adsorbate_slab_config_kwargs: dict[str, Any], ml_slab_adslab_relax_job: Job, slab_validate_job: Job, adslab_validate_job: Job, @@ -286,7 +284,7 @@ def adsorb_ml_pipeline( The slab structure to which adsorbates will be added. adsorbates_kwargs : AdsorbatesKwargs Keyword arguments for generating adsorbate configurations. - multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs + multiple_adsorbate_slab_config_kwargs : dict[str, Any] Keyword arguments for generating multiple adsorbate-slab configurations. ml_slab_adslab_relax_job : Job Job for relaxing slab and adsorbate-slab configurations using ML. @@ -500,7 +498,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults: def bulk_to_surfaces_to_adsorbml( bulk_atoms: Atoms, adsorbates_kwargs: AdsorbatesKwargs, - multiple_adsorbate_slab_config_kwargs: MultipleAdsorbateSlabConfigKwargs, + multiple_adsorbate_slab_config_kwargs: dict[str, Any], ml_relax_job: Job, slab_validate_job: Job, adslab_validate_job: Job, @@ -530,13 +528,13 @@ def bulk_to_surfaces_to_adsorbml( 4. Reference the energies to gas phase if needed (eg using a total energy ML model) 5. Optionally validate top K configurations with DFT single-points or relaxations - Parameters + Parameters ---------- bulk_atoms : Atoms The bulk atomic structure. adsorbates_kwargs : AdsorbatesKwargs Keyword arguments for generating adsorbate configurations. - multiple_adsorbate_slab_config_kwargs : MultipleAdsorbateSlabConfigKwargs + multiple_adsorbate_slab_config_kwargs : dict[str, Any] Keyword arguments for generating multiple adsorbate-slab configurations. ml_relax_job : Job Job for relaxing slab and adsorbate-slab configurations using ML. @@ -560,6 +558,8 @@ def bulk_to_surfaces_to_adsorbml( Whether to reference ML energies to gas phase, by default True. relax_bulk : bool, optional Whether to relax the bulk structure, by default True. + atomic_reference_energies : dict[str, float] | None, optional + Atomic reference energies for referencing if known ahead of time, by default None. Returns ------- From f4b653f386eb3280d1072055ee0dc5867bbe3c34 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 00:27:34 +0000 Subject: [PATCH 06/19] small fixes, and move slab relaxation first --- src/quacc/recipes/common/adsorbml.py | 83 +++++++++++++++++++--------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index e1fbd0712c..f564b6e1b2 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -20,7 +20,7 @@ from quacc.wflow_tools.customizers import customize_funcs, strip_decorator if TYPE_CHECKING: - from quacc.types import OptSchema, RunSchema + from quacc.types import RunSchema logger = logging.getLogger(__name__) @@ -70,7 +70,7 @@ class CustomSlab(Slab): def __init__( self, - slab_atoms: Atoms, + atoms: Atoms, bulk: Atoms | None = None, millers: tuple | None = None, shift: float | None = None, @@ -82,7 +82,7 @@ def __init__( Parameters ---------- - slab_atoms : Atoms + atoms : Atoms The slab atomic structure. bulk : Atoms, optional The bulk atomic structure, by default None. @@ -96,15 +96,15 @@ def __init__( Minimum a and b lattice parameters, by default 0.8. """ self.bulk = bulk - self.slab_atoms = slab_atoms + self.atoms = atoms self.millers = millers self.shift = shift self.top = top - assert np.linalg.norm(self.slab_atoms.cell[0]) >= min_ab, "Slab not tiled" - assert np.linalg.norm(self.slab_atoms.cell[1]) >= min_ab, "Slab not tiled" + assert np.linalg.norm(self.atoms.cell[0]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}" + assert np.linalg.norm(self.atoms.cell[1]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}" assert self.has_surface_tagged(), "Slab not tagged" - assert len(self.slab_atoms.constraints) > 0, "Sub-surface atoms not constrained" + assert len(self.atoms.constraints) > 0, "Sub-surface atoms not constrained" @job @@ -136,9 +136,9 @@ def ocp_adslab_generator( if isinstance(slab, Atoms): try: - slab = CustomSlab(slab_atoms=slab) + slab = CustomSlab(atoms=slab) except AssertionError: - slab = CustomSlab(slab_atoms=tile_and_tag_atoms(slab)) + slab = CustomSlab(atoms=tile_and_tag_atoms(slab)) logger.warning( "The slab was not tagged and/or tiled. " "We did the best we could, but you should be careful and check the results!" @@ -151,6 +151,10 @@ def ocp_adslab_generator( copy.deepcopy(slab), adsorbates, **multiple_adsorbate_slab_config_kwargs ) + atoms_list = adslabs.atoms_list + for atoms in atoms_list: + atoms.pbc = True + return adslabs.atoms_list @@ -278,6 +282,13 @@ def adsorb_ml_pipeline( """ Run a machine learning-based pipeline for adsorbate-slab systems. + 1. Relax slab using ML + 2. Generate trial adsorbate-slab configurations for the relaxed slab + 3. Relax adsorbate-slab configurations using ML + 4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)) + 5. Reference the energies to gas phase if needed (eg using a total energy ML model) + 6. Optionally validate top K configurations with DFT single-points or relaxations + Parameters ---------- slab : Slab @@ -309,8 +320,12 @@ def adsorb_ml_pipeline( Dictionary containing the slab, ML-relaxed adsorbate-slab configurations, detected anomalies, and optionally DFT-validated structures. """ + + slab.atoms.pbc=True + ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms) + unrelaxed_adslab_configurations = ocp_adslab_generator( - slab, adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs + ml_relaxed_slab_result["atoms"], adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs ) ml_relaxed_configurations = [ @@ -318,7 +333,7 @@ def adsorb_ml_pipeline( for adslab_configuration in unrelaxed_adslab_configurations ] - ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms) + if reference_ml_energies_to_gas_phase: if atomic_reference_energies is None and molecule_results is None: @@ -352,18 +367,32 @@ def adsorb_ml_pipeline( "adslab_anomalies": adslab_anomalies_list, } else: + dft_validated_adslabs = [ + adslab_validate_job(top_candidates[i]["atoms"], relax_cell=False) + for i in range(num_to_validate_with_DFT) + ] + + dft_validated_slab = slab_validate_job(slab.atoms, relax_cell=False) + + if reference_ml_energies_to_gas_phase: + if atomic_reference_energies is None and molecule_results is None: + molecule_results = generate_molecule_reference_results( + gas_validate_job + ) + + dft_validated_adslabs = reference_adslab_energies( + dft_validated_adslabs, + dft_validated_slab, + atomic_energies=atomic_reference_energies, + molecule_results=molecule_results, + ) + return { "slab": slab, "adslab_ml_relaxed_configurations": top_candidates, "adslab_anomalies": adslab_anomalies_list, - "validated_structures": { - "validated_adslabs": [ - adslab_validate_job(top_candidates[i]["atoms"]) - for i in range(num_to_validate_with_DFT) - ], - "slab_validated": slab_validate_job(slab.atoms, relax_cell=False), - }, - } + "validated_structures": {"slab": dft_validated_slab, "adslabs": dft_validated_adslabs}} + @job @@ -412,7 +441,7 @@ def reference_adslab_energies( raise Exception( "Missing atomic energies and gas phase energies; unable to continue!" ) - + slab_energy = slab_result["results"]["energy"] @@ -514,7 +543,7 @@ def bulk_to_surfaces_to_adsorbml( ) -> list[dict[str, Any]]: """ Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning! - For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486, + For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486, https://www.nature.com/articles/s41524-023-01121-5). 1. Relax bulk structure if desired @@ -522,11 +551,12 @@ def bulk_to_surfaces_to_adsorbml( 3. Generate gas phase reference energies if needed For each slab generated in (3): - 1. Generate trial adsorbate-slab configurations - 2. Relax slab and adsorbate-slab configurations using ML - 3. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)) - 4. Reference the energies to gas phase if needed (eg using a total energy ML model) - 5. Optionally validate top K configurations with DFT single-points or relaxations + 1. Relax slab using ML + 2. Generate trial adsorbate-slab configurations for the relaxed slab + 3. Relax adsorbate-slab configurations using ML + 4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)) + 5. Reference the energies to gas phase if needed (eg using a total energy ML model) + 6. Optionally validate top K configurations with DFT single-points or relaxations Parameters ---------- @@ -592,7 +622,6 @@ def bulk_to_surfaces_to_adsorbml( ) if relax_bulk: - bulk_atoms bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"] slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller) From 04cb215c6e232a93774f4c0d7e52377f023dbd85 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 14:15:42 +0000 Subject: [PATCH 07/19] many fixes --- pyproject.toml | 2 +- src/quacc/recipes/common/adsorbml.py | 124 ++++------ src/quacc/recipes/mlp/adsorbml.py | 234 ++++++++++++++++++ src/quacc/types.py | 33 +++ .../core/recipes/mlp_recipes/test_adsorbml.py | 146 +++++++++++ .../recipes/mlp_recipes/test_core_recipes.py | 6 +- .../mlp_recipes/test_elastic_recipes.py | 2 +- tests/requirements-mlp1.txt | 1 + tests/requirements-mlp2.txt | 1 + 9 files changed, 462 insertions(+), 87 deletions(-) create mode 100644 src/quacc/recipes/mlp/adsorbml.py create mode 100644 tests/core/recipes/mlp_recipes/test_adsorbml.py diff --git a/pyproject.toml b/pyproject.toml index 91b2f348cf..c16566fd01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ mlp1 = ["chgnet>=0.3.3", "torch-dftd>=0.4.0", "sevenn>=0.10.1", "orb-models>=0. mlp2 = ["mace-torch>=0.3.3", "matgl>=1.1.2"] mp = ["atomate2>=0.0.14"] newtonnet = ["newtonnet>=1.1"] -ocp = ["fairchem-data-oc>=0.2.0"] +ocdata = ["fairchem-data-oc>=0.2.0"] parsl = ["parsl[monitoring]>=2024.5.27; platform_system!='Windows'"] phonons = ["phonopy>=2.20.0", "seekpath>=2.1.0"] prefect = ["prefect[dask]>=2.19.0", "dask-jobqueue>=0.8.2"] diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index f564b6e1b2..c98dd92544 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -2,7 +2,7 @@ import copy import logging -from typing import TYPE_CHECKING, Any, Literal, TypedDict +from typing import TYPE_CHECKING, Any, Literal import numpy as np from ase.atoms import Atoms @@ -17,31 +17,22 @@ from quacc import Job, flow, job from quacc.utils.dicts import recursive_dict_merge -from quacc.wflow_tools.customizers import customize_funcs, strip_decorator +from quacc.wflow_tools.customizers import strip_decorator if TYPE_CHECKING: - from quacc.types import RunSchema + from quacc.types import ( + AdsorbMLSchema, + AtomicReferenceEnergies, + MoleculeReferenceResults, + OptSchema, + RunSchema, + ) logger = logging.getLogger(__name__) -class AdsorbatesKwargs(TypedDict): - adsorbate_type: str - position: list[float] - orientation: list[float] - -class MoleculeResults(TypedDict): - N2: RunSchema - CO: RunSchema - H2: RunSchema - H2O: RunSchema -class AtomicReferenceEnergies(TypedDict): - H: float - N: float - O: float - C: float @job def ocp_surface_generator(bulk_atoms: Atoms, max_miller: int = 1) -> list[Slab]: @@ -110,7 +101,7 @@ def __init__( @job def ocp_adslab_generator( slab: Slab | Atoms, - adsorbates_kwargs: list[AdsorbatesKwargs] | None = None, + adsorbates_kwargs: list[dict[str,Any]] | None = None, multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None, ) -> list[Atoms]: """ @@ -120,7 +111,7 @@ def ocp_adslab_generator( ---------- slab : Slab | Atoms The slab structure. - adsorbates_kwargs : list[AdsorbatesKwargs], optional + adsorbates_kwargs : list[dict[str,Any]], optional List of keyword arguments for generating adsorbates, by default None. multiple_adsorbate_slab_config_kwargs : dict[str,Any], optional Keyword arguments for generating multiple adsorbate-slab configurations, by default None. @@ -161,9 +152,9 @@ def ocp_adslab_generator( @flow def find_adslabs_each_slab( slabs: list[Slab], - adsorbates_kwargs: AdsorbatesKwargs, + adsorbates_kwargs: dict[str,Any], multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None, -) -> list[dict[str, Any]]: +) -> list[dict[str, Slab | list[Atoms]]]: """ Find adsorbate-slab configurations for each slab. @@ -234,21 +225,21 @@ def detect_anomaly( @job def filter_sort_select_adslabs( - adslab_results: list[RunSchema], adslab_anomalies_list: list[list[str]] -) -> list[RunSchema]: + adslab_results: list[OptSchema], adslab_anomalies_list: list[list[str]] +) -> list[OptSchema]: """ Filter, sort, and select adsorbate-slab configurations based on anomalies and energy. Parameters ---------- - adslab_results : list[RunSchema] + adslab_results : list[OptSchema] List of adsorbate-slab results. adslab_anomalies_list : list[list[str]] List of detected anomalies for each adsorbate-slab configuration. Returns ------- - list[RunSchema] + list[OptSchema] Sorted list of adsorbate-slab configurations without anomalies. """ for adslab_result, adslab_anomalies in zip( @@ -268,7 +259,7 @@ def filter_sort_select_adslabs( @flow def adsorb_ml_pipeline( slab: Slab, - adsorbates_kwargs: AdsorbatesKwargs, + adsorbates_kwargs: dict[str,Any], multiple_adsorbate_slab_config_kwargs: dict[str, Any], ml_slab_adslab_relax_job: Job, slab_validate_job: Job, @@ -276,9 +267,9 @@ def adsorb_ml_pipeline( gas_validate_job: Job, num_to_validate_with_DFT: int = 0, reference_ml_energies_to_gas_phase: bool = False, - molecule_results: MoleculeResults | None = None, + molecule_results: MoleculeReferenceResults | None = None, atomic_reference_energies: AtomicReferenceEnergies | None = None, -) -> dict[str, Any]: +) -> AdsorbMLSchema: """ Run a machine learning-based pipeline for adsorbate-slab systems. @@ -293,7 +284,7 @@ def adsorb_ml_pipeline( ---------- slab : Slab The slab structure to which adsorbates will be added. - adsorbates_kwargs : AdsorbatesKwargs + adsorbates_kwargs : dict[str,Any] Keyword arguments for generating adsorbate configurations. multiple_adsorbate_slab_config_kwargs : dict[str, Any] Keyword arguments for generating multiple adsorbate-slab configurations. @@ -309,7 +300,7 @@ def adsorb_ml_pipeline( Number of top configurations to validate with DFT, by default 0. reference_ml_energies_to_gas_phase : bool, optional Whether to reference ML energies to gas phase, by default False. - molecule_results : MoleculeResults, optional + molecule_results : MoleculeReferenceResults, optional Precomputed molecule results for referencing, by default None. atomic_reference_energies : AtomicReferenceEnergies, optional Atomic reference energies for referencing, by default None. @@ -333,8 +324,6 @@ def adsorb_ml_pipeline( for adslab_configuration in unrelaxed_adslab_configurations ] - - if reference_ml_energies_to_gas_phase: if atomic_reference_energies is None and molecule_results is None: molecule_results = generate_molecule_reference_results( @@ -362,8 +351,8 @@ def adsorb_ml_pipeline( if num_to_validate_with_DFT == 0: return { - "slab": slab, - "adslab_ml_relaxed_configurations": top_candidates, + "slab": slab.get_metadata_dict(), + "adslabs": top_candidates, "adslab_anomalies": adslab_anomalies_list, } else: @@ -388,8 +377,8 @@ def adsorb_ml_pipeline( ) return { - "slab": slab, - "adslab_ml_relaxed_configurations": top_candidates, + "slab": slab.get_metadata_dict(), + "adslabs": top_candidates, "adslab_anomalies": adslab_anomalies_list, "validated_structures": {"slab": dft_validated_slab, "adslabs": dft_validated_adslabs}} @@ -397,11 +386,11 @@ def adsorb_ml_pipeline( @job def reference_adslab_energies( - adslab_results: list[dict[str, Any]], + adslab_results: list[OptSchema], slab_result: RunSchema, atomic_energies: AtomicReferenceEnergies | None, - molecule_results: MoleculeResults | None, -) -> list[dict[str, Any]]: + molecule_results: MoleculeReferenceResults | None, +) -> list[OptSchema]: """ Reference adsorbate-slab energies to atomic and slab energies. @@ -413,7 +402,7 @@ def reference_adslab_energies( Result of the slab calculation. atomic_energies : AtomicReferenceEnergies | None Dictionary of atomic energies. - molecule_results : MoleculeResults | None + molecule_results : MoleculeReferenceResults | None Dictionary of molecule results. Returns @@ -501,7 +490,7 @@ def molecule_pbc(*args: Any, **molecule_kwargs: Any) -> Atoms: return atoms -def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults: +def generate_molecule_reference_results(relax_job: Job) -> MoleculeReferenceResults: """ Generate reference results for molecules. @@ -512,7 +501,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults: Returns ------- - MoleculeResults + MoleculeReferenceResults Dictionary of reference results for molecules. """ return { @@ -526,7 +515,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeResults: @flow def bulk_to_surfaces_to_adsorbml( bulk_atoms: Atoms, - adsorbates_kwargs: AdsorbatesKwargs, + adsorbates_kwargs: dict[str,Any], multiple_adsorbate_slab_config_kwargs: dict[str, Any], ml_relax_job: Job, slab_validate_job: Job, @@ -534,8 +523,7 @@ def bulk_to_surfaces_to_adsorbml( gas_validate_job: Job, max_miller: int = 1, bulk_relax_job: Job | None = None, - job_params: dict[str, dict[str, Any]] | None = None, - job_decorators: dict[str, dict[str, Any]] | None = None, + num_to_validate_with_DFT: int = 0, reference_ml_energies_to_gas_phase: bool = True, relax_bulk: bool = True, @@ -578,10 +566,6 @@ def bulk_to_surfaces_to_adsorbml( Maximum Miller index, by default 1. bulk_relax_job : Job | None, optional Job for relaxing the bulk structure, by default None. - job_params : dict[str, dict[str, Any]] | None, optional - Parameters for customizing jobs, by default None. - job_decorators : dict[str, dict[str, Any]] | None, optional - Decorators for customizing jobs, by default None. num_to_validate_with_DFT : int, optional Number of top configurations to validate with DFT, by default 0. reference_ml_energies_to_gas_phase : bool, optional @@ -593,42 +577,18 @@ def bulk_to_surfaces_to_adsorbml( Returns ------- - list[dict[str, Any]] - List of dictionaries containing the results of the pipeline for each slab. + list[AdsorbMLSchema] + List of AdsorbML results for each slab """ - ( - bulk_relax_job_, - ml_slab_adslab_relax_job_, - slab_validate_job_, - adslab_validate_job_, - gas_validate_job_, - ) = customize_funcs( - [ - "bulk_relax_job", - "ml_slab_adslab_relax_job", - "slab_validate_job", - "adslab_validate_job", - "gas_validate_job", - ], - [ - bulk_relax_job, # type: ignore - ml_relax_job, - slab_validate_job, - adslab_validate_job, - gas_validate_job, - ], - param_swaps=job_params, - decorators=job_decorators, # type: ignore - ) if relax_bulk: - bulk_atoms = bulk_relax_job_(bulk_atoms, relax_cell=True)["atoms"] + bulk_atoms = bulk_relax_job(bulk_atoms, relax_cell=True)["atoms"] slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller) if reference_ml_energies_to_gas_phase and atomic_reference_energies is not None: molecule_results = generate_molecule_reference_results( - ml_slab_adslab_relax_job_ + ml_relax_job ) else: molecule_results = None @@ -645,10 +605,10 @@ def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, An slabs=slabs, adsorbates_kwargs=adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs=multiple_adsorbate_slab_config_kwargs, - ml_slab_adslab_relax_job=ml_slab_adslab_relax_job_, - slab_validate_job=slab_validate_job_, - adslab_validate_job=adslab_validate_job_, - gas_validate_job=gas_validate_job_, + ml_slab_adslab_relax_job=ml_relax_job, + slab_validate_job=slab_validate_job, + adslab_validate_job=adslab_validate_job, + gas_validate_job=gas_validate_job, num_to_validate_with_DFT=num_to_validate_with_DFT, molecule_results=molecule_results, reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase, diff --git a/src/quacc/recipes/mlp/adsorbml.py b/src/quacc/recipes/mlp/adsorbml.py new file mode 100644 index 0000000000..83bf378685 --- /dev/null +++ b/src/quacc/recipes/mlp/adsorbml.py @@ -0,0 +1,234 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +from quacc import Job, flow +from quacc.recipes.common.adsorbml import ( + adsorb_ml_pipeline as adsorb_ml_pipeline_, + bulk_to_surfaces_to_adsorbml as bulk_to_surfaces_to_adsorbml_, +) +from quacc.wflow_tools.customizers import customize_funcs + +if TYPE_CHECKING: + from ase.atoms import Atoms + from fairchem.data.oc.core.slab import Slab + + from quacc.types import ( + AdsorbMLSchema, + AtomicReferenceEnergies, + MoleculeReferenceResults, + ) + + + +logger = logging.getLogger(__name__) + + +@flow +def adsorb_ml_pipeline( + slab: Slab, + adsorbates_kwargs: dict[str, Any], + multiple_adsorbate_slab_config_kwargs: dict[str, Any], + ml_slab_adslab_relax_job: Job, + slab_validate_job: Job, + adslab_validate_job: Job, + gas_validate_job: Job, + num_to_validate_with_DFT: int = 0, + reference_ml_energies_to_gas_phase: bool = False, + molecule_results: MoleculeReferenceResults | None = None, + atomic_reference_energies: AtomicReferenceEnergies | None = None, + job_params: dict[str, dict[str, Any]] | None = None, + job_decorators: dict[str, dict[str, Any]] | None = None, +) -> AdsorbMLSchema: + """ + Run a machine learning-based pipeline for adsorbate-slab systems. + + 1. Relax slab using ML + 2. Generate trial adsorbate-slab configurations for the relaxed slab + 3. Relax adsorbate-slab configurations using ML + 4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)) + 5. Reference the energies to gas phase if needed (eg using a total energy ML model) + 6. Optionally validate top K configurations with DFT single-points or relaxations + + Parameters + ---------- + slab : Slab + The slab structure to which adsorbates will be added. + adsorbates_kwargs : dict[str, Any] + Keyword arguments for generating adsorbate configurations. + multiple_adsorbate_slab_config_kwargs : dict[str, Any] + Keyword arguments for generating multiple adsorbate-slab configurations. + ml_slab_adslab_relax_job : Job + Job for relaxing slab and adsorbate-slab configurations using ML. + slab_validate_job : Job + Job for validating the slab structure. + adslab_validate_job : Job + Job for validating the adsorbate-slab structures. + gas_validate_job : Job + Job for validating gas phase structures. + num_to_validate_with_DFT : int, optional + Number of top configurations to validate with DFT, by default 0. + reference_ml_energies_to_gas_phase : bool, optional + Whether to reference ML energies to gas phase, by default False. + molecule_results : MoleculeReferenceResults, optional + Precomputed molecule results for referencing, by default None. + atomic_reference_energies : AtomicReferenceEnergies, optional + Atomic reference energies for referencing, by default None. + + Returns + ------- + dict + Dictionary containing the slab, ML-relaxed adsorbate-slab configurations, + detected anomalies, and optionally DFT-validated structures. + """ + + ( + ml_slab_adslab_relax_job_, + slab_validate_job_, + adslab_validate_job_, + gas_validate_job_, + ) = customize_funcs( + [ + "ml_slab_adslab_relax_job", + "slab_validate_job", + "adslab_validate_job", + "gas_validate_job", + ], + [ + ml_slab_adslab_relax_job, + slab_validate_job, + adslab_validate_job, + gas_validate_job, + ], + param_swaps=job_params, + decorators=job_decorators, # type: ignore + ) + + return adsorb_ml_pipeline_( + slab, + adsorbates_kwargs, + multiple_adsorbate_slab_config_kwargs, + ml_slab_adslab_relax_job_, + slab_validate_job_, + adslab_validate_job_, + gas_validate_job_, + num_to_validate_with_DFT, + reference_ml_energies_to_gas_phase, + molecule_results, + atomic_reference_energies, + ) + + +@flow +def bulk_to_surfaces_to_adsorbml( + bulk_atoms: Atoms, + adsorbates_kwargs: dict[str,Any], + multiple_adsorbate_slab_config_kwargs: dict[str, Any], + ml_relax_job: Job, + slab_validate_job: Job, + adslab_validate_job: Job, + gas_validate_job: Job, + max_miller: int = 1, + bulk_relax_job: Job | None = None, + job_params: dict[str, dict[str, Any]] | None = None, + job_decorators: dict[str, dict[str, Any]] | None = None, + num_to_validate_with_DFT: int = 0, + reference_ml_energies_to_gas_phase: bool = True, + relax_bulk: bool = True, + atomic_reference_energies: dict[str, float] | None = None, +) -> list[dict[str, Any]]: + """ + Run a pipeline from bulk atoms to adsorbate-slab configurations using machine learning! + For full details, see the AdsorbML paper (https://arxiv.org/abs/2211.16486, + https://www.nature.com/articles/s41524-023-01121-5). + + 1. Relax bulk structure if desired + 2. Generate surface slabs from bulk atoms + 3. Generate gas phase reference energies if needed + + For each slab generated in (3): + 1. Relax slab using ML + 2. Generate trial adsorbate-slab configurations for the relaxed slab + 3. Relax adsorbate-slab configurations using ML + 4. Validate slab and adsorbate-slab configurations (check for anomalies like dissociations)) + 5. Reference the energies to gas phase if needed (eg using a total energy ML model) + 6. Optionally validate top K configurations with DFT single-points or relaxations + + Parameters + ---------- + bulk_atoms : Atoms + The bulk atomic structure. + adsorbates_kwargs : AdsorbatesKwargs + Keyword arguments for generating adsorbate configurations. + multiple_adsorbate_slab_config_kwargs : dict[str, Any] + Keyword arguments for generating multiple adsorbate-slab configurations. + ml_relax_job : Job + Job for relaxing slab and adsorbate-slab configurations using ML. + slab_validate_job : Job + Job for validating the slab structure. + adslab_validate_job : Job + Job for validating the adsorbate-slab structures. + gas_validate_job : Job + Job for validating gas phase structures. + max_miller : int, optional + Maximum Miller index, by default 1. + bulk_relax_job : Job | None, optional + Job for relaxing the bulk structure, by default None. + job_params : dict[str, dict[str, Any]] | None, optional + Parameters for customizing jobs, by default None. + job_decorators : dict[str, dict[str, Any]] | None, optional + Decorators for customizing jobs, by default None. + num_to_validate_with_DFT : int, optional + Number of top configurations to validate with DFT, by default 0. + reference_ml_energies_to_gas_phase : bool, optional + Whether to reference ML energies to gas phase, by default True. + relax_bulk : bool, optional + Whether to relax the bulk structure, by default True. + atomic_reference_energies : dict[str, float] | None, optional + Atomic reference energies for referencing if known ahead of time, by default None. + + Returns + ------- + list[AdsorbMLSchema] + List of AdsorbML results for each slab + """ + ( + bulk_relax_job_, + ml_relax_job_, + slab_validate_job_, + adslab_validate_job_, + gas_validate_job_, + ) = customize_funcs( + [ + "bulk_relax_job", + "ml_relax_job", + "slab_validate_job", + "adslab_validate_job", + "gas_validate_job", + ], + [ + bulk_relax_job, # type: ignore + ml_relax_job, + slab_validate_job, + adslab_validate_job, + gas_validate_job, + ], + param_swaps=job_params, + decorators=job_decorators, # type: ignore + ) + return bulk_to_surfaces_to_adsorbml_( + bulk_atoms, + adsorbates_kwargs, + multiple_adsorbate_slab_config_kwargs, + ml_relax_job_, + slab_validate_job_, + adslab_validate_job_, + gas_validate_job_, + max_miller, + bulk_relax_job_, + num_to_validate_with_DFT, + reference_ml_energies_to_gas_phase, + relax_bulk, + atomic_reference_energies, + ) diff --git a/src/quacc/types.py b/src/quacc/types.py index 335b176bfc..cd06566a6c 100644 --- a/src/quacc/types.py +++ b/src/quacc/types.py @@ -539,6 +539,39 @@ class ElasticSchema(TypedDict): undeformed_result: RunSchema | OptSchema elasticity_doc: ElasticityDoc + class SlabMetadata(TypedDict): + millers: tuple[int, int, int] + shift: tuple[float, float, float] + vacuum: float + surface_area: float + volume: float + + class Slab(TypedDict): + slab_atomsobject: Atoms + slab_metadata: SlabMetadata + + class AdsorbMLSchema(TypedDict): + slab: Slab + adslabs: list[OptSchema] + adslab_anomalies: list[Literal[ + "adsorbate_dissociated", + "adsorbate_desorbed", + "surface_changed", + "adsorbate_intercalated",]] + validated_structures: NotRequired[dict[str, Any]] + + class MoleculeReferenceResults(TypedDict): + N2: RunSchema + CO: RunSchema + H2: RunSchema + H2O: RunSchema + + class AtomicReferenceEnergies(TypedDict): + H: float + N: float + O: float + C: float + class VibThermoSchema(VibSchema, ThermoSchema): """Combined Vibrations and Thermo schema""" diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py new file mode 100644 index 0000000000..9b16e07cd8 --- /dev/null +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import numpy as np +import pytest +from ase.build import bulk +from quacc.recipes.mlp.core import relax_job + +from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml + +torch = pytest.importorskip("torch") + +from importlib.util import find_spec + +methods = [] +if has_mace := find_spec("mace"): + methods.append("mace-mp-0") + +if has_matgl := find_spec("matgl"): + methods.append("m3gnet") + +if has_chgnet := find_spec("chgnet"): + methods.append("chgnet") + +if has_sevennet := find_spec("sevenn"): + methods.append("sevennet") + +if has_orb := find_spec("orb_models"): + methods.append("orb") + +if find_spec("fairchem"): + from huggingface_hub.utils._auth import get_token + + if get_token(): + methods.append("fairchem") + + +def _set_dtype(size, type_="float"): + globals()[f"{type_}_th"] = getattr(torch, f"{type_}{size}") + globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}") + torch.set_default_dtype(getattr(torch, f"float{size}")) + + +@pytest.mark.parametrize("method", methods) +def test_total_energy_adsorbml(tmp_path, monkeypatch, method): + monkeypatch.chdir(tmp_path) + + if method == "mace-mp-0": + _set_dtype(64) + else: + _set_dtype(32) + + if method == "fairchem": + calc_kwargs = { + "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex", + "local_cache": "./fairchem_checkpoint_cache/", + "seed": 42, + } + else: + calc_kwargs = {} + + ref_CO_Cu111_adsorption_energy = { + "chgnet": -1.5, + "m3gnet": -0.5, + "mace-mp-0": -0.5, + "sevennet": -0.98, + "orb": -0.5, + "fairchem": -0.5, + } + + bulk_atoms = bulk("Cu") + + outputs = bulk_to_surfaces_to_adsorbml( + bulk_atoms, + adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}], + multiple_adsorbate_slab_config_kwargs={"num_configurations": 1}, + ml_relax_job=relax_job, + slab_validate_job=relax_job, + adslab_validate_job=relax_job, + gas_validate_job=relax_job, + bulk_relax_job=relax_job, + job_params={ + "all": dict( + method=method, + **calc_kwargs, + opt_params={"max_steps": 500}, + ), + }, + max_miller=1, + num_to_validate_with_DFT=1, + reference_ml_energies_to_gas_phase=True, + relax_bulk=True, +) + index_111_facet = np.where([result["slab"]["slab_metadata"]['millers'] == (1, 1, 1) for result in outputs])[0][0] + + assert len(outputs) > 0 + assert outputs[index_111_facet]["adslabs"][0]["results"]["referenced_adsorption_energy"][ + "adsorption_energy" + ] == pytest.approx( + ref_CO_Cu111_adsorption_energy[method], abs=0.1 + ) + + + +@pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) +def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): + monkeypatch.chdir(tmp_path) + + calc_kwargs = { + "model_name": fairchem_checkpoint, + "local_cache": "./fairchem_checkpoint_cache/", + "seed": 42, + } + + ref_CO_Cu111_adsorption_energy = { + "EquiformerV2-31M-S2EF-OC20-All+MD": -0.5, + } + + bulk_atoms = bulk("Cu") + + outputs = bulk_to_surfaces_to_adsorbml( + bulk_atoms, + adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}], + multiple_adsorbate_slab_config_kwargs={"num_configurations": 1}, + ml_relax_job=relax_job, + slab_validate_job=relax_job, + adslab_validate_job=relax_job, + gas_validate_job=relax_job, + bulk_relax_job=relax_job, + job_params={ + "all": dict( + method="fairchem", + **calc_kwargs, + ), + }, + max_miller=1, + num_to_validate_with_DFT=1, + reference_ml_energies_to_gas_phase=False, + relax_bulk=False, + ) + index_111_facet = np.where([result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs])[0][0] + + assert len(outputs) > 0 + assert outputs[index_111_facet]["adslabs"][0]["results"]["energy"] == pytest.approx( + ref_CO_Cu111_adsorption_energy[fairchem_checkpoint], abs=0.1 + ) + \ No newline at end of file diff --git a/tests/core/recipes/mlp_recipes/test_core_recipes.py b/tests/core/recipes/mlp_recipes/test_core_recipes.py index 669c68cb8e..6d9d5b2879 100644 --- a/tests/core/recipes/mlp_recipes/test_core_recipes.py +++ b/tests/core/recipes/mlp_recipes/test_core_recipes.py @@ -59,7 +59,7 @@ def test_static_job(tmp_path, monkeypatch, method): if method == "fairchem": # Note that for this to work, you need HF_TOKEN env variable set! calc_kwargs = { - "model_name": "EquiformerV2-31M-OMAT24-mp-salex", + "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex", "local_cache": "./fairchem_checkpoint_cache/", } else: @@ -117,7 +117,7 @@ def test_relax_job(tmp_path, monkeypatch, method): if method == "fairchem": # Note that for this to work, you need HF_TOKEN env variable set! calc_kwargs = { - "model_name": "EquiformerV2-31M-OMAT24-mp-salex", + "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex", "local_cache": "./fairchem_checkpoint_cache/", } else: @@ -169,7 +169,7 @@ def test_relax_cell_job(tmp_path, monkeypatch, method): if method == "fairchem": # Note that for this to work, you need HF_TOKEN env variable set! calc_kwargs = { - "model_name": "EquiformerV2-31M-OMAT24-mp-salex", + "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex", "local_cache": "./fairchem_checkpoint_cache/", } else: diff --git a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py index a1fb3c32eb..23509fee09 100644 --- a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py +++ b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py @@ -50,7 +50,7 @@ def test_elastic_jobs(tmp_path, monkeypatch, method): if method == "fairchem": calc_kwargs = { - "model_name": "EquiformerV2-31M-OMAT24-mp-salex", + "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex", "local_cache": "./fairchem_checkpoint_cache/", } else: diff --git a/tests/requirements-mlp1.txt b/tests/requirements-mlp1.txt index 44dae38751..8ebcb8ab2a 100644 --- a/tests/requirements-mlp1.txt +++ b/tests/requirements-mlp1.txt @@ -10,3 +10,4 @@ torch_geometric==2.6.1 torch_scatter==2.1.2 torch_sparse==0.6.18 torch_cluster==1.6.3 +fairchem-data-oc==0.2.1 \ No newline at end of file diff --git a/tests/requirements-mlp2.txt b/tests/requirements-mlp2.txt index aa31143880..f12449c333 100644 --- a/tests/requirements-mlp2.txt +++ b/tests/requirements-mlp2.txt @@ -1,2 +1,3 @@ mace-torch==0.3.12 matgl @ git+https://github.com/materialsvirtuallab/matgl.git +fairchem-data-oc==0.2.1 \ No newline at end of file From 7c35be671afa48f74b3bc59acc5d8d0b7604d7f0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:24:58 +0000 Subject: [PATCH 08/19] pre-commit auto-fixes --- src/quacc/recipes/common/adsorbml.py | 82 +++++++++---------- src/quacc/recipes/mlp/adsorbml.py | 5 +- src/quacc/types.py | 13 +-- .../core/recipes/mlp_recipes/test_adsorbml.py | 67 +++++++-------- tests/requirements-mlp1.txt | 2 +- tests/requirements-mlp2.txt | 2 +- 6 files changed, 82 insertions(+), 89 deletions(-) diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index c98dd92544..5115686a15 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -31,9 +31,6 @@ logger = logging.getLogger(__name__) - - - @job def ocp_surface_generator(bulk_atoms: Atoms, max_miller: int = 1) -> list[Slab]: """ @@ -92,8 +89,12 @@ def __init__( self.shift = shift self.top = top - assert np.linalg.norm(self.atoms.cell[0]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}" - assert np.linalg.norm(self.atoms.cell[1]) >= min_ab, f"Slab not tiled, you need to repeat it to at least {min_ab}" + assert np.linalg.norm(self.atoms.cell[0]) >= min_ab, ( + f"Slab not tiled, you need to repeat it to at least {min_ab}" + ) + assert np.linalg.norm(self.atoms.cell[1]) >= min_ab, ( + f"Slab not tiled, you need to repeat it to at least {min_ab}" + ) assert self.has_surface_tagged(), "Slab not tagged" assert len(self.atoms.constraints) > 0, "Sub-surface atoms not constrained" @@ -101,8 +102,8 @@ def __init__( @job def ocp_adslab_generator( slab: Slab | Atoms, - adsorbates_kwargs: list[dict[str,Any]] | None = None, - multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None, + adsorbates_kwargs: list[dict[str, Any]] | None = None, + multiple_adsorbate_slab_config_kwargs: dict[str, Any] | None = None, ) -> list[Atoms]: """ Generate adsorbate-slab configurations. @@ -152,8 +153,8 @@ def ocp_adslab_generator( @flow def find_adslabs_each_slab( slabs: list[Slab], - adsorbates_kwargs: dict[str,Any], - multiple_adsorbate_slab_config_kwargs: dict[str,Any] | None = None, + adsorbates_kwargs: dict[str, Any], + multiple_adsorbate_slab_config_kwargs: dict[str, Any] | None = None, ) -> list[dict[str, Slab | list[Atoms]]]: """ Find adsorbate-slab configurations for each slab. @@ -259,7 +260,7 @@ def filter_sort_select_adslabs( @flow def adsorb_ml_pipeline( slab: Slab, - adsorbates_kwargs: dict[str,Any], + adsorbates_kwargs: dict[str, Any], multiple_adsorbate_slab_config_kwargs: dict[str, Any], ml_slab_adslab_relax_job: Job, slab_validate_job: Job, @@ -312,11 +313,13 @@ def adsorb_ml_pipeline( detected anomalies, and optionally DFT-validated structures. """ - slab.atoms.pbc=True + slab.atoms.pbc = True ml_relaxed_slab_result = ml_slab_adslab_relax_job(slab.atoms) unrelaxed_adslab_configurations = ocp_adslab_generator( - ml_relaxed_slab_result["atoms"], adsorbates_kwargs, multiple_adsorbate_slab_config_kwargs + ml_relaxed_slab_result["atoms"], + adsorbates_kwargs, + multiple_adsorbate_slab_config_kwargs, ) ml_relaxed_configurations = [ @@ -357,17 +360,15 @@ def adsorb_ml_pipeline( } else: dft_validated_adslabs = [ - adslab_validate_job(top_candidates[i]["atoms"], relax_cell=False) - for i in range(num_to_validate_with_DFT) - ] + adslab_validate_job(top_candidates[i]["atoms"], relax_cell=False) + for i in range(num_to_validate_with_DFT) + ] dft_validated_slab = slab_validate_job(slab.atoms, relax_cell=False) if reference_ml_energies_to_gas_phase: if atomic_reference_energies is None and molecule_results is None: - molecule_results = generate_molecule_reference_results( - gas_validate_job - ) + molecule_results = generate_molecule_reference_results(gas_validate_job) dft_validated_adslabs = reference_adslab_energies( dft_validated_adslabs, @@ -380,8 +381,11 @@ def adsorb_ml_pipeline( "slab": slab.get_metadata_dict(), "adslabs": top_candidates, "adslab_anomalies": adslab_anomalies_list, - "validated_structures": {"slab": dft_validated_slab, "adslabs": dft_validated_adslabs}} - + "validated_structures": { + "slab": dft_validated_slab, + "adslabs": dft_validated_adslabs, + }, + } @job @@ -414,23 +418,22 @@ def reference_adslab_energies( if atomic_energies is None: if molecule_results is not None: atomic_energies = { - "H": molecule_results["H2"]["results"]["energy"] / 2, - "N": molecule_results["N2"]["results"]["energy"] / 2, - "O": ( - molecule_results["H2O"]["results"]["energy"] - - molecule_results["H2"]["results"]["energy"] - ), - "C": molecule_results["CO"]["results"]["energy"] - - ( - molecule_results["H2O"]["results"]["energy"] - - molecule_results["H2"]["results"]["energy"] - ), - } + "H": molecule_results["H2"]["results"]["energy"] / 2, + "N": molecule_results["N2"]["results"]["energy"] / 2, + "O": ( + molecule_results["H2O"]["results"]["energy"] + - molecule_results["H2"]["results"]["energy"] + ), + "C": molecule_results["CO"]["results"]["energy"] + - ( + molecule_results["H2O"]["results"]["energy"] + - molecule_results["H2"]["results"]["energy"] + ), + } else: raise Exception( - "Missing atomic energies and gas phase energies; unable to continue!" - ) - + "Missing atomic energies and gas phase energies; unable to continue!" + ) slab_energy = slab_result["results"]["energy"] @@ -515,7 +518,7 @@ def generate_molecule_reference_results(relax_job: Job) -> MoleculeReferenceResu @flow def bulk_to_surfaces_to_adsorbml( bulk_atoms: Atoms, - adsorbates_kwargs: dict[str,Any], + adsorbates_kwargs: dict[str, Any], multiple_adsorbate_slab_config_kwargs: dict[str, Any], ml_relax_job: Job, slab_validate_job: Job, @@ -523,7 +526,6 @@ def bulk_to_surfaces_to_adsorbml( gas_validate_job: Job, max_miller: int = 1, bulk_relax_job: Job | None = None, - num_to_validate_with_DFT: int = 0, reference_ml_energies_to_gas_phase: bool = True, relax_bulk: bool = True, @@ -587,9 +589,7 @@ def bulk_to_surfaces_to_adsorbml( slabs = ocp_surface_generator(bulk_atoms=bulk_atoms, max_miller=max_miller) if reference_ml_energies_to_gas_phase and atomic_reference_energies is not None: - molecule_results = generate_molecule_reference_results( - ml_relax_job - ) + molecule_results = generate_molecule_reference_results(ml_relax_job) else: molecule_results = None @@ -612,5 +612,5 @@ def adsorbML_each_surface(slabs: list[Slab], **kwargs: Any) -> list[dict[str, An num_to_validate_with_DFT=num_to_validate_with_DFT, molecule_results=molecule_results, reference_ml_energies_to_gas_phase=reference_ml_energies_to_gas_phase, - atomic_reference_energies=atomic_reference_energies + atomic_reference_energies=atomic_reference_energies, ) diff --git a/src/quacc/recipes/mlp/adsorbml.py b/src/quacc/recipes/mlp/adsorbml.py index 83bf378685..29a6955c5e 100644 --- a/src/quacc/recipes/mlp/adsorbml.py +++ b/src/quacc/recipes/mlp/adsorbml.py @@ -4,8 +4,8 @@ from typing import TYPE_CHECKING, Any from quacc import Job, flow +from quacc.recipes.common.adsorbml import adsorb_ml_pipeline as adsorb_ml_pipeline_ from quacc.recipes.common.adsorbml import ( - adsorb_ml_pipeline as adsorb_ml_pipeline_, bulk_to_surfaces_to_adsorbml as bulk_to_surfaces_to_adsorbml_, ) from quacc.wflow_tools.customizers import customize_funcs @@ -21,7 +21,6 @@ ) - logger = logging.getLogger(__name__) @@ -123,7 +122,7 @@ def adsorb_ml_pipeline( @flow def bulk_to_surfaces_to_adsorbml( bulk_atoms: Atoms, - adsorbates_kwargs: dict[str,Any], + adsorbates_kwargs: dict[str, Any], multiple_adsorbate_slab_config_kwargs: dict[str, Any], ml_relax_job: Job, slab_validate_job: Job, diff --git a/src/quacc/types.py b/src/quacc/types.py index cd06566a6c..460519478b 100644 --- a/src/quacc/types.py +++ b/src/quacc/types.py @@ -553,11 +553,14 @@ class Slab(TypedDict): class AdsorbMLSchema(TypedDict): slab: Slab adslabs: list[OptSchema] - adslab_anomalies: list[Literal[ - "adsorbate_dissociated", - "adsorbate_desorbed", - "surface_changed", - "adsorbate_intercalated",]] + adslab_anomalies: list[ + Literal[ + "adsorbate_dissociated", + "adsorbate_desorbed", + "surface_changed", + "adsorbate_intercalated", + ] + ] validated_structures: NotRequired[dict[str, Any]] class MoleculeReferenceResults(TypedDict): diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py index 9b16e07cd8..a1df515c78 100644 --- a/tests/core/recipes/mlp_recipes/test_adsorbml.py +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -3,9 +3,9 @@ import numpy as np import pytest from ase.build import bulk -from quacc.recipes.mlp.core import relax_job from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml +from quacc.recipes.mlp.core import relax_job torch = pytest.importorskip("torch") @@ -69,36 +69,33 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): bulk_atoms = bulk("Cu") - outputs = bulk_to_surfaces_to_adsorbml( - bulk_atoms, - adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}], - multiple_adsorbate_slab_config_kwargs={"num_configurations": 1}, - ml_relax_job=relax_job, - slab_validate_job=relax_job, - adslab_validate_job=relax_job, - gas_validate_job=relax_job, - bulk_relax_job=relax_job, - job_params={ - "all": dict( - method=method, - **calc_kwargs, - opt_params={"max_steps": 500}, - ), - }, - max_miller=1, - num_to_validate_with_DFT=1, - reference_ml_energies_to_gas_phase=True, - relax_bulk=True, -) - index_111_facet = np.where([result["slab"]["slab_metadata"]['millers'] == (1, 1, 1) for result in outputs])[0][0] + outputs = bulk_to_surfaces_to_adsorbml( + bulk_atoms, + adsorbates_kwargs=[{"adsorbate_smiles_from_db": "*CO"}], + multiple_adsorbate_slab_config_kwargs={"num_configurations": 1}, + ml_relax_job=relax_job, + slab_validate_job=relax_job, + adslab_validate_job=relax_job, + gas_validate_job=relax_job, + bulk_relax_job=relax_job, + job_params={ + "all": dict(method=method, **calc_kwargs, opt_params={"max_steps": 500}) + }, + max_miller=1, + num_to_validate_with_DFT=1, + reference_ml_energies_to_gas_phase=True, + relax_bulk=True, + ) + index_111_facet = np.where( + [result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs] + )[0][0] assert len(outputs) > 0 - assert outputs[index_111_facet]["adslabs"][0]["results"]["referenced_adsorption_energy"][ - "adsorption_energy" - ] == pytest.approx( + assert outputs[index_111_facet]["adslabs"][0]["results"][ + "referenced_adsorption_energy" + ]["adsorption_energy"] == pytest.approx( ref_CO_Cu111_adsorption_energy[method], abs=0.1 ) - @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) @@ -111,9 +108,7 @@ def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): "seed": 42, } - ref_CO_Cu111_adsorption_energy = { - "EquiformerV2-31M-S2EF-OC20-All+MD": -0.5, - } + ref_CO_Cu111_adsorption_energy = {"EquiformerV2-31M-S2EF-OC20-All+MD": -0.5} bulk_atoms = bulk("Cu") @@ -126,21 +121,17 @@ def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): adslab_validate_job=relax_job, gas_validate_job=relax_job, bulk_relax_job=relax_job, - job_params={ - "all": dict( - method="fairchem", - **calc_kwargs, - ), - }, + job_params={"all": dict(method="fairchem", **calc_kwargs)}, max_miller=1, num_to_validate_with_DFT=1, reference_ml_energies_to_gas_phase=False, relax_bulk=False, ) - index_111_facet = np.where([result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs])[0][0] + index_111_facet = np.where( + [result["slab"]["slab_metadata"]["millers"] == (1, 1, 1) for result in outputs] + )[0][0] assert len(outputs) > 0 assert outputs[index_111_facet]["adslabs"][0]["results"]["energy"] == pytest.approx( ref_CO_Cu111_adsorption_energy[fairchem_checkpoint], abs=0.1 ) - \ No newline at end of file diff --git a/tests/requirements-mlp1.txt b/tests/requirements-mlp1.txt index 8ebcb8ab2a..108e0fe762 100644 --- a/tests/requirements-mlp1.txt +++ b/tests/requirements-mlp1.txt @@ -10,4 +10,4 @@ torch_geometric==2.6.1 torch_scatter==2.1.2 torch_sparse==0.6.18 torch_cluster==1.6.3 -fairchem-data-oc==0.2.1 \ No newline at end of file +fairchem-data-oc==0.2.1 diff --git a/tests/requirements-mlp2.txt b/tests/requirements-mlp2.txt index f12449c333..0623938ec3 100644 --- a/tests/requirements-mlp2.txt +++ b/tests/requirements-mlp2.txt @@ -1,3 +1,3 @@ mace-torch==0.3.12 matgl @ git+https://github.com/materialsvirtuallab/matgl.git -fairchem-data-oc==0.2.1 \ No newline at end of file +fairchem-data-oc==0.2.1 From ddd17010bbc6bf17816635d628ae899507491965 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 14:27:55 +0000 Subject: [PATCH 09/19] add ignore for O energy --- src/quacc/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quacc/types.py b/src/quacc/types.py index 460519478b..eaf1de4fa4 100644 --- a/src/quacc/types.py +++ b/src/quacc/types.py @@ -572,7 +572,7 @@ class MoleculeReferenceResults(TypedDict): class AtomicReferenceEnergies(TypedDict): H: float N: float - O: float + O: float # noqa: E741 C: float class VibThermoSchema(VibSchema, ThermoSchema): From 8c1fa59876664a7e1604b2fd63338b1a8748a14c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:28:08 +0000 Subject: [PATCH 10/19] pre-commit auto-fixes --- src/quacc/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quacc/types.py b/src/quacc/types.py index eaf1de4fa4..58c4b37070 100644 --- a/src/quacc/types.py +++ b/src/quacc/types.py @@ -572,7 +572,7 @@ class MoleculeReferenceResults(TypedDict): class AtomicReferenceEnergies(TypedDict): H: float N: float - O: float # noqa: E741 + O: float # noqa: E741 C: float class VibThermoSchema(VibSchema, ThermoSchema): From 9b72e61be95a4ae965fb42b9e6a721542f282321 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 14:30:43 +0000 Subject: [PATCH 11/19] back off fairchem data version --- tests/requirements-mlp1.txt | 2 +- tests/requirements-mlp2.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/requirements-mlp1.txt b/tests/requirements-mlp1.txt index 0c20c6a56d..5bebe4089f 100644 --- a/tests/requirements-mlp1.txt +++ b/tests/requirements-mlp1.txt @@ -10,4 +10,4 @@ torch_geometric==2.6.1 torch_scatter==2.1.2 torch_sparse==0.6.18 torch_cluster==1.6.3 -fairchem-data-oc==0.2.1 +fairchem-data-oc==0.2.0 diff --git a/tests/requirements-mlp2.txt b/tests/requirements-mlp2.txt index 0623938ec3..98d4fecb3a 100644 --- a/tests/requirements-mlp2.txt +++ b/tests/requirements-mlp2.txt @@ -1,3 +1,3 @@ mace-torch==0.3.12 matgl @ git+https://github.com/materialsvirtuallab/matgl.git -fairchem-data-oc==0.2.1 +fairchem-data-oc==0.2.0 From 719376ce7fca574fb6b0f6c6032b214a6c1f04bb Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 14:38:17 +0000 Subject: [PATCH 12/19] skip adsorbml tests if fairchem-data-oc not installed --- pyproject.toml | 2 +- .../core/recipes/mlp_recipes/test_adsorbml.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c16566fd01..69ae9da93b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ mlp1 = ["chgnet>=0.3.3", "torch-dftd>=0.4.0", "sevenn>=0.10.1", "orb-models>=0. mlp2 = ["mace-torch>=0.3.3", "matgl>=1.1.2"] mp = ["atomate2>=0.0.14"] newtonnet = ["newtonnet>=1.1"] -ocdata = ["fairchem-data-oc>=0.2.0"] +adsorbml = ["fairchem-data-oc>=0.2.0"] parsl = ["parsl[monitoring]>=2024.5.27; platform_system!='Windows'"] phonons = ["phonopy>=2.20.0", "seekpath>=2.1.0"] prefect = ["prefect[dask]>=2.19.0", "dask-jobqueue>=0.8.2"] diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py index a1df515c78..acae09dcdc 100644 --- a/tests/core/recipes/mlp_recipes/test_adsorbml.py +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -4,7 +4,6 @@ import pytest from ase.build import bulk -from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml from quacc.recipes.mlp.core import relax_job torch = pytest.importorskip("torch") @@ -27,7 +26,7 @@ if has_orb := find_spec("orb_models"): methods.append("orb") -if find_spec("fairchem"): +if find_spec("fairchem.core"): from huggingface_hub.utils._auth import get_token if get_token(): @@ -39,9 +38,15 @@ def _set_dtype(size, type_="float"): globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}") torch.set_default_dtype(getattr(torch, f"float{size}")) - +@pytest.mark.skipif( + not has_fairchem_data_oc, + reason="fairchem-data-oc python package not available", +) @pytest.mark.parametrize("method", methods) def test_total_energy_adsorbml(tmp_path, monkeypatch, method): + + from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml + monkeypatch.chdir(tmp_path) if method == "mace-mp-0": @@ -97,9 +102,15 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): ref_CO_Cu111_adsorption_energy[method], abs=0.1 ) - +@pytest.mark.skipif( + not has_fairchem_data_oc, + reason="fairchem-data-oc python package not available", +) @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): + + from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml + monkeypatch.chdir(tmp_path) calc_kwargs = { From c1798cbb787087cfadad6af2f51aa05c9734183c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:38:31 +0000 Subject: [PATCH 13/19] pre-commit auto-fixes --- tests/core/recipes/mlp_recipes/test_adsorbml.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py index acae09dcdc..a61bb7d0bf 100644 --- a/tests/core/recipes/mlp_recipes/test_adsorbml.py +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -38,13 +38,12 @@ def _set_dtype(size, type_="float"): globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}") torch.set_default_dtype(getattr(torch, f"float{size}")) + @pytest.mark.skipif( - not has_fairchem_data_oc, - reason="fairchem-data-oc python package not available", + not has_fairchem_data_oc, reason="fairchem-data-oc python package not available" ) @pytest.mark.parametrize("method", methods) def test_total_energy_adsorbml(tmp_path, monkeypatch, method): - from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml monkeypatch.chdir(tmp_path) @@ -102,13 +101,12 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): ref_CO_Cu111_adsorption_energy[method], abs=0.1 ) + @pytest.mark.skipif( - not has_fairchem_data_oc, - reason="fairchem-data-oc python package not available", + not has_fairchem_data_oc, reason="fairchem-data-oc python package not available" ) @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): - from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml monkeypatch.chdir(tmp_path) From 238a57ba95d78344d93eaf1943238a7c3e863f1b Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 14:40:25 +0000 Subject: [PATCH 14/19] fix test skip --- tests/core/recipes/mlp_recipes/test_adsorbml.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py index a61bb7d0bf..757980a5af 100644 --- a/tests/core/recipes/mlp_recipes/test_adsorbml.py +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -32,18 +32,20 @@ if get_token(): methods.append("fairchem") +has_fairchem_data_oc = find_spec("fairchem.data.oc") def _set_dtype(size, type_="float"): globals()[f"{type_}_th"] = getattr(torch, f"{type_}{size}") globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}") torch.set_default_dtype(getattr(torch, f"float{size}")) - @pytest.mark.skipif( - not has_fairchem_data_oc, reason="fairchem-data-oc python package not available" + not has_fairchem_data_oc, + reason="fairchem-data-oc python package not available", ) @pytest.mark.parametrize("method", methods) def test_total_energy_adsorbml(tmp_path, monkeypatch, method): + from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml monkeypatch.chdir(tmp_path) @@ -101,14 +103,15 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): ref_CO_Cu111_adsorption_energy[method], abs=0.1 ) - @pytest.mark.skipif( - not has_fairchem_data_oc, reason="fairchem-data-oc python package not available" + not has_fairchem_data_oc, + reason="fairchem-data-oc python package not available", ) @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): - from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml + from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml + monkeypatch.chdir(tmp_path) calc_kwargs = { From d8bae84613b06a0a842aa2ad8daa82e26f1a06fc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:43:33 +0000 Subject: [PATCH 15/19] pre-commit auto-fixes --- tests/core/recipes/mlp_recipes/test_adsorbml.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py index 757980a5af..34af40da2f 100644 --- a/tests/core/recipes/mlp_recipes/test_adsorbml.py +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -34,18 +34,18 @@ has_fairchem_data_oc = find_spec("fairchem.data.oc") + def _set_dtype(size, type_="float"): globals()[f"{type_}_th"] = getattr(torch, f"{type_}{size}") globals()[f"{type_}_np"] = getattr(np, f"{type_}{size}") torch.set_default_dtype(getattr(torch, f"float{size}")) + @pytest.mark.skipif( - not has_fairchem_data_oc, - reason="fairchem-data-oc python package not available", + not has_fairchem_data_oc, reason="fairchem-data-oc python package not available" ) @pytest.mark.parametrize("method", methods) def test_total_energy_adsorbml(tmp_path, monkeypatch, method): - from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml monkeypatch.chdir(tmp_path) @@ -103,15 +103,14 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): ref_CO_Cu111_adsorption_energy[method], abs=0.1 ) + @pytest.mark.skipif( - not has_fairchem_data_oc, - reason="fairchem-data-oc python package not available", + not has_fairchem_data_oc, reason="fairchem-data-oc python package not available" ) @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): - from quacc.recipes.mlp.adsorbml import bulk_to_surfaces_to_adsorbml - + monkeypatch.chdir(tmp_path) calc_kwargs = { From 3688f16fcf50c5d3478b13f2364395bb200ab529 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 14:43:46 +0000 Subject: [PATCH 16/19] fix tests --- tests/core/recipes/mlp_recipes/test_core_recipes.py | 2 +- tests/core/recipes/mlp_recipes/test_elastic_recipes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/core/recipes/mlp_recipes/test_core_recipes.py b/tests/core/recipes/mlp_recipes/test_core_recipes.py index 6d9d5b2879..0de3717cf4 100644 --- a/tests/core/recipes/mlp_recipes/test_core_recipes.py +++ b/tests/core/recipes/mlp_recipes/test_core_recipes.py @@ -27,7 +27,7 @@ if has_orb := find_spec("orb_models"): methods.append("orb") -if find_spec("fairchem"): +if find_spec("fairchem.core"): from huggingface_hub.utils._auth import get_token if get_token(): diff --git a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py index 23509fee09..4d638b4298 100644 --- a/tests/core/recipes/mlp_recipes/test_elastic_recipes.py +++ b/tests/core/recipes/mlp_recipes/test_elastic_recipes.py @@ -26,7 +26,7 @@ if has_orb := find_spec("orb_models"): methods.append("orb") -if find_spec("fairchem"): +if find_spec("fairchem.core"): from huggingface_hub.utils._auth import get_token if get_token(): From f694404ad856bf07f5cdbb92523f0b1ae5d4efc7 Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 15:59:20 +0000 Subject: [PATCH 17/19] small test fixes --- .../core/recipes/mlp_recipes/test_adsorbml.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py index 34af40da2f..1e6d0fb81a 100644 --- a/tests/core/recipes/mlp_recipes/test_adsorbml.py +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -26,7 +26,7 @@ if has_orb := find_spec("orb_models"): methods.append("orb") -if find_spec("fairchem.core"): +if has_fairchem := find_spec("fairchem.core"): from huggingface_hub.utils._auth import get_token if get_token(): @@ -57,19 +57,34 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): if method == "fairchem": calc_kwargs = { - "model_name": "EquiformerV2-31M-OMAT24-MP-sAlex", + "model_name": "EquiformerV2-31M-OMAT24", "local_cache": "./fairchem_checkpoint_cache/", "seed": 42, } + # Computed from DFT since we only need this once; this also serves as a test + # that the reference energies are being used correctly + OMAT_energies = { + "H2": -6.77226619, + "H2O": -14.23238513, + "N2": -16.65252624, + "CO": -14.79264030, + } + atomic_reference_energies = { + "H": OMAT_energies["H2"] / 2, + "N": OMAT_energies["N2"] / 2, + "O": (OMAT_energies["H2O"] - OMAT_energies["H2"]), + "C": OMAT_energies["CO"] - (OMAT_energies["H2O"] - OMAT_energies["H2"]), + } else: calc_kwargs = {} + atomic_reference_energies = None ref_CO_Cu111_adsorption_energy = { "chgnet": -1.5, "m3gnet": -0.5, "mace-mp-0": -0.5, "sevennet": -0.98, - "orb": -0.5, + "orb": -1.04, "fairchem": -0.5, } @@ -89,6 +104,7 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): }, max_miller=1, num_to_validate_with_DFT=1, + atomic_reference_energies=atomic_reference_energies, reference_ml_energies_to_gas_phase=True, relax_bulk=True, ) @@ -105,7 +121,7 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): @pytest.mark.skipif( - not has_fairchem_data_oc, reason="fairchem-data-oc python package not available" + not (has_fairchem_data_oc and has_fairchem), reason="fairchem-data-oc python package not available" ) @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): From 99b0857a115d88db9ec039317ec959abacdddb05 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 16:00:48 +0000 Subject: [PATCH 18/19] pre-commit auto-fixes --- tests/core/recipes/mlp_recipes/test_adsorbml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/core/recipes/mlp_recipes/test_adsorbml.py b/tests/core/recipes/mlp_recipes/test_adsorbml.py index 1e6d0fb81a..5dae8e9520 100644 --- a/tests/core/recipes/mlp_recipes/test_adsorbml.py +++ b/tests/core/recipes/mlp_recipes/test_adsorbml.py @@ -121,7 +121,8 @@ def test_total_energy_adsorbml(tmp_path, monkeypatch, method): @pytest.mark.skipif( - not (has_fairchem_data_oc and has_fairchem), reason="fairchem-data-oc python package not available" + not (has_fairchem_data_oc and has_fairchem), + reason="fairchem-data-oc python package not available", ) @pytest.mark.parametrize("fairchem_checkpoint", ["EquiformerV2-31M-S2EF-OC20-All+MD"]) def test_referenced_energy_mlp(tmp_path, monkeypatch, fairchem_checkpoint): From 67c02968cc5539515675e38a5f66fca0a177e53d Mon Sep 17 00:00:00 2001 From: Zack Ulissi Date: Wed, 26 Mar 2025 20:21:41 +0000 Subject: [PATCH 19/19] always reference DFT validations --- src/quacc/recipes/common/adsorbml.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/quacc/recipes/common/adsorbml.py b/src/quacc/recipes/common/adsorbml.py index 5115686a15..79c9bd0c97 100644 --- a/src/quacc/recipes/common/adsorbml.py +++ b/src/quacc/recipes/common/adsorbml.py @@ -366,16 +366,16 @@ def adsorb_ml_pipeline( dft_validated_slab = slab_validate_job(slab.atoms, relax_cell=False) - if reference_ml_energies_to_gas_phase: - if atomic_reference_energies is None and molecule_results is None: - molecule_results = generate_molecule_reference_results(gas_validate_job) - - dft_validated_adslabs = reference_adslab_energies( - dft_validated_adslabs, - dft_validated_slab, - atomic_energies=atomic_reference_energies, - molecule_results=molecule_results, - ) + # If we're running DFT calculations, we always need to reference energies! + if atomic_reference_energies is None and molecule_results is None: + molecule_results = generate_molecule_reference_results(gas_validate_job) + + dft_validated_adslabs = reference_adslab_energies( + dft_validated_adslabs, + dft_validated_slab, + atomic_energies=atomic_reference_energies, + molecule_results=molecule_results, + ) return { "slab": slab.get_metadata_dict(),