diff --git a/src/atomate2/ase/md.py b/src/atomate2/ase/md.py index e6adbf056..8491ae47d 100644 --- a/src/atomate2/ase/md.py +++ b/src/atomate2/ase/md.py @@ -139,11 +139,13 @@ class AseMDMaker(AseMaker, metaclass=ABCMeta): traj_file : str | Path | None = None If a str or Path, the name of the file to save the MD trajectory to. If None, the trajectory is not written to disk - traj_file_fmt : Literal["ase","pmg","xdatcar"] + traj_file_fmt : Literal["ase","pmg","xdatcar", "parquet"] The format of the trajectory file to write. If "ase", writes an ASE .Trajectory. If "pmg", writes a Pymatgen .Trajectory. - If "xdatcar, writes a VASP-style XDATCAR + If "xdatcar", writes a VASP-style XDATCAR + If "parquet", uses emmet.core's Trajectory object to write a high-efficiency + parquet format file containing the trajectory. traj_interval : int The step interval for saving the trajectories. mb_velocity_seed : int or None @@ -415,7 +417,7 @@ def _callback(dyn: MolecularDynamics = md_runner) -> None: return AseResult( final_mol_or_struct=mol_or_struct, - trajectory=md_observer.to_pymatgen_trajectory(filename=None), + trajectory=md_observer.to_emmet_trajectory(filename=None), dir_name=os.getcwd(), elapsed_time=t_f - t_i, ) diff --git a/src/atomate2/ase/schemas.py b/src/atomate2/ase/schemas.py index db4a7ac79..cd79f3b68 100644 --- a/src/atomate2/ase/schemas.py +++ b/src/atomate2/ase/schemas.py @@ -13,16 +13,14 @@ from pathlib import Path from typing import Any -from ase.stress import voigt_6_to_full_3x3_stress -from ase.units import GPa from emmet.core.math import Matrix3D, Vector3D from emmet.core.structure import MoleculeMetadata, StructureMetadata from emmet.core.tasks import TaskState +from emmet.core.trajectory import AtomTrajectory from emmet.core.utils import ValueEnum from emmet.core.vasp.calculation import StoreTrajectoryOption from pydantic import BaseModel, Field from pymatgen.core import Molecule, Structure -from pymatgen.core.trajectory import Trajectory as PmgTrajectory _task_doc_translation_keys = { "input", @@ -49,7 +47,7 @@ class AseResult(BaseModel): None, description="The final total energy from the calculation." ) - trajectory: PmgTrajectory | None = Field( + trajectory: AtomTrajectory | None = Field( None, description="The relaxation or molecular dynamics trajectory." ) @@ -146,7 +144,7 @@ class OutputDoc(AseBaseModel): # NOTE: units for stresses were converted to kbar (* -10 from standard output) # to comply with MP convention stress: Matrix3D | None = Field( - None, description="The stress on the cell in units of kbar (in Voigt notation)." + None, description="The stress on the cell in units of kbar." ) # NOTE: the ionic_steps can also be a dict when these are in blob storage and @@ -417,22 +415,7 @@ def from_ase_compatible_result( input_mol_or_struct = None if trajectory: n_steps = len(trajectory) - - # NOTE: convert stress units from eV/A³ to kBar (* -1 from standard output) - # and to 3x3 matrix to comply with MP convention - if n_steps: - for idx in range(n_steps): - if trajectory.frame_properties[idx].get("stress") is not None: - trajectory.frame_properties[idx]["stress"] = ( - voigt_6_to_full_3x3_stress( - [ - val * -10 / GPa - for val in trajectory.frame_properties[idx]["stress"] - ] - ) - ) - - input_mol_or_struct = trajectory[0] + input_mol_or_struct = trajectory.to_pmg(frame_props=tuple(), indices=0)[0] input_doc = InputDoc( mol_or_struct=input_mol_or_struct, @@ -450,63 +433,43 @@ def from_ase_compatible_result( steps = 1 n_steps = 1 - if isinstance(input_mol_or_struct, Structure): - traj_method = "from_structures" - elif isinstance(input_mol_or_struct, Molecule): - traj_method = "from_molecules" - - trajectory = getattr(PmgTrajectory, traj_method)( - [input_mol_or_struct], - frame_properties=[trajectory.frame_properties[0]], - constant_lattice=False, - ) + if trajectory: + trajectory = trajectory[-1] output_mol_or_struct = input_mol_or_struct else: output_mol_or_struct = result.final_mol_or_struct - if trajectory is None: - final_energy = result.final_energy - final_forces = None - final_stress = None - ionic_steps = None + final_energy = result.final_energy + final_forces = None + final_stress = None + ionic_steps = None - else: - final_energy = trajectory.frame_properties[-1]["energy"] - final_forces = trajectory.frame_properties[-1]["forces"] - final_stress = trajectory.frame_properties[-1].get("stress") + if trajectory: + final_energy = trajectory.energy[-1] + final_forces = trajectory.forces[-1] + ionic_step_props = ["energy", "forces"] + if trajectory.stress: + final_stress = trajectory.stress[-1] + ionic_step_props.append("stress") + + if trajectory.magmoms: + ionic_step_props.append("magmoms") ionic_steps = [] if ionic_step_data is not None and len(ionic_step_data) > 0: for idx in range(n_steps): _ionic_step_data = { key: ( - trajectory.frame_properties[idx].get(key) + getattr(trajectory, key)[idx] if key in ionic_step_data else None ) - for key in ("energy", "forces", "stress") + for key in ionic_step_props } - current_mol_or_struct = ( - trajectory[idx] - if any( - v in ionic_step_data - for v in ("mol_or_struct", "structure", "molecule") - ) - else None - ) - - # include "magmoms" in `ionic_step` if the trajectory has "magmoms" - if "magmoms" in trajectory.frame_properties[idx]: - _ionic_step_data.update( - { - "magmoms": ( - trajectory.frame_properties[idx]["magmoms"] - if "magmoms" in ionic_step_data - else None - ) - } - ) + current_mol_or_struct = trajectory.to_pmg( + frame_props=tuple(), indices=-1 + )[0] ionic_step = IonicStep( mol_or_struct=current_mol_or_struct, diff --git a/src/atomate2/ase/utils.py b/src/atomate2/ase/utils.py index db4611379..a5a31b3e4 100644 --- a/src/atomate2/ase/utils.py +++ b/src/atomate2/ase/utils.py @@ -22,7 +22,10 @@ from ase.mep.neb import NEB from ase.optimize import BFGS, FIRE, LBFGS, BFGSLineSearch, LBFGSLineSearch, MDMin from ase.optimize.sciopt import SciPyFminBFGS, SciPyFminCG +from ase.stress import voigt_6_to_full_3x3_stress +from ase.units import GPa from emmet.core.neb import NebMethod, NebResult +from emmet.core.trajectory import AtomTrajectory from monty.serialization import dumpfn from pymatgen.core.structure import Molecule, Structure from pymatgen.core.trajectory import Trajectory as PmgTrajectory @@ -83,7 +86,7 @@ def __init__(self, atoms: Atoms, store_md_outputs: bool = False) -> None: self.forces: list[np.ndarray] = [] self._calc_kwargs = { - "stress": ( + "stresses": ( "stress" in self.atoms.calc.implemented_properties and self._is_periodic ), "magmoms": True, @@ -113,7 +116,7 @@ def __call__(self) -> None: # When _store_md_outputs is True, ideal gas contribution to # stress is included. # Only store stress for periodic systems. - if self._calc_kwargs["stress"]: + if self._calc_kwargs["stresses"]: self.stresses.append( self.atoms.get_stress(include_ideal_gas=self._store_md_outputs) ) @@ -144,7 +147,7 @@ def compute_energy(self) -> float: def save( self, filename: str | PathLike | None, - fmt: Literal["pmg", "ase", "xdatcar"] = "ase", + fmt: Literal["pmg", "ase", "xdatcar", "parquet"] = "ase", ) -> None: """ Save the trajectory file using monty.serialization. @@ -162,6 +165,8 @@ def save( self.to_pymatgen_trajectory(filename=filename, file_format=fmt) # type: ignore[arg-type] elif fmt == "ase": self.to_ase_trajectory(filename=filename) + elif fmt == "parquet": + self.to_emmet_trajectory(filename=filename) else: raise ValueError(f"Unknown trajectory format {fmt}.") @@ -189,7 +194,7 @@ def to_ase_trajectory( "energy": self.energies[idx], "forces": self.forces[idx], } - if self._calc_kwargs["stress"]: + if self._calc_kwargs["stresses"]: kwargs["stress"] = self.stresses[idx] if self._calc_kwargs["magmoms"]: kwargs["magmom"] = self.magmoms[idx] @@ -218,7 +223,7 @@ def to_pymatgen_trajectory( If "xdatcar", writes a VASP-format XDATCAR object to file """ frame_property_keys = ["energy", "forces"] - for k in ("stress", "magmoms", "velocities", "temperature"): + for k in ("stresses", "magmoms", "velocities", "temperature"): if self._calc_kwargs[k]: frame_property_keys += [k] @@ -276,12 +281,47 @@ def to_pymatgen_trajectory( return pmg_traj + def to_emmet_trajectory( + self, filename: str | PathLike | None = None + ) -> AtomTrajectory: + """Create an emmet.core.AtomTrajectory.""" + frame_props = { + "cells": "lattice", + "energies": "energy", + "forces": "forces", + "stresses": "stress", + "magmoms": "magmoms", + "velocities": "velocities", + "temperatures": "temperature", + } + for k in ("stresses", "magmoms"): + if not self._calc_kwargs[k]: + frame_props.pop(k) + + ionic_step_data = {v: getattr(self, k) for k, v in frame_props.items()} + if self._calc_kwargs["stresses"]: + # NOTE: convert stress units from eV/A³ to kBar (* -1 from standard output) + # and to 3x3 matrix to comply with MP convention + ionic_step_data["stress"] = [ + voigt_6_to_full_3x3_stress(val * -10 / GPa) for val in self.stresses + ] + + traj = AtomTrajectory( + elements=self.atoms.get_atomic_numbers(), + cart_coords=self.atom_positions, + num_ionic_steps=len(self.atom_positions), + **ionic_step_data, + ) + if filename: + traj.to(file_name=filename) + return traj + def as_dict(self) -> dict: """Make JSONable dict representation of the Trajectory.""" traj_dict = { "energy": self.energies, "forces": self.forces, - "stress": self.stresses, + "stresses": self.stresses, "atom_positions": self.atom_positions, "cells": self.cells, "atoms": self.atoms, @@ -413,9 +453,9 @@ def relax( struct = self.ase_adaptor.get_structure( atoms, cls=Molecule if is_mol else Structure ) - traj = obs.to_pymatgen_trajectory(None) + traj = obs.to_emmet_trajectory(filename=None) is_force_conv = all( - np.linalg.norm(traj.frame_properties[-1]["forces"][idx]) < abs(fmax) + np.linalg.norm(traj.forces[-1][idx]) < abs(fmax) for idx in range(len(struct)) ) @@ -434,9 +474,7 @@ def relax( trajectory=traj, converged=converged, is_force_converged=is_force_conv, - energy_downhill=( - traj.frame_properties[-1]["energy"] < traj.frame_properties[0]["energy"] - ), + energy_downhill=traj.energy[-1] < traj.energy[0], dir_name=os.getcwd(), elapsed_time=t_f - t_i, ) diff --git a/src/atomate2/forcefields/md.py b/src/atomate2/forcefields/md.py index 21a6ffded..7aaf7fecf 100644 --- a/src/atomate2/forcefields/md.py +++ b/src/atomate2/forcefields/md.py @@ -88,11 +88,13 @@ class ForceFieldMDMaker(ForceFieldMixin, AseMDMaker): traj_file : str | Path | None = None If a str or Path, the name of the file to save the MD trajectory to. If None, the trajectory is not written to disk - traj_file_fmt : Literal["ase","pmg","xdatcar"] + traj_file_fmt : Literal["ase","pmg","xdatcar","parquet"] The format of the trajectory file to write. If "ase", writes an ASE .Trajectory. If "pmg", writes a Pymatgen .Trajectory. - If "xdatcar, writes a VASP-style XDATCAR + If "xdatcar", writes a VASP-style XDATCAR. + If "parquet", uses emmet.core's Trajectory object to write a high-efficiency + parquet format file containing the trajectory. traj_interval : int The step interval for saving the trajectories. mb_velocity_seed : int or None diff --git a/src/atomate2/forcefields/schemas.py b/src/atomate2/forcefields/schemas.py index 14f737ec0..027cc2205 100644 --- a/src/atomate2/forcefields/schemas.py +++ b/src/atomate2/forcefields/schemas.py @@ -144,6 +144,8 @@ def from_ase_compatible_result( MLFF.MACE_MP_0B3: "mace-torch", MLFF.GAP: "quippy-ase", MLFF.Nequip: "nequip", + MLFF.MATPES_PBE: "matgl", + MLFF.MATPES_R2SCAN: "matgl", } if pkg_name := {str(k): v for k, v in model_to_pkg_map.items()}.get( diff --git a/tests/ase/test_md.py b/tests/ase/test_md.py index 7300be1a5..0621c6026 100644 --- a/tests/ase/test_md.py +++ b/tests/ase/test_md.py @@ -9,6 +9,7 @@ import pytest from jobflow import run_locally +from pymatgen.io.vasp.outputs import Xdatcar from atomate2.ase.md import GFNxTBMDMaker, LennardJonesMDMaker from atomate2.ase.schemas import AseStructureTaskDoc @@ -137,7 +138,9 @@ def test_ase_npt_maker(calculator_name, lj_fcc_ne_pars, fcc_ne_structure, tmp_di reference_energies_per_atom[calculator_name] ) - # TODO: improve XDATCAR parsing test when class is fixed in pmg assert os.path.isfile("XDATCAR") + xdatcar = Xdatcar("XDATCAR") + assert len(xdatcar) == len(output.objects["trajectory"]) + assert len(xdatcar) == len(output.output.ionic_steps) assert len(output.objects["trajectory"]) == n_steps + 1 diff --git a/tests/ase/test_utils.py b/tests/ase/test_utils.py index 74cb28da5..3ab97417c 100644 --- a/tests/ase/test_utils.py +++ b/tests/ase/test_utils.py @@ -55,6 +55,9 @@ def test_trajectory_observer(si_structure: Structure, test_dir, tmp_dir): [("BFGS", None), (None, None), (BFGS, "log_file.traj")], ) def test_relaxer(si_structure, test_dir, tmp_dir, optimizer, traj_file): + from ase.stress import voigt_6_to_full_3x3_stress + from ase.units import GPa + expected_lattice = { "a": 3.866974, "b": 3.866974, @@ -92,19 +95,17 @@ def test_relaxer(si_structure, test_dir, tmp_dir, optimizer, traj_file): for key in expected_lattice } == pytest.approx(expected_lattice) - assert relax_output.trajectory.frame_properties[-1]["energy"] == pytest.approx( - expected_energy - ) + assert relax_output.trajectory.energy[-1] == pytest.approx(expected_energy) assert_allclose( - relax_output["trajectory"].frame_properties[-1]["forces"], + relax_output["trajectory"].forces[-1], expected_forces, atol=1e-11, ) assert_allclose( - relax_output["trajectory"].frame_properties[-1]["stress"], - expected_stresses, + relax_output["trajectory"].stress[-1], + voigt_6_to_full_3x3_stress(expected_stresses) * -10 / GPa, atol=1e-11, ) diff --git a/tests/forcefields/flows/test_mpmorph.py b/tests/forcefields/flows/test_mpmorph.py index 305a24dcb..9f6377c41 100644 --- a/tests/forcefields/flows/test_mpmorph.py +++ b/tests/forcefields/flows/test_mpmorph.py @@ -162,14 +162,14 @@ def test_mpmorph_mlff_maker(ff_name, si_structure, test_dir, clean_dir): # this is designed to check if MD is injected with approximately # the right temperature, and that's why tolerance is so high assert all( - doc.forcefield_objects["trajectory"].frame_properties[0]["temperature"] + doc.forcefield_objects["trajectory"].temperature[0] == pytest.approx(temp, abs=50) for name, doc in task_docs.items() if "MD Maker" in name ) - assert task_docs["production run"].forcefield_objects[ - "trajectory" - ].frame_properties[0]["temperature"] == pytest.approx(temp, abs=50) + assert task_docs["production run"].forcefield_objects["trajectory"].temperature[ + 0 + ] == pytest.approx(temp, abs=50) # check that MD Maker Energies are close # TODO: This may be unnecessary because it changes from model to model @@ -211,7 +211,7 @@ def test_mpmorph_mlff_maker(ff_name, si_structure, test_dir, clean_dir): assert all( any( - doc.forcefield_objects["trajectory"].frame_properties[0]["temperature"] + doc.forcefield_objects["trajectory"].temperature[0] == pytest.approx(T, abs=100) for name, doc in task_docs.items() if "K" in name diff --git a/tests/forcefields/test_md.py b/tests/forcefields/test_md.py index 0b0544902..141b844f1 100644 --- a/tests/forcefields/test_md.py +++ b/tests/forcefields/test_md.py @@ -146,9 +146,8 @@ def test_ml_ff_md_maker( assert len(task_doc.objects["trajectory"]) == n_steps + 1 assert task_doc.objects == task_doc.forcefield_objects # test legacy alias assert all( - key in step + getattr(task_doc.objects["trajectory"], key, None) is not None for key in ("energy", "forces", "stress", "velocities", "temperature") - for step in task_doc.objects["trajectory"].frame_properties ) if ff_maker := name_to_maker.get(ff_name): with pytest.warns(FutureWarning): @@ -184,32 +183,31 @@ def test_traj_file(traj_file, si_structure, clean_dir, ff_name="CHGNet"): assert len(traj_from_file) == n_steps + 1 if traj_file_fmt == "pmg": - assert all( - np.all( + other_traj = { + key: [ traj_from_file.frame_properties[idx][key] - == task_doc.objects["trajectory"].frame_properties[idx].get(key) - ) + for idx in range(len(traj_from_file)) + ] for key in ("energy", "temperature", "forces", "velocities") - for idx in range(n_steps + 1) - ) + } elif traj_file_fmt == "ase": - traj_as_dict = [ - { - "energy": traj_from_file[idx].get_potential_energy(), - "temperature": traj_from_file[idx].get_temperature(), - "forces": traj_from_file[idx].get_forces(), - "velocities": traj_from_file[idx].get_velocities(), - } - for idx in range(1, n_steps + 1) - ] - assert all( - np.all( - traj_as_dict[idx - 1][key] - == task_doc.objects["trajectory"].frame_properties[idx].get(key) - ) - for key in ("energy", "temperature", "forces", "velocities") - for idx in range(1, n_steps + 1) + other_traj = { + k: [getattr(traj_from_file[idx], v)() for idx in range(n_steps + 1)] + for k, v in { + "energy": "get_potential_energy", + "temperature": "get_temperature", + "forces": "get_forces", + "velocities": "get_velocities", + }.items() + } + + assert all( + np.all( + np.array(other_traj[key]) + == np.array(getattr(task_doc.objects["trajectory"], key)) ) + for key in ("energy", "temperature", "forces", "velocities") + ) def test_nve_and_dynamics_obj(si_structure: Structure, test_dir: Path): @@ -285,9 +283,7 @@ def test_temp_schedule(ff_name, si_structure, clean_dir): response = run_locally(job, ensure_success=True) task_doc = response[next(iter(response))][1].output - temp_history = [ - step["temperature"] for step in task_doc.objects["trajectory"].frame_properties - ] + temp_history = task_doc.objects["trajectory"].temperature assert temp_history[-1] > temp_schedule[0]