Skip to content

Commit

Permalink
Adding function to initialize StructureData from vasp dir
Browse files Browse the repository at this point in the history
  • Loading branch information
bowen-bd committed Apr 29, 2024
1 parent 11315e5 commit 6408de5
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 53 deletions.
49 changes: 47 additions & 2 deletions chgnet/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(
forces: list[Sequence[Sequence[float]]],
stresses: list[Sequence[Sequence[float]]] | None = None,
magmoms: list[Sequence[Sequence[float]]] | None = None,
structure_ids: list[str] | None = None,
structure_ids: list | None = None,
graph_converter: CrystalGraphConverter | None = None,
shuffle: bool = True,
) -> None:
Expand All @@ -49,7 +49,7 @@ def __init__(
Default = None
magmoms (list[list[float]], optional): [data_size, n_atoms, 1]
Default = None
structure_ids (list[str], optional): a list of ids to track the structures
structure_ids (list, optional): a list of ids to track the structures
Default = None
graph_converter (CrystalGraphConverter, optional): Converts the structures
to graphs. If None, it will be set to CHGNet 0.3.0 converter
Expand Down Expand Up @@ -87,6 +87,51 @@ def __init__(
self.failed_idx: list[int] = []
self.failed_graph_id: dict[str, str] = {}

@classmethod
def from_vasp(
    cls,
    file_root: str,
    check_electronic_convergence: bool = True,
    save_path: str | None = None,
    graph_converter: CrystalGraphConverter | None = None,
    shuffle: bool = True,
):
    """Build a dataset directly from a directory of VASP outputs.

    The directory is parsed with ``utils.parse_vasp_dir`` and the resulting
    structures and labels are passed on to the class constructor.

    Args:
        file_root (str): the directory of the VASP calculation outputs
        check_electronic_convergence (bool): forwarded to the parser; if True,
            an Exception is raised for a VASP calculation that did not achieve
            electronic convergence.
            Default = True
        save_path (str): forwarded to the parser as the path to save the
            parsed VASP labels
            Default = None
        graph_converter (CrystalGraphConverter, optional): Converts the
            structures to graphs. If None, it will be set to CHGNet 0.3.0
            converter with AtomGraph cutoff = 6A.
        shuffle (bool): whether to shuffle the sequence of dataset
            Default = True

    Returns:
        An instance of ``cls`` holding the parsed structures, per-atom
        energies, forces and, when present, stresses and magmoms. Structure
        ids are the integers 0..N-1 in parse order.
    """

    def _none_if_missing(labels):
        # The constructor expects None for an absent label; the parser may
        # hand back either None or an empty list in that situation.
        return None if labels in [None, []] else labels

    parsed = utils.parse_vasp_dir(
        file_root=file_root,
        check_electronic_convergence=check_electronic_convergence,
        save_path=save_path,
    )

    # Look the entries up in the same order as the constructor arguments.
    structures = parsed["structure"]
    energies = parsed["energy_per_atom"]
    forces = parsed["force"]
    stresses = _none_if_missing(parsed["stress"])
    magmoms = _none_if_missing(parsed["magmom"])
    ids = np.arange(len(structures))

    return cls(
        structures=structures,
        energies=energies,
        forces=forces,
        stresses=stresses,
        magmoms=magmoms,
        structure_ids=ids,
        graph_converter=graph_converter,
        shuffle=shuffle,
    )

def __len__(self) -> int:
    """Return how many entries are listed in ``self.keys``."""
    n_entries = len(self.keys)
    return n_entries
Expand Down
12 changes: 11 additions & 1 deletion chgnet/utils/vasp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
from monty.io import reverse_readfile
from pymatgen.io.vasp.outputs import Oszicar, Vasprun

from chgnet.utils import write_json

if TYPE_CHECKING:
from pymatgen.core import Structure


def parse_vasp_dir(
file_root: str, check_electronic_convergence: bool = True
file_root: str,
check_electronic_convergence: bool = True,
save_path: str | None = None,
) -> dict[str, list]:
"""Parse VASP output files into structures and labels
By default, the magnetization is read from mag_x from VASP,
Expand All @@ -22,6 +26,8 @@ def parse_vasp_dir(
file_root (str): the directory of the VASP calculation outputs
check_electronic_convergence (bool): if set to True, raise an Exception for any
VASP calculation that did not achieve electronic convergence.
Default = True
save_path (str): path to save the parsed VASP labels
"""
if os.path.exists(file_root) is False:
raise FileNotFoundError("No such file or directory")
Expand Down Expand Up @@ -153,6 +159,10 @@ def parse_vasp_dir(
if dataset["uncorrected_total_energy"] == []:
raise RuntimeError(f"No data parsed from {file_root}!")

if save_path is not None:
save_dict = dataset.copy()
save_dict["structure"] = [struct.as_dict() for struct in dataset["structure"]]
write_json(save_dict, save_path)
return dataset


Expand Down
Loading

0 comments on commit 6408de5

Please sign in to comment.