Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add HDF5 support for trajs and model_devis #259

Merged
merged 17 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions dpgen2/entrypoint/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ def run_diffcsp_args():
doc_gen_tasks = "Number of DiffCSP generation tasks"
doc_gen_command = "Command for DiffCSP generation"
doc_relax_group_size = "Group size for relaxation"
doc_use_hdf5 = "Use HDF5 to store trajs and model_devis"
return [
Argument(
"gen_tasks",
Expand All @@ -380,6 +381,13 @@ def run_diffcsp_args():
default=100,
doc=doc_relax_group_size,
),
Argument(
"use_hdf5",
bool,
optional=True,
default=False,
doc=doc_use_hdf5,
),
]


Expand Down
5 changes: 4 additions & 1 deletion dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
RunDPTrain,
RunLmp,
RunRelax,
RunRelaxHDF5,
SelectConfs,
)
from dpgen2.op.caly_evo_step_merge import (
Expand Down Expand Up @@ -167,6 +168,7 @@ def make_concurrent_learning_op(
upload_python_packages: Optional[List[os.PathLike]] = None,
valid_data: Optional[S3Artifact] = None,
train_optional_files: Optional[List[str]] = None,
explore_config: Optional[dict] = None,
):
if train_style in ("dp", "dp-dist"):
prep_run_train_op = PrepRunDPTrain(
Expand Down Expand Up @@ -234,7 +236,7 @@ def make_concurrent_learning_op(
"prep-run-diffcsp",
DiffCSPGen,
PrepRelax,
RunRelax,
RunRelaxHDF5 if explore_config["use_hdf5"] else RunRelax, # type: ignore
prep_config=prep_explore_config,
run_config=run_explore_config,
upload_python_packages=upload_python_packages,
Expand Down Expand Up @@ -552,6 +554,7 @@ def workflow_concurrent_learning(
upload_python_packages=upload_python_packages,
valid_data=valid_data,
train_optional_files=train_optional_files,
explore_config=explore_config,
)
scheduler = make_naive_exploration_scheduler(config)

Expand Down
7 changes: 5 additions & 2 deletions dpgen2/exploration/render/traj_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

import dpdata
import numpy as np
from dflow.python.opio import (
HDF5Dataset,
)

from ..deviation import (
DeviManager,
Expand All @@ -30,7 +33,7 @@ class TrajRender(ABC):
@abstractmethod
def get_model_devi(
self,
files: List[Path],
files: Union[List[Path], List[HDF5Dataset]],
) -> DeviManager:
r"""Get model deviations from recording files.

Expand All @@ -48,7 +51,7 @@ def get_model_devi(
@abstractmethod
def get_confs(
self,
traj: List[Path],
traj: Union[List[Path], List[HDF5Dataset]],
id_selected: List[List[int]],
type_map: Optional[List[str]] = None,
conf_filters: Optional["ConfFilters"] = None,
Expand Down
21 changes: 17 additions & 4 deletions dpgen2/exploration/render/traj_render_lammps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import json
from io import (
StringIO,
)
from pathlib import (
Path,
)
Expand All @@ -12,6 +15,9 @@

import dpdata
import numpy as np
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.utils import (
setup_ele_temp,
Expand Down Expand Up @@ -42,7 +48,7 @@

def get_model_devi(
self,
files: List[Path],
files: Union[List[Path], List[HDF5Dataset]],
) -> DeviManager:
ntraj = len(files)

Expand All @@ -53,7 +59,10 @@
return model_devi

def _load_one_model_devi(self, fname, model_devi):
dd = np.loadtxt(fname)
if isinstance(fname, HDF5Dataset):
dd = fname.get_data()

Check warning on line 63 in dpgen2/exploration/render/traj_render_lammps.py

View check run for this annotation

Codecov / codecov/patch

dpgen2/exploration/render/traj_render_lammps.py#L63

Added line #L63 was not covered by tests
else:
dd = np.loadtxt(fname)
if len(np.shape(dd)) == 1: # In case model-devi.out is 1-dimensional
dd = dd.reshape((1, len(dd)))

Expand Down Expand Up @@ -92,7 +101,7 @@

def get_confs(
self,
trajs: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
id_selected: List[List[int]],
type_map: Optional[List[str]] = None,
conf_filters: Optional["ConfFilters"] = None,
Expand All @@ -108,7 +117,11 @@
ms = dpdata.MultiSystems(type_map=type_map)
for ii in range(ntraj):
if len(id_selected[ii]) > 0:
ss = dpdata.System(trajs[ii], fmt=traj_fmt, type_map=type_map)
if isinstance(trajs[ii], HDF5Dataset):
traj = StringIO(trajs[ii].get_data()) # type: ignore

Check warning on line 121 in dpgen2/exploration/render/traj_render_lammps.py

View check run for this annotation

Codecov / codecov/patch

dpgen2/exploration/render/traj_render_lammps.py#L121

Added line #L121 was not covered by tests
else:
traj = trajs[ii]
ss = dpdata.System(traj, fmt=traj_fmt, type_map=type_map)
ss.nopbc = self.nopbc
if ele_temp:
self.set_ele_temp(ss, ele_temp[ii])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
List,
Optional,
Tuple,
Union,
)

from dflow.python import (
FatalError,
)
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.report import (
ExplorationReport,
Expand Down Expand Up @@ -67,7 +71,7 @@ def reached_max_iteration(self):
def plan_next_iteration(
self,
report: Optional[ExplorationReport] = None,
trajs: Optional[List[Path]] = None,
trajs: Optional[Union[List[Path], List[HDF5Dataset]]] = None,
) -> Tuple[bool, Optional[BaseExplorationTaskGroup], Optional[ConfSelector]]:
if self.complete():
raise FatalError("Cannot plan because the stage has completed.")
Expand Down
8 changes: 6 additions & 2 deletions dpgen2/exploration/scheduler/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
List,
Optional,
Tuple,
Union,
)

import numpy as np
from dflow.python import (
FatalError,
)
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.report import (
ExplorationReport,
Expand Down Expand Up @@ -110,7 +114,7 @@ def force_stage_complete(self):
def plan_next_iteration(
self,
report: Optional[ExplorationReport] = None,
trajs: Optional[List[Path]] = None,
trajs: Optional[Union[List[Path], List[HDF5Dataset]]] = None,
) -> Tuple[bool, Optional[ExplorationTaskGroup], Optional[ConfSelector]]:
"""
Make the plan for the next DPGEN iteration.
Expand All @@ -119,7 +123,7 @@ def plan_next_iteration(
----------
report : ExplorationReport
The exploration report of this iteration.
trajs : List[Path]
trajs : Union[List[Path], List[HDF5Dataset]]
A list of configurations generated during the exploration. May be used to generate new configurations for the next iteration.

Returns
Expand Down
11 changes: 7 additions & 4 deletions dpgen2/exploration/scheduler/stage_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
from typing import (
List,
Tuple,
Union,
)

from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.report import (
Expand Down Expand Up @@ -87,7 +92,7 @@ def get_reports(self) -> List[ExplorationReport]:
def plan_next_iteration(
self,
report: ExplorationReport,
trajs: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
) -> Tuple[bool, ExplorationTaskGroup, ConfSelector]:
"""
Make the plan for the next iteration of the stage.
Expand All @@ -96,11 +101,9 @@ def plan_next_iteration(

Parameters
----------
hist_reports : List[ExplorationReport]
The historical exploration report of the stage. If this is the first iteration of the stage, this list is empty.
report : ExplorationReport
The exploration report of this iteration.
confs : List[Path]
trajs : Union[List[Path], List[HDF5Dataset]]
A list of configurations generated during the exploration. May be used to generate new configurations for the next iteration.

Returns
Expand Down
8 changes: 6 additions & 2 deletions dpgen2/exploration/selector/conf_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@
Optional,
Set,
Tuple,
Union,
)

import dpdata
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove unused import.

The dpdata module is imported but unused.

Apply this diff to remove the unused import:

-import dpdata
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
import dpdata
Tools
Ruff

16-16: dpdata imported but unused

Remove unused import: dpdata

(F401)

from dflow.python.opio import (
wanghan-iapcm marked this conversation as resolved.
Show resolved Hide resolved
HDF5Dataset,
)

from dpgen2.exploration.report import (
ExplorationReport,
Expand All @@ -29,8 +33,8 @@ class ConfSelector(ABC):
@abstractmethod
def select(
self,
trajs: List[Path],
model_devis: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
model_devis: Union[List[Path], List[HDF5Dataset]],
type_map: Optional[List[str]] = None,
optional_outputs: Optional[List[Path]] = None,
) -> Tuple[List[Path], ExplorationReport]:
Expand Down
8 changes: 6 additions & 2 deletions dpgen2/exploration/selector/conf_selector_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@
List,
Optional,
Tuple,
Union,
)

import dpdata
import numpy as np
Comment on lines 15 to 16
Copy link

@coderabbitai coderabbitai bot Sep 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove unused imports.

The static analysis tool suggests that the following imports are unused:

  • dpdata (line 15)
  • numpy (line 16)

Please remove these unused imports to keep the code clean.

Tools
Ruff

15-15: dpdata imported but unused

Remove unused import: dpdata

(F401)


16-16: numpy imported but unused

Remove unused import: numpy

(F401)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems Ruff hasn't been enabled in the dpgen2 repository.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@njzjz, you're correct that Ruff hasn't been enabled in the dpgen2 repository. If you want to enforce linting rules like removing unused imports, you might consider enabling Ruff in the future. For now, you can disregard the suggestion to remove the unused imports.

from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.render import (
TrajRender,
Expand Down Expand Up @@ -52,8 +56,8 @@ def __init__(

def select(
self,
trajs: List[Path],
model_devis: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
model_devis: Union[List[Path], List[HDF5Dataset]],
type_map: Optional[List[str]] = None,
optional_outputs: Optional[List[Path]] = None,
) -> Tuple[List[Path], ExplorationReport]:
Expand Down
4 changes: 3 additions & 1 deletion dpgen2/flow/dpgen_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import (
List,
Optional,
Union,
)

import jsonpickle
Expand All @@ -35,6 +36,7 @@
OPIO,
Artifact,
BigParameter,
HDF5Datasets,
OPIOSign,
PythonOPTemplate,
Slices,
Expand Down Expand Up @@ -91,7 +93,7 @@ def get_input_sign(cls):
{
"exploration_scheduler": BigParameter(ExplorationScheduler),
"exploration_report": BigParameter(ExplorationReport),
"trajs": Artifact(List[Path]),
"trajs": Artifact(Union[List[Path], HDF5Datasets]),
}
)

Expand Down
1 change: 1 addition & 0 deletions dpgen2/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
)
from .run_relax import (
RunRelax,
RunRelaxHDF5,
)
from .select_confs import (
SelectConfs,
Expand Down
Loading