Merge pull request #31 from mir-group/develop

Version 0.3.0
mir-group · May 7, 2021 · 8969264 · 8969264
2 parents 6de1cb9 + 04a1f07
commit 8969264
Show file tree

Hide file tree

Showing 25 changed files with 584 additions and 123 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+md_runs/
 simon_configs/
 .idea/
 .vscode/

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,8 +6,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 Most recent change on the bottom.
 
+## [Unreleased]
+
+## [0.3.0] - 2021-05-07
+### Added
+- Sub-builders can be skipped in `instantiate` by setting them to `None`
+- More flexible model initialization
+- MD with a NequIP ASE calculator, plus a run-MD script with a custom Nose-Hoover thermostat
+
+### Changed
+- PBC must be explicit if a cell is provided
+- Training now uses atomic file writes to avoid corruption if interrupted
+- `feature_embedding` renamed to `chemical_embedding` in default models
+
+### Fixed
+- `BesselBasis` now works on GPU when `trainable=False`
+- Dataset `extra_fixed_fields` are now added even if `get_data()` returns `AtomicData` objects
+
 ## [0.2.1] - 2021-05-03
 ### Fixed
 - `load_deployed_model` now correctly loads all metadata
 
-## [0.2.0] - 2021-04-30
+## [0.2.0] - 2021-04-30
diff --git a/README.md b/README.md
@@ -32,10 +32,10 @@ pip install git+https://github.com/rusty1s/pytorch_geometric.git
 * Install [e3nn](https://github.com/e3nn/e3nn): 
 
 ```
-pip install git+https://github.com/e3nn/e3nn.git 
+pip install --no-deps git+https://github.com/e3nn/e3nn.git 
 ```
 
-* Install [`opt_einsum_fx`](https://github.com/Linux-cpp-lisp/opt_einsum_fx) for optimized `e3nn` operations:
+* Install [`opt_einsum_fx`](https://github.com/Linux-cpp-lisp/opt_einsum_fx) for optimized `e3nn` operations --- this is very important for performance:
 
 ```bash
 $ git clone https://github.com/Linux-cpp-lisp/opt_einsum_fx.git
@@ -55,7 +55,7 @@ $ pip install .
 $ pip install -U git+https://github.com/Linux-cpp-lisp/pytorch_ema
 ```
 
-* We use [Weights&Biases](https://wandb.ai) to keep track of experiments. This is not a strict requirement, you can use our software without this, but it may make your life easier. If you want to use it, create an account [here](https://wandb.ai) and install it: 
+* We use [Weights&Biases](https://wandb.ai) to keep track of experiments. This is not a strict requirement; you can use our package without it, but it may make your life easier. If you want to use it, create an account [here](https://wandb.ai) and install it: 
 
 ```
 pip install wandb
@@ -78,14 +78,6 @@ pip install pytest
 pytest ./tests
 ```
 
-One some platforms, the installation may complain about the scikit learn installation. If that's the case, specifically install the following scikit-learn version:
-
-```
-pip install -U scikit-learn==0.23.0
-```
-
-That should fix it.
-
 ### Tutorial 
 
 The best way to learn how to use NequIP is [through the tutorial notebook hosted here](https://deepnote.com/project/2412ca93-7ad1-4458-972c-5d5add5a667e) 

diff --git a/configs/example.yaml b/configs/example.yaml
@@ -9,32 +9,39 @@ run_name: example-run
 seed: 0                                                                           # random number seed for numpy and torch
 restart: false                                                                    # set True for a restarted run
 append: false                                                                     # set True if a restarted run should append to the previous log file
-
-default_dtype: float32                                                             # type of float, e.g. float32 and float64
+default_dtype: float32                                                            # type of float, e.g. float32 and float64
 
 # network
 compile_model: False                                                              # whether to compile the constructed model to TorchScript
 num_basis: 8                                                                      # number of basis functions
 r_max: 4.0                                                                        # cutoff radius
 irreps_edge_sh: 0e + 1o + 2e                                                      # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
-feature_irreps_hidden: 32x0o + 32x0e + 16x1o + 16x1e + 8x2o + 8x2e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
+chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
+feature_irreps_hidden: 32x0o + 32x0e + 16x1o + 16x1e + 8x2o + 8x2e                # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 BesselBasis_trainable: true                                                       # set true to train the bessel weights
 nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
-num_layers: 3                                                                     # number of interaction blocks, we found 5-6 to work best
+num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 resnet: false                                                                     # set True to make interaction block a resnet-style update
 PolynomialCutoff_p: 6                                                             # p-value used in polynomial cutoff function
 invariant_layers: 1                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 invariant_neurons: 8                                                              # number of hidden neurons in radial function, again keep this small for MD applications, 8 - 32, smaller is faster
 avg_num_neighbors: null                                                           # number of neighbors to divide by, None => no normalization.
 use_sc: true                                                                      # use self-connection or not, usually gives big improvement
-model_uniform_init: false                         # whether to use uniform (instead of normal) initialization for e3nn operations
 
 # to specify different parameters for each convolutional layer, try examples below
-# layer1_use_sc: true                                                             # use "layer{i}_" prefix to specify parameters for only one of the layer, 
-# priority for different definition: 
-#   invariant_neurons < InteractionBlock_invariant_neurons < layer{i}_invariant_neurons 
-
+# layer1_use_sc: true                                                             # use the "layer{i}_" prefix to specify a parameter for only one of the layers
+# priority of the different definitions (lowest to highest):
+#   invariant_neurons < InteractionBlock_invariant_neurons < layer{i}_invariant_neurons
+
+
+# how to initialize the weights of the model:
+# this can be the importable name of any function that can be `model.apply`ed to initialize some weights in the model. NequIP provides a number of useful initializers:
+#model_initializers:
+#  - nequip.utils.initialization.uniform_initialize_fcs
+#  - nequip.utils.initialization.uniform_initialize_tps
+#  - nequip.utils.initialization.orthogonal_initialize_linears
+##  - nequip.utils.initialization.uniform_initialize_linears
 
 # whether to apply a shift and scale, defined per-species, to the atomic energies
 PerSpeciesScaleShift_enable: false

diff --git a/configs/minimal.yaml b/configs/minimal.yaml
@@ -7,10 +7,15 @@ seed: 0
 num_basis: 8
 r_max: 4.0
 irreps_edge_sh: 0e + 1o
-conv_to_output_hidden_irreps_out: 16x0o + 16x0e + 16x1o + 16x1e + 16x2o + 16x2e
-feature_irreps_hidden: 16x0o + 16x0e
+conv_to_output_hidden_irreps_out: 16x0e
+feature_irreps_hidden: 16x0o + 16x0e + 16x1o + 16x1e + 16x2o + 16x2e
 model_uniform_init: false
 
+model_initializers:
+  - nequip.utils.initialization.uniform_initialize_fcs
+  - nequip.utils.initialization.uniform_initialize_tps
+  - nequip.utils.initialization.orthogonal_initialize_linears
+
 # data
 dataset: aspirin
 dataset_file_name: benchmark_data/aspirin_ccsd-train.npz

diff --git a/nequip/_version.py b/nequip/_version.py
@@ -2,4 +2,4 @@
 # See Python packaging guide
 # https://packaging.python.org/guides/single-sourcing-package-version/
 
-__version__ = "0.2.1"
+__version__ = "0.3.0"
diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py
@@ -5,7 +5,7 @@
 
 import warnings
 from copy import deepcopy
-from typing import Union, Tuple, Dict
+from typing import Union, Tuple, Dict, Optional
 from collections.abc import Mapping
 
 import numpy as np
@@ -132,7 +132,7 @@ def from_points(
         self_interaction: bool = False,
         strict_self_interaction: bool = True,
         cell=None,
-        pbc: PBC = False,
+        pbc: Optional[PBC] = None,
         **kwargs,
     ):
         """Build neighbor graph from points, optionally with PBC.
@@ -154,6 +154,15 @@ def from_points(
         """
         if pos is None or r_max is None:
             raise ValueError("pos and r_max must be given.")
+
+        if pbc is None:
+            if cell is not None:
+                raise ValueError(
+                    "A cell was provided, but pbc weren't. Please explicitly probide PBC."
+                )
+            # there are no PBC if cell and pbc are not provided
+            pbc = False
+
         if isinstance(pbc, bool):
             pbc = (pbc,) * 3
         else:

diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
@@ -200,6 +200,8 @@ def process(self):
                 if key in data_list[0]:
                     fixed_fields[key] = data_list[0][key]
 
+            fixed_fields.update(self.extra_fixed_fields)
+
         elif len(data) == 2:
 
             # It's fields and fixed_fields
@@ -376,6 +378,8 @@ def statistics(
 class NpzDataset(AtomicInMemoryDataset):
     """Load data from an npz file.
 
+    To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``npz_keys``, or ``npz_fixed_fields``.
+
     Args:
         file_name (str): file name of the npz file
         key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys

diff --git a/nequip/dynamics/__init__.py b/nequip/dynamics/__init__.py
diff --git a/nequip/dynamics/nequip_calculator.py b/nequip/dynamics/nequip_calculator.py
@@ -0,0 +1,72 @@
+from typing import Union
+import torch
+
+from ase.calculators.calculator import Calculator, all_changes
+
+from nequip.data import AtomicData, AtomicDataDict
+import nequip.scripts.deploy
+
+
+class NequIPCalculator(Calculator):
+    """NequIP ASE Calculator."""
+
+    implemented_properties = ["energy", "forces"]
+
+    def __init__(
+        self,
+        model: torch.jit.ScriptModule,
+        r_max: float,
+        device: Union[str, torch.device],
+        energy_units_to_eV: float = 1.0,
+        length_units_to_A: float = 1.0,
+        **kwargs
+    ):
+        Calculator.__init__(self, **kwargs)
+        self.results = {}
+        self.model = model
+        self.r_max = r_max
+        self.device = device
+        self.energy_units_to_eV = energy_units_to_eV
+        self.length_units_to_A = length_units_to_A
+
+    @classmethod
+    def from_deployed_model(
+        cls, model_path, device: Union[str, torch.device] = "cpu", **kwargs
+    ):
+        # load model
+        model, metadata = nequip.scripts.deploy.load_deployed_model(
+            model_path=model_path, device=device
+        )
+        r_max = float(metadata[nequip.scripts.deploy.R_MAX_KEY])
+
+        # build nequip calculator
+        return cls(model=model, r_max=r_max, device=device, **kwargs)
+
+    def calculate(self, atoms=None, properties=["energy"], system_changes=all_changes):
+        """
+        Calculate properties.
+
+        :param atoms: ase.Atoms object
+        :param properties: [str], properties to be computed, used by ASE internally
+        :param system_changes: [str], system changes since last calculation, used by ASE internally
+        :return:
+        """
+        # call to base-class to set atoms attribute
+        Calculator.calculate(self, atoms)
+
+        # prepare data
+        data = AtomicData.from_ase(atoms=atoms, r_max=self.r_max)
+
+        data = data.to(self.device)
+
+        # predict + extract data
+        out = self.model(AtomicData.to_AtomicDataDict(data))
+        forces = out[AtomicDataDict.FORCE_KEY].detach().cpu().numpy()
+        energy = out[AtomicDataDict.TOTAL_ENERGY_KEY].detach().cpu().item()
+
+        # store results
+        self.results = {
+            "energy": energy * self.energy_units_to_eV,
+            # force has units eng / len:
+            "forces": forces * (self.energy_units_to_eV / self.length_units_to_A),
+        }
diff --git a/nequip/dynamics/nosehoover.py b/nequip/dynamics/nosehoover.py
@@ -0,0 +1,115 @@
+""" Custom Nose-Hoover NVT thermostat based on ASE.
+
+This code was originally written by Jonathan Mailoa based on these notes:
+
+    https://www2.ph.ed.ac.uk/~dmarendu/MVP/MVP03.pdf
+
+It was then adapted by Simon Batzner to be used within ASE. Parts of the overall outline of the class are also based on the Langevin class in ASE.
+"""
+
+import numpy as np
+
+from ase.md.md import MolecularDynamics
+from ase.md.velocitydistribution import Stationary, ZeroRotation
+from ase import units
+
+
+class NoseHoover(MolecularDynamics):
+    """Nose-Hoover (constant N, V, T) molecular dynamics.
+
+    Usage: NoseHoover(atoms, timestep, temperature, nvt_q)
+
+    atoms
+        The list of atoms.
+
+    timestep
+        The time step.
+
+    temperature
+        Target temperature of the MD run in [K]
+
+    nvt_q
+        Q in the Nose-Hoover equations
+
+    Example Usage:
+
+        nvt_dyn = NoseHoover(
+            atoms=atoms,
+            timestep=0.5 * units.fs,
+            temperature=300.,
+            nvt_q=334.
+        )
+
+    """
+
+    def __init__(
+        self,
+        atoms,
+        timestep,
+        temperature,
+        nvt_q,
+        trajectory=None,
+        logfile=None,
+        loginterval=1,
+        append_trajectory=False,
+    ):
+        # set angular and com momentum to zero, necessary for nose-hoover dynamics.
+        ZeroRotation(atoms)
+        Stationary(atoms)
+
+        # thermostat parameters
+        self.temp = temperature
+        self.nvt_q = nvt_q
+        self.dt = timestep
+        self.dtdt = np.power(self.dt, 2)
+        self.nvt_bath = 0.0
+
+        self.natoms = len(atoms)
+
+        MolecularDynamics.__init__(
+            self,
+            atoms,
+            timestep,
+            trajectory,
+            logfile,
+            loginterval,
+            append_trajectory=append_trajectory,
+        )
+
+    def step(self):
+        """Perform a MD step."""
+        masses = self.atoms.get_masses()
+
+        modified_acc = (
+            self.atoms.get_forces() / masses[:, np.newaxis]
+            - self.nvt_bath * self.atoms.get_velocities()
+        )
+        pos_fullstep = (
+            self.atoms.get_positions()
+            + self.dt * self.atoms.get_velocities()
+            + 0.5 * self.dtdt * modified_acc
+        )
+        vel_halfstep = self.atoms.get_velocities() + 0.5 * self.dt * modified_acc
+
+        self.atoms.set_positions(pos_fullstep)
+
+        e_kin_diff = 0.5 * (
+            np.sum(masses * np.sum(self.atoms.get_velocities() ** 2, axis=1))
+            - (3 * self.natoms + 1) * units.kB * self.temp
+        )
+
+        nvt_bath_halfstep = self.nvt_bath + 0.5 * self.dt * e_kin_diff / self.nvt_q
+        e_kin_diff_halfstep = 0.5 * (
+            np.sum(masses * np.sum(vel_halfstep ** 2, axis=1))
+            - (3 * self.natoms + 1) * units.kB * self.temp
+        )
+        self.nvt_bath = (
+            nvt_bath_halfstep + 0.5 * self.dt * e_kin_diff_halfstep / self.nvt_q
+        )
+        self.atoms.set_velocities(
+            (
+                vel_halfstep
+                + 0.5 * self.dt * (self.atoms.get_forces() / masses[:, np.newaxis])
+            )
+            / (1 + 0.5 * self.dt * self.nvt_bath)
+        )
diff --git a/nequip/models/_eng.py b/nequip/models/_eng.py
@@ -33,7 +33,7 @@ def EnergyModel(**shared_params) -> SequentialGraphNetwork:
         "spharm_edges": SphericalHarmonicEdgeAttrs,
         "radial_basis": RadialBasisEdgeEncoding,
         # -- Embed features --
-        "feature_embedding": AtomwiseLinear,
+        "chemical_embedding": AtomwiseLinear,
     }
 
     # add convnet layers