Merge pull request #35 from thorben-frank/v1.0-lrs-gems
V1.0 lrs gems
thorben-frank authored Jan 22, 2025
2 parents 6ae8d8b + 0b929bd commit 6c88d3f
Showing 35 changed files with 3,069 additions and 869 deletions.
2 changes: 2 additions & 0 deletions _MANIFEST.in
@@ -0,0 +1,2 @@
graft mlff/sph_ops/cgmatrix.npz
graft mlff/sph_ops/u_matrix.pickle
50 changes: 49 additions & 1 deletion setup.py → _setup.py
@@ -8,7 +8,7 @@
packages=find_packages(),
install_requires=[
"numpy",
"clu",
"clu == 0.0.9",
# "jax == 0.4.8",
"e3x",
"flax",
@@ -52,3 +52,51 @@
],
},
)

# [build-system]
# requires = ["setuptools>=42", "wheel"]
# build-backend = "setuptools.build_meta"
#
# [project]
# name = "mlff"
# version = "1.0"
# description = "Build Neural Networks for Force Fields with JAX"
# requires-python = ">=3.9"
# dependencies = [
# "numpy",
# "clu == 0.0.9",
# "e3x",
# "flax",
# "jaxopt",
# "jraph",
# "optax",
# "orbax-checkpoint",
# "portpicker",
# "pandas",
# "scikit-learn",
# "ase",
# "tqdm",
# "wandb",
# "pyyaml",
# "pytest",
# "h5py",
# "ml_collections"
# ]
#
# [project.scripts]
# evaluate = "mlff.cAPI.mlff_eval:evaluate"
# train = "mlff.cAPI.mlff_train:train"
# run_md = "mlff.cAPI.mlff_md:run_md"
# run_relaxation = "mlff.cAPI.mlff_structure_relaxation:run_relaxation"
# analyse_md = "mlff.cAPI.mlff_analyse:analyse_md"
# train_so3krates = "mlff.cAPI.mlff_train_so3krates:train_so3krates"
# train_so3kratACE = "mlff.cAPI.mlff_train_so3kratace:train_so3kratace"
# trajectory_to_xyz = "mlff.cAPI.mlff_postprocessing:trajectory_to_xyz"
# to_mlff_input = "mlff.cAPI.mlff_input_processing:to_mlff_input"
# train_so3krates_sparse = "mlff.CLI.run_training:train_so3krates_sparse"
# train_itp_net = "mlff.CLI.run_training_itp_net:train_itp_net"
# evaluate_itp_net = "mlff.CLI.run_evaluation_itp_net:evaluate_itp_net"
# evaluate_itp_net_on = "mlff.CLI.run_evaluation_itp_net_on:evaluate_itp_net_on"
# fine_tune_so3krates_sparse = "mlff.CLI.run_fine_tuning:fine_tune_so3krates_sparse"
# evaluate_so3krates_sparse = "mlff.CLI.run_evaluation:evaluate_so3krates_sparse"
# evaluate_so3krates_sparse_on = "mlff.CLI.run_evaluation_on:evaluate_so3krates_sparse_on"
1 change: 1 addition & 0 deletions mlff/CLI/run_evaluation.py
@@ -37,6 +37,7 @@ def evaluate_so3krates_sparse():
pick_idx=pick_idx,
on_split=args.on_split
)
print('Metrics are reported in eV and Angstrom.')
print(metrics)


9 changes: 9 additions & 0 deletions mlff/CLI/run_evaluation_itp_net_on.py
@@ -53,6 +53,14 @@ def evaluate_itp_net_on():
help='How many edges to put in a batch. If not set is determined from max_num_graphs and max_num_edges in '
'--datafile.'
)
parser.add_argument( #TODO: remove --max_num_pairs
"--max_num_pairs",
type=int,
default=None,
required=False,
help='How many pairs to put in a batch. If not set, it is determined from max_num_graphs and max_num_edges in '
'--datafile.'
)
parser.add_argument(
'--num_test',
type=int,
@@ -95,6 +103,7 @@ def evaluate_itp_net_on():
cfg.training.batch_max_num_graphs = args.max_num_graphs
cfg.training.batch_max_num_edges = args.max_num_edges
cfg.training.batch_max_num_nodes = args.max_num_nodes
cfg.training.batch_max_num_pairs = args.max_num_pairs

if args.write_batch_metrics_to is not None and cfg.training.batch_max_num_graphs > 2:
raise ValueError(
16 changes: 13 additions & 3 deletions mlff/CLI/run_evaluation_on.py
@@ -3,6 +3,7 @@
from mlff.config import from_config
from ml_collections import config_dict
import pathlib
from typing import Optional, Sequence


def evaluate_so3krates_sparse_on():
@@ -70,6 +71,14 @@ def evaluate_so3krates_sparse_on():
'`batch_max_num_nodes = 2` which allows one graph per batch, following the `jraph` logic '
'that one graph is used as padding graph.'
)
# parser.add_argument(
# '--testing_targets',
# type=str,
# required=False,
# nargs='+',
# default=['forces'],
# help='Targets for which the metrics should be calculated. Defaults to `forces`.'
# )
args = parser.parse_args()

if args.num_test is not None and args.write_batch_metrics_to is not None:
@@ -119,9 +128,10 @@ def evaluate_so3krates_sparse_on():
config=cfg,
num_test=args.num_test,
pick_idx=None,
write_batch_metrics_to=write_batch_metrics_to
)
print(metrics)
write_batch_metrics_to=write_batch_metrics_to,
# testing_targets=args.testing_targets
)
print(f"metrics: {metrics}")


if __name__ == '__main__':
17 changes: 16 additions & 1 deletion mlff/config/config.yaml
@@ -5,10 +5,14 @@ data:
length_unit: Angstrom # Length unit. Is converted to the data set units internally.
shift_mode: null # Options are null, mean, custom.
energy_shifts: null # Energy shifts to subtract.
split_seed: 0 # Seed used for splitting the data into training, validation and test.
neighbors_lr_bool: false # Calculate long-range neighborhood indices. Required for modules like DispersionEnergy.
neighbors_lr_cutoff: null # Cutoff for calculating the long-range neighborhoods. Note that it is not required to be
# equal to the lr_cutoff of the model. E.g. one can calculate neighbors up to 50 Ang and use no long-range cutoff
# in the model during training at all. See the README for more details.
filter:
min_distance: 0.75 # Minimal allowed distance in Angstrom. Is converted to the data set units internally.
max_force: 25. # Maximal allowed force component in eV/Angstrom. Is converted to the data set units internally.
split_seed: 0 # Seed used for splitting the data into training, validation and test.
model:
num_layers: 2 # Number of message passing layers.
num_features: 128 # Number of invariant features.
@@ -21,6 +25,8 @@
- 4
cutoff: 5.0 # Local cutoff to use.
cutoff_fn: cosine # Cutoff function to use.
cutoff_lr: null # Long-range cutoff used in the long-range modules. It is possible to use no long-range cutoff in
# the modules. See the README for details.
num_radial_basis_fn: 32 # Number of radial basis functions.
radial_basis_fn: physnet # Radial basis function to use.
activation_fn: silu # Activation function used in the MLPs.
@@ -39,6 +45,12 @@
energy_learn_atomic_type_scales: false
energy_learn_atomic_type_shifts: false
input_convention: positions # Input convention.
electrostatic_energy_bool: false
electrostatic_energy_scale: 1.0
dispersion_energy_bool: false
dispersion_energy_cutoff_lr_damping: null
dispersion_energy_scale: 1.0
zbl_repulsion_bool: true
optimizer:
name: adam # Name of the optimizer. See https://optax.readthedocs.io/en/latest/api.html#common-optimizers for available ones.
optimizer_args: null
@@ -57,6 +69,7 @@
num_valid: 50 # Number of validation points to draw from data.filepath.
batch_max_num_nodes: null # Maximal number of nodes per batch. Must be at least maximal number of atoms + 1 in the data set.
batch_max_num_edges: null # Maximal number of edges per batch. Must be at least maximal number of edges + 1 in the data set.
batch_max_num_pairs: null
# If batch_max_num_nodes and batch_max_num_edges are set to null, they will be determined from the max_num_of_graphs.
# If they are set to values, each batch will contain as many molecular structures/graphs as possible such that none of
# the three values batch_max_num_nodes, batch_max_num_edges and batch_max_num_of_graphs is exceeded.
@@ -66,6 +79,8 @@
loss_weights:
energy: 0.01 # Loss weight for the energy.
forces: 0.99 # Loss weight for the forces.
dipole_vec: 0.01
hirshfeld_ratios: 0.01
model_seed: 0 # Seed used for the initialization of the model parameters.
training_seed: 0 # Seed used for shuffling the batches during training.
log_gradient_values: False # Log the norm of the gradients for each set of weights.
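
Taken together, the new keys in this config.yaml diff control the long-range additions. The following is a minimal, illustrative sketch of how they might be combined to switch the long-range modules on; the keys mirror the diff above, but the concrete values (cutoffs, scales, loss weights) are placeholders rather than recommended settings:

# Illustrative sketch only; values are placeholders, not recommended settings.
data:
  neighbors_lr_bool: true          # build long-range neighborhood indices (needed e.g. for DispersionEnergy)
  neighbors_lr_cutoff: 10.0        # Angstrom; need not equal the model's cutoff_lr
model:
  cutoff: 5.0                      # local cutoff
  cutoff_lr: null                  # no long-range cutoff inside the modules
  electrostatic_energy_bool: true
  electrostatic_energy_scale: 1.0
  dispersion_energy_bool: true
  dispersion_energy_cutoff_lr_damping: null
  dispersion_energy_scale: 1.0
  zbl_repulsion_bool: true
training:
  batch_max_num_pairs: null        # placeholder; see the batching comments above
  loss_weights:
    energy: 0.01
    forces: 0.99
    dipole_vec: 0.01
    hirshfeld_ratios: 0.01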
8 changes: 8 additions & 0 deletions mlff/config/config_itp_net.yaml
@@ -39,6 +39,11 @@ model:
energy_learn_atomic_type_scales: false
energy_learn_atomic_type_shifts: false
input_convention: positions # Input convention.
electrostatic_energy_bool: false
electrostatic_energy_scale: 1.0
dispersion_energy_bool: false
dispersion_energy_scale: 1.0
zbl_repulsion_bool: true
optimizer:
name: adam # Name of the optimizer. See https://optax.readthedocs.io/en/latest/api.html#common-optimizers for available ones.
optimizer_args: null
@@ -57,6 +62,7 @@
num_valid: 50 # Number of validation points to draw from data.filepath.
batch_max_num_nodes: null # Maximal number of nodes per batch. Must be at least maximal number of atoms + 1 in the data set.
batch_max_num_edges: null # Maximal number of edges per batch. Must be at least maximal number of edges + 1 in the data set.
batch_max_num_pairs: null
# If batch_max_num_nodes and batch_max_num_edges are set to null, they will be determined from the max_num_of_graphs.
# If they are set to values, each batch will contain as many molecular structures/graphs as possible such that none of
# the three values batch_max_num_nodes, batch_max_num_edges and batch_max_num_of_graphs is exceeded.
@@ -66,6 +72,8 @@
loss_weights:
energy: 0.01 # Loss weight for the energy.
forces: 0.99 # Loss weight for the forces.
dipole_vec: 0.01
hirshfeld_ratios: 0.01
model_seed: 0 # Seed used for the initialization of the model parameters.
training_seed: 0 # Seed used for shuffling the batches during training.
log_gradient_values: False # Log the norm of the gradients for each set of weights.