refactor external calculators; better handle devices

atomind-ai · Jul 11, 2024 · 7cbf186 · 7cbf186
1 parent 0ffedd3
commit 7cbf186
Show file tree

Hide file tree

Showing 10 changed files with 620 additions and 289 deletions.
diff --git a/mlip_arena/models/__init__.py b/mlip_arena/models/__init__.py
@@ -18,37 +18,29 @@ class MLIP(
     PyTorchModelHubMixin,
     tags=["atomistic-simulation", "MLIP"],
 ):
-    def __init__(self, *args, **kwargs) -> None:
-        super().__init__(*args, **kwargs)
-
-
-class ModuleMLIP(MLIP):
-    def __init__(self, model: nn.Module, *args, **kwargs) -> None:
-        super().__init__(*args, **kwargs)
-        self.add_module("model", model)
+    def __init__(self, model: nn.Module) -> None:
+        super().__init__()
+        self.model = model
 
     def forward(self, x):
-        print("Forwarding...")
-        out = self.model(x)
-        print("Forwarded!")
-        return out
-
+        return self.model(x)
 
-class MLIPCalculator(Calculator):
+class MLIPCalculator(MLIP, Calculator):
     name: str
-    # device: torch.device
-    # model: MLIP
     implemented_properties: list[str] = ["energy", "forces", "stress"]
 
     def __init__(
         self,
+        model,
         # ASE Calculator
         restart=None,
         atoms=None,
         directory=".",
-        **kwargs,
+        calculator_kwargs: dict = {},
     ):
-        super().__init__(restart=restart, atoms=atoms, directory=directory, **kwargs)
+        MLIP.__init__(self, model=model)  # Initialize MLIP part
+        Calculator.__init__(self, restart=restart, atoms=atoms, directory=directory, **calculator_kwargs)  # Initialize ASE Calculator part
+        # Additional initialization if needed
         # self.name: str = self.__class__.__name__
         # self.device = device or torch.device(
         #     "cuda" if torch.cuda.is_available() else "cpu"

diff --git a/mlip_arena/models/chgnet.py b/mlip_arena/models/chgnet.py
@@ -7,10 +7,12 @@
 from huggingface_hub import hf_hub_download
 from torch_geometric.data import Data
 
-from mlip_arena.models import MLIP, MLIPCalculator, ModuleMLIP
+from mlip_arena.models import MLIP, MLIPCalculator
 
+# TODO: WIP
 
-class CHGNetCalculator(MLIPCalculator):
+
+class CHGNet(MLIPCalculator):
     def __init__(
         self,
         device: torch.device | None = None,
@@ -19,23 +21,15 @@ def __init__(
         directory=".",
         **kwargs,
     ):
-        super().__init__(restart=restart, atoms=atoms, directory=directory, **kwargs)
-
-        self.name: str = self.__class__.__name__
-
-        fpath = hf_hub_download(
-            repo_id="cyrusyc/mace-universal",
-            subfolder="pretrained",
-            filename="2023-12-12-mace-128-L1_epoch-199.model",
-            revision="main",
-        )
-
         self.device = device or torch.device(
             "cuda" if torch.cuda.is_available() else "cpu"
         )
 
-        self.model = torch.load(fpath, map_location=self.device)
+        super().__init__(
+            model=model, restart=restart, atoms=atoms, directory=directory, **kwargs
+        )
 
+        self.name: str = self.__class__.__name__
         self.implemented_properties = ["energy", "forces", "stress"]
 
     def calculate(

diff --git a/mlip_arena/models/externals.py b/mlip_arena/models/externals.py
@@ -0,0 +1,165 @@
+import os
+import urllib
+from typing import Literal
+
+import torch
+from alignn.ff.ff import AlignnAtomwiseCalculator, get_figshare_model_ff
+from ase import Atoms
+from chgnet.model.dynamics import CHGNetCalculator
+from chgnet.model.model import CHGNet
+from fairchem.core import OCPCalculator
+from mace.calculators import MACECalculator
+
+
+# Avoid circular import
+def get_freer_device() -> torch.device:
+    """Choose the torch device that calculators should run on.
+
+    Preference order:
+
+    1. A CUDA device, if any is visible. Among them, the one with the largest
+       ``total_memory - torch.cuda.memory_allocated(i)`` wins; ties go to the
+       lowest index.
+    2. ``mps`` when ``torch.backends.mps.is_available()`` is true.
+    3. ``cpu`` otherwise.
+
+    The decision is reported on stdout with ``print``. No exception is raised
+    when no accelerator is found; the function falls back to CPU.
+
+    NOTE(review): ``torch.cuda.memory_allocated`` presumably only accounts for
+    memory held by this process, so memory used by other processes would not
+    be reflected in the ranking -- confirm against the PyTorch documentation.
+
+    Returns:
+        torch.device: The selected CUDA device, ``mps``, or ``cpu``.
+    """
+    n_gpus = torch.cuda.device_count()
+
+    if n_gpus > 0:
+        # Per-device headroom estimate, in bytes.
+        headroom = []
+        for idx in range(n_gpus):
+            total = torch.cuda.get_device_properties(idx).total_memory
+            headroom.append(total - torch.cuda.memory_allocated(idx))
+
+        # max() returns the first maximal index, matching list.index(max(...)).
+        best = max(range(n_gpus), key=headroom.__getitem__)
+        chosen = torch.device(f"cuda:{best}")
+        print(
+            f"Selected GPU {chosen} with {headroom[best] / 1024**2:.2f} MB free memory from {n_gpus} GPUs"
+        )
+        return chosen
+
+    if torch.backends.mps.is_available():
+        print("No GPU available. Using MPS.")
+        return torch.device("mps")
+
+    print("No GPU or MPS available. Using CPU.")
+    return torch.device("cpu")
+
+
+class MACE_MP_Medium(MACECalculator):
+    """``MACECalculator`` backed by a checkpoint fetched from a fixed URL.
+
+    The checkpoint is downloaded once into ``~/.cache/mace`` and reused on
+    later instantiations.
+    """
+
+    def __init__(self, device=None, default_dtype="float32", **kwargs):
+        """Download (if needed) the checkpoint and initialise the calculator.
+
+        Args:
+            device: Device passed to ``MACECalculator``. When falsy, the result
+                of ``get_freer_device()`` is used, converted to ``str``.
+            default_dtype: Forwarded to ``MACECalculator`` unchanged.
+            **kwargs: Extra keyword arguments forwarded to ``MACECalculator``.
+
+        Raises:
+            RuntimeError: If the download returns an HTML page instead of a
+                model file.
+        """
+        # ``import urllib`` alone does not load the ``request`` submodule, so
+        # import it explicitly here instead of relying on another package
+        # having done so.
+        import urllib.request
+
+        checkpoint_url = "http://tinyurl.com/5yyxdm76"
+        cache_dir = os.path.expanduser("~/.cache/mace")
+        # Cache file name: the URL basename reduced to alphanumerics and "_".
+        checkpoint_url_name = "".join(
+            c for c in os.path.basename(checkpoint_url) if c.isalnum() or c == "_"
+        )
+        cached_model_path = f"{cache_dir}/{checkpoint_url_name}"
+        if not os.path.isfile(cached_model_path):
+            os.makedirs(cache_dir, exist_ok=True)
+            print(f"Downloading MACE model from {checkpoint_url!r}")
+            # Download to a temporary name first so that an interrupted or
+            # failed download is never mistaken for a valid cached checkpoint.
+            partial_path = f"{cached_model_path}.part"
+            _, http_msg = urllib.request.urlretrieve(checkpoint_url, partial_path)
+            # ``http_msg`` is a header mapping; ``in`` would only test header
+            # names, so inspect the parsed content type instead.
+            if http_msg.get_content_type() == "text/html":
+                os.remove(partial_path)
+                raise RuntimeError(
+                    f"Model download failed, please check the URL {checkpoint_url}"
+                )
+            os.replace(partial_path, cached_model_path)
+            print(f"Cached MACE model to {cached_model_path}")
+        model = cached_model_path
+        msg = f"Using Materials Project MACE for MACECalculator with {model}"
+        print(msg)
+
+        device = device or str(get_freer_device())
+
+        super().__init__(
+            model_paths=model, device=device, default_dtype=default_dtype, **kwargs
+        )
+
+
+class CHGNet(CHGNetCalculator):
+    """``CHGNetCalculator`` wrapper that selects a device when none is given.
+
+    This class shares its name with the ``CHGNet`` model class imported at the
+    top of the module; the ``model`` annotation in ``__init__`` refers to that
+    imported model class.
+    """
+
+    def __init__(
+        self,
+        model: CHGNet | None = None,
+        use_device: str | None = None,
+        stress_weight: float | None = 1 / 160.21766208,
+        on_isolated_atoms: Literal["ignore", "warn", "error"] = "warn",
+        **kwargs,
+    ) -> None:
+        """Forward all options to ``CHGNetCalculator``.
+
+        Args:
+            model: Forwarded unchanged as ``model``.
+            use_device: Device name. When falsy, ``str(get_freer_device())``
+                is used instead.
+            stress_weight: Forwarded unchanged as ``stress_weight``.
+            on_isolated_atoms: Forwarded unchanged as ``on_isolated_atoms``.
+            **kwargs: Extra keyword arguments forwarded to the base class.
+        """
+        if not use_device:
+            use_device = str(get_freer_device())
+
+        calculator_options = {
+            "model": model,
+            "use_device": use_device,
+            "stress_weight": stress_weight,
+            "on_isolated_atoms": on_isolated_atoms,
+        }
+        super().__init__(**calculator_options, **kwargs)
+
+    def calculate(
+        self,
+        atoms: Atoms | None = None,
+        properties: list | None = None,
+        system_changes: list | None = None,
+    ) -> None:
+        """Run the base-class calculation, then drop the ``crystal_fea`` result.
+
+        Args:
+            atoms: Forwarded to ``CHGNetCalculator.calculate``.
+            properties: Forwarded to ``CHGNetCalculator.calculate``.
+            system_changes: Forwarded to ``CHGNetCalculator.calculate``.
+        """
+        super().calculate(atoms, properties, system_changes)
+
+        # Removed for ase.io.write compatibility.
+        if "crystal_fea" in self.results:
+            del self.results["crystal_fea"]
+
+
+class EquiformerV2(OCPCalculator):
+    """``OCPCalculator`` preset for the EquiformerV2 OC22 checkpoint."""
+
+    def __init__(
+        self,
+        model_name="EquiformerV2-lE4-lF100-S2EFS-OC22",
+        local_cache="/tmp/ocp/",
+        cpu=False,
+        seed=0,
+        **kwargs,
+    ) -> None:
+        """Initialise the underlying ``OCPCalculator``.
+
+        Args:
+            model_name: Forwarded unchanged as ``model_name``.
+            local_cache: Forwarded unchanged as ``local_cache``.
+            cpu: Forwarded unchanged as ``cpu``.
+            seed: Forwarded unchanged as ``seed``.
+            **kwargs: Extra keyword arguments forwarded to ``OCPCalculator``.
+        """
+        super().__init__(
+            model_name=model_name,
+            local_cache=local_cache,
+            cpu=cpu,
+            # Pass the caller's seed through instead of a hard-coded 0.
+            seed=seed,
+            **kwargs,
+        )
+
+    def calculate(self, atoms: Atoms, properties, system_changes) -> None:
+        """Run the base-class calculation and add a ``force`` result entry.
+
+        Args:
+            atoms: Structure to evaluate; forwarded to the base class.
+            properties: Forwarded to ``OCPCalculator.calculate``.
+            system_changes: Forwarded to ``OCPCalculator.calculate``.
+        """
+        super().calculate(atoms, properties, system_changes)
+
+        # Store the forces under the singular key "force" as well.
+        # NOTE(review): atoms.get_forces() presumably requires a calculator to
+        # be attached to `atoms` -- confirm for direct calculate() calls.
+        self.results.update(
+            force=atoms.get_forces(),
+        )
+
+
+class eSCN(OCPCalculator):
+    """``OCPCalculator`` preset for the eSCN OC20 checkpoint."""
+
+    def __init__(
+        self,
+        model_name="eSCN-L6-M3-Lay20-S2EF-OC20-All+MD",
+        local_cache="/tmp/ocp/",
+        cpu=False,
+        seed=0,
+        **kwargs,
+    ) -> None:
+        """Initialise the underlying ``OCPCalculator``.
+
+        Args:
+            model_name: Forwarded unchanged as ``model_name``.
+            local_cache: Forwarded unchanged as ``local_cache``.
+            cpu: Forwarded unchanged as ``cpu``.
+            seed: Forwarded unchanged as ``seed``.
+            **kwargs: Extra keyword arguments forwarded to ``OCPCalculator``.
+        """
+        super().__init__(
+            model_name=model_name,
+            local_cache=local_cache,
+            cpu=cpu,
+            # Pass the caller's seed through instead of a hard-coded 0.
+            seed=seed,
+            **kwargs,
+        )
+
+    def calculate(self, atoms: Atoms, properties, system_changes) -> None:
+        """Run the base-class calculation and add a ``force`` result entry.
+
+        Args:
+            atoms: Structure to evaluate; forwarded to the base class.
+            properties: Forwarded to ``OCPCalculator.calculate``.
+            system_changes: Forwarded to ``OCPCalculator.calculate``.
+        """
+        super().calculate(atoms, properties, system_changes)
+
+        # Store the forces under the singular key "force" as well.
+        # NOTE(review): atoms.get_forces() presumably requires a calculator to
+        # be attached to `atoms` -- confirm for direct calculate() calls.
+        self.results.update(
+            force=atoms.get_forces(),
+        )
+
+
+class ALIGNN(AlignnAtomwiseCalculator):
+    """``AlignnAtomwiseCalculator`` wrapper with automatic device selection."""
+
+    def __init__(self, dir_path: str = "/tmp/alignn/", device=None, **kwargs) -> None:
+        """Resolve the model location and initialise the base calculator.
+
+        Args:
+            dir_path: Passed to ``get_figshare_model_ff`` as ``dir_path``.
+            device: Device for the base calculator. When falsy, the result of
+                ``get_freer_device()`` is used (a ``torch.device``, not a str).
+            **kwargs: Extra keyword arguments forwarded to the base class.
+        """
+        checkpoint_location = get_figshare_model_ff(dir_path=dir_path)
+        if not device:
+            device = get_freer_device()
+        super().__init__(model_path=checkpoint_location, device=device, **kwargs)
+
+    def calculate(self, atoms, properties=None, system_changes=None):
+        """Delegate to the base class, making the last two arguments optional."""
+        super().calculate(atoms, properties, system_changes)
diff --git a/mlip_arena/models/mace.py b/mlip_arena/models/mace.py
@@ -1,13 +1,10 @@
-from typing import Optional, Tuple
-
-import numpy as np
 import torch
 from ase import Atoms
 from ase.calculators.calculator import all_changes
 from huggingface_hub import hf_hub_download
 from torch_geometric.data import Data
 
-from mlip_arena.models import MLIP, MLIPCalculator, ModuleMLIP
+from mlip_arena.models import MLIPCalculator
 
 
 class MACE_MP_Medium(MLIPCalculator):
@@ -19,9 +16,9 @@ def __init__(
         directory=".",
         **kwargs,
     ):
-        super().__init__(restart=restart, atoms=atoms, directory=directory, **kwargs)
-
-        self.name: str = self.__class__.__name__
+        self.device = device or torch.device(
+            "cuda" if torch.cuda.is_available() else "cpu"
+        )
 
         fpath = hf_hub_download(
             repo_id="cyrusyc/mace-universal",
@@ -30,23 +27,15 @@ def __init__(
             revision="main",
         )
 
-        self.device = device or torch.device(
-            "cuda" if torch.cuda.is_available() else "cpu"
-        )
+        model = torch.load(fpath, map_location=self.device)
 
-        self.model = torch.load(fpath, map_location=self.device)
+        super().__init__(
+            model=model, restart=restart, atoms=atoms, directory=directory, **kwargs
+        )
 
+        self.name: str = self.__class__.__name__
         self.implemented_properties = ["energy", "forces", "stress"]
 
-        # repo_id = f"atomind/{self.__class__.__name__}".lower().replace("_", "-")
-
-        # model = ModuleMLIP(model=model)
-        # model.save_pretrained(
-        #     self.__class__.__name__.lower().replace("_", "-"),
-        #     repo_id=repo_id,
-        #     push_to_hub=True,
-        # )
-
     def calculate(
         self, atoms: Atoms, properties: list[str], system_changes: list = all_changes
     ):

diff --git a/mlip_arena/models/registry.yaml b/mlip_arena/models/registry.yaml
@@ -1,7 +1,7 @@
 
 
 MACE-MP(M):
-  module: mace
+  module: externals
   class: MACE_MP_Medium
   username: cyrusyc # HF username
   last-update: 2024-03-25T14:30:00
@@ -17,6 +17,40 @@ MACE-MP(M):
   doi: https://arxiv.org/abs/2401.00096
   date: 2023-12-29
 
+CHGNet:
+  module: externals
+  class: CHGNet
+  username: cyrusyc
+  last-update: 2024-07-08T00:00:00
+  datetime: 2024-07-08T00:00:00
+  datasets:
+    - atomind/mptrj
+  gpu-tasks:
+    - diatomics
+
+EquiformerV2(OC22):
+  module: externals
+  class: EquiformerV2
+  username: cyrusyc
+  last-update: 2024-07-08T00:00:00
+  datetime: 2024-07-08T00:00:00
+  datasets:
+    - ocp
+  gpu-tasks:
+    - diatomics
+
+eSCN(OC20):
+  module: externals
+  class: eSCN
+  username: cyrusyc
+  last-update: 2024-07-08T00:00:00
+  datetime: 2024-07-08T00:00:00
+  datasets:
+    - ocp
+  gpu-tasks:
+    - diatomics
+
+
 # CHGNet:
 #   module: chgnet
 #   username: cyrusyc