Skip to content

Commit

Permalink
refactor external calculators; better handle devices
Browse files Browse the repository at this point in the history
  • Loading branch information
chiang-yuan committed Jul 11, 2024
1 parent 0ffedd3 commit 7cbf186
Show file tree
Hide file tree
Showing 10 changed files with 620 additions and 289 deletions.
28 changes: 10 additions & 18 deletions mlip_arena/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,37 +18,29 @@ class MLIP(
PyTorchModelHubMixin,
tags=["atomistic-simulation", "MLIP"],
):
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)


class ModuleMLIP(MLIP):
def __init__(self, model: nn.Module, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.add_module("model", model)
def __init__(self, model: nn.Module) -> None:
super().__init__()
self.model = model

def forward(self, x):
print("Forwarding...")
out = self.model(x)
print("Forwarded!")
return out

return self.model(x)

class MLIPCalculator(Calculator):
class MLIPCalculator(MLIP, Calculator):
name: str
# device: torch.device
# model: MLIP
implemented_properties: list[str] = ["energy", "forces", "stress"]

def __init__(
self,
model,
# ASE Calculator
restart=None,
atoms=None,
directory=".",
**kwargs,
calculator_kwargs: dict = {},
):
super().__init__(restart=restart, atoms=atoms, directory=directory, **kwargs)
MLIP.__init__(self, model=model) # Initialize MLIP part
Calculator.__init__(self, restart=restart, atoms=atoms, directory=directory, **calculator_kwargs) # Initialize ASE Calculator part
# Additional initialization if needed
# self.name: str = self.__class__.__name__
# self.device = device or torch.device(
# "cuda" if torch.cuda.is_available() else "cpu"
Expand Down
22 changes: 8 additions & 14 deletions mlip_arena/models/chgnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
from huggingface_hub import hf_hub_download
from torch_geometric.data import Data

from mlip_arena.models import MLIP, MLIPCalculator, ModuleMLIP
from mlip_arena.models import MLIP, MLIPCalculator

# TODO: WIP

class CHGNetCalculator(MLIPCalculator):

class CHGNet(MLIPCalculator):
def __init__(
self,
device: torch.device | None = None,
Expand All @@ -19,23 +21,15 @@ def __init__(
directory=".",
**kwargs,
):
super().__init__(restart=restart, atoms=atoms, directory=directory, **kwargs)

self.name: str = self.__class__.__name__

fpath = hf_hub_download(
repo_id="cyrusyc/mace-universal",
subfolder="pretrained",
filename="2023-12-12-mace-128-L1_epoch-199.model",
revision="main",
)

self.device = device or torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)

self.model = torch.load(fpath, map_location=self.device)
super().__init__(
model=model, restart=restart, atoms=atoms, directory=directory, **kwargs
)

self.name: str = self.__class__.__name__
self.implemented_properties = ["energy", "forces", "stress"]

def calculate(
Expand Down
165 changes: 165 additions & 0 deletions mlip_arena/models/externals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import os
import urllib
from typing import Literal

import torch
from alignn.ff.ff import AlignnAtomwiseCalculator, get_figshare_model_ff
from ase import Atoms
from chgnet.model.dynamics import CHGNetCalculator
from chgnet.model.model import CHGNet
from fairchem.core import OCPCalculator
from mace.calculators import MACECalculator


# Avoid circular import
def get_freer_device() -> torch.device:
    """Pick the torch device to run a model on.

    Preference order: the CUDA GPU with the most estimated free memory,
    then MPS, then CPU. The function does not raise when no accelerator is
    present; it falls back to the CPU.

    Returns:
        torch.device: ``cuda:<index>`` of the selected GPU, ``mps``, or ``cpu``.
    """
    device_count = torch.cuda.device_count()
    if device_count > 0:
        # If CUDA GPUs are available, select the one with the most free memory.
        # Free memory is estimated as total device memory minus the memory
        # reported by torch.cuda.memory_allocated for that device.
        # NOTE(review): memory_allocated presumably only reflects tensors owned
        # by this process, so usage by other processes is not seen -- confirm.
        mem_free = [
            torch.cuda.get_device_properties(i).total_memory
            - torch.cuda.memory_allocated(i)
            for i in range(device_count)
        ]
        # list.index returns the first maximum, so ties go to the lowest index.
        free_gpu_index = mem_free.index(max(mem_free))
        device = torch.device(f"cuda:{free_gpu_index}")
        print(
            f"Selected GPU {device} with {mem_free[free_gpu_index] / 1024**2:.2f} MB free memory from {device_count} GPUs"
        )
    elif torch.backends.mps.is_available():
        # If no CUDA GPUs are available but MPS is, use MPS
        print("No GPU available. Using MPS.")
        device = torch.device("mps")
    else:
        # Fallback to CPU if neither CUDA GPUs nor MPS are available
        print("No GPU or MPS available. Using CPU.")
        device = torch.device("cpu")

    return device


class MACE_MP_Medium(MACECalculator):
    """MACECalculator loaded from a cached copy of a downloaded MACE checkpoint.

    The checkpoint is downloaded once into ``~/.cache/mace`` and reused on
    later instantiations.
    """

    def __init__(self, device=None, default_dtype="float32", **kwargs):
        """Download (if needed) the checkpoint and initialize MACECalculator.

        Args:
            device: Device passed to MACECalculator. When falsy, the string
                form of ``get_freer_device()`` is used.
            default_dtype: Passed through to MACECalculator.
            **kwargs: Extra keyword arguments forwarded to MACECalculator.

        Raises:
            RuntimeError: If the download returned an HTML page instead of a
                model file.
        """
        # ``import urllib`` alone does not guarantee the ``request`` submodule
        # is loaded, so import it explicitly before use.
        import urllib.request

        checkpoint_url = "http://tinyurl.com/5yyxdm76"
        cache_dir = os.path.expanduser("~/.cache/mace")
        # Derive a filesystem-safe file name from the last URL component.
        checkpoint_url_name = "".join(
            c for c in os.path.basename(checkpoint_url) if c.isalnum() or c in "_"
        )
        cached_model_path = f"{cache_dir}/{checkpoint_url_name}"
        if not os.path.isfile(cached_model_path):
            os.makedirs(cache_dir, exist_ok=True)
            # download and save to disk
            print(f"Downloading MACE model from {checkpoint_url!r}")
            _, http_msg = urllib.request.urlretrieve(checkpoint_url, cached_model_path)
            # ``http_msg`` is a headers object: ``"..." in http_msg`` would test
            # header *names*, so the content type must be queried explicitly.
            if http_msg.get_content_type() == "text/html":
                # Best-effort removal of the bogus file so it is not mistaken
                # for a cached checkpoint on the next run.
                try:
                    os.remove(cached_model_path)
                except OSError:
                    pass
                raise RuntimeError(
                    f"Model download failed, please check the URL {checkpoint_url}"
                )
            print(f"Cached MACE model to {cached_model_path}")
        model = cached_model_path
        msg = f"Using Materials Project MACE for MACECalculator with {model}"
        print(msg)

        device = device or str(get_freer_device())

        super().__init__(
            model_paths=model, device=device, default_dtype=default_dtype, **kwargs
        )


class CHGNet(CHGNetCalculator):
    """CHGNetCalculator wrapper with automatic device selection.

    It also removes the ``crystal_fea`` entry from the results after each
    calculation so the results stay compatible with ``ase.io.write``.
    """

    # NOTE: inside the signature below, ``CHGNet`` still names the model class
    # imported from ``chgnet.model.model``; this wrapper class is only bound to
    # the same name once its body has finished executing.
    def __init__(
        self,
        model: CHGNet | None = None,
        use_device: str | None = None,
        stress_weight: float | None = 1 / 160.21766208,
        on_isolated_atoms: Literal["ignore", "warn", "error"] = "warn",
        **kwargs,
    ) -> None:
        """Initialize the underlying CHGNetCalculator.

        Args:
            model: Passed through to CHGNetCalculator.
            use_device: Device name; when falsy, the string form of
                ``get_freer_device()`` is used instead.
            stress_weight: Passed through to CHGNetCalculator.
            on_isolated_atoms: Passed through to CHGNetCalculator.
            **kwargs: Extra keyword arguments forwarded to CHGNetCalculator.
        """
        if not use_device:
            use_device = str(get_freer_device())

        super().__init__(
            model=model,
            use_device=use_device,
            stress_weight=stress_weight,
            on_isolated_atoms=on_isolated_atoms,
            **kwargs,
        )

    def calculate(
        self,
        atoms: Atoms | None = None,
        properties: list | None = None,
        system_changes: list | None = None,
    ) -> None:
        """Run the parent calculation, then drop ``crystal_fea`` from results."""
        super().calculate(atoms, properties, system_changes)

        # for ase.io.write compatibility
        if "crystal_fea" in self.results:
            del self.results["crystal_fea"]


class EquiformerV2(OCPCalculator):
    """OCPCalculator preconfigured with an EquiformerV2 checkpoint name."""

    def __init__(
        self,
        model_name="EquiformerV2-lE4-lF100-S2EFS-OC22",
        local_cache="/tmp/ocp/",
        cpu=False,
        seed=0,
        **kwargs,
    ) -> None:
        """Initialize the underlying OCPCalculator.

        Args:
            model_name: Passed through to OCPCalculator.
            local_cache: Passed through to OCPCalculator.
            cpu: Passed through to OCPCalculator.
            seed: Passed through to OCPCalculator.
            **kwargs: Extra keyword arguments forwarded to OCPCalculator.
        """
        super().__init__(
            model_name=model_name,
            local_cache=local_cache,
            cpu=cpu,
            # Forward the caller's seed; it was previously hard-coded to 0,
            # which silently ignored the ``seed`` argument.
            seed=seed,
            **kwargs,
        )

    def calculate(self, atoms: Atoms, properties, system_changes) -> None:
        """Run the parent calculation and also store forces under ``force``."""
        super().calculate(atoms, properties, system_changes)

        # NOTE(review): atoms.get_forces() goes through the calculator attached
        # to ``atoms`` -- presumably this one; confirm against callers.
        self.results.update(
            force=atoms.get_forces(),
        )


class eSCN(OCPCalculator):
    """OCPCalculator preconfigured with an eSCN checkpoint name."""

    def __init__(
        self,
        model_name="eSCN-L6-M3-Lay20-S2EF-OC20-All+MD",
        local_cache="/tmp/ocp/",
        cpu=False,
        seed=0,
        **kwargs,
    ) -> None:
        """Initialize the underlying OCPCalculator.

        Args:
            model_name: Passed through to OCPCalculator.
            local_cache: Passed through to OCPCalculator.
            cpu: Passed through to OCPCalculator.
            seed: Passed through to OCPCalculator.
            **kwargs: Extra keyword arguments forwarded to OCPCalculator.
        """
        super().__init__(
            model_name=model_name,
            local_cache=local_cache,
            cpu=cpu,
            # Forward the caller's seed; it was previously hard-coded to 0,
            # which silently ignored the ``seed`` argument.
            seed=seed,
            **kwargs,
        )

    def calculate(self, atoms: Atoms, properties, system_changes) -> None:
        """Run the parent calculation and also store forces under ``force``."""
        super().calculate(atoms, properties, system_changes)

        # NOTE(review): atoms.get_forces() goes through the calculator attached
        # to ``atoms`` -- presumably this one; confirm against callers.
        self.results.update(
            force=atoms.get_forces(),
        )


class ALIGNN(AlignnAtomwiseCalculator):
    """AlignnAtomwiseCalculator built from the model returned by
    ``get_figshare_model_ff``, with automatic device selection."""

    def __init__(self, dir_path: str = "/tmp/alignn/", device=None, **kwargs) -> None:
        """Resolve the model path and device, then initialize the parent.

        Args:
            dir_path: Directory handed to ``get_figshare_model_ff``.
            device: Device to use; when falsy, ``get_freer_device()`` decides.
            **kwargs: Extra keyword arguments forwarded to the parent class.
        """
        # Resolve the model location first, then the device (same order as
        # before, so any output from the two helpers appears in the same order).
        resolved_model_path = get_figshare_model_ff(dir_path=dir_path)
        if not device:
            device = get_freer_device()
        super().__init__(model_path=resolved_model_path, device=device, **kwargs)

    def calculate(self, atoms, properties=None, system_changes=None):
        """Delegate to the parent ``calculate``; its return value is discarded."""
        super().calculate(atoms, properties, system_changes)
29 changes: 9 additions & 20 deletions mlip_arena/models/mace.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from typing import Optional, Tuple

import numpy as np
import torch
from ase import Atoms
from ase.calculators.calculator import all_changes
from huggingface_hub import hf_hub_download
from torch_geometric.data import Data

from mlip_arena.models import MLIP, MLIPCalculator, ModuleMLIP
from mlip_arena.models import MLIPCalculator


class MACE_MP_Medium(MLIPCalculator):
Expand All @@ -19,9 +16,9 @@ def __init__(
directory=".",
**kwargs,
):
super().__init__(restart=restart, atoms=atoms, directory=directory, **kwargs)

self.name: str = self.__class__.__name__
self.device = device or torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)

fpath = hf_hub_download(
repo_id="cyrusyc/mace-universal",
Expand All @@ -30,23 +27,15 @@ def __init__(
revision="main",
)

self.device = device or torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)
model = torch.load(fpath, map_location=self.device)

self.model = torch.load(fpath, map_location=self.device)
super().__init__(
model=model, restart=restart, atoms=atoms, directory=directory, **kwargs
)

self.name: str = self.__class__.__name__
self.implemented_properties = ["energy", "forces", "stress"]

# repo_id = f"atomind/{self.__class__.__name__}".lower().replace("_", "-")

# model = ModuleMLIP(model=model)
# model.save_pretrained(
# self.__class__.__name__.lower().replace("_", "-"),
# repo_id=repo_id,
# push_to_hub=True,
# )

def calculate(
self, atoms: Atoms, properties: list[str], system_changes: list = all_changes
):
Expand Down
36 changes: 35 additions & 1 deletion mlip_arena/models/registry.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@


MACE-MP(M):
module: mace
module: externals
class: MACE_MP_Medium
username: cyrusyc # HF username
last-update: 2024-03-25T14:30:00
Expand All @@ -17,6 +17,40 @@ MACE-MP(M):
doi: https://arxiv.org/abs/2401.00096
date: 2023-12-29

CHGNet:
module: externals
class: CHGNet
username: cyrusyc
last-update: 2024-07-08T00:00:00
datetime: 2024-07-08T00:00:00
datasets:
- atomind/mptrj
gpu-tasks:
- diatomics

EquiformerV2(OC22):
module: externals
class: EquiformerV2
username: cyrusyc
last-update: 2024-07-08T00:00:00
datetime: 2024-07-08T00:00:00
datasets:
- ocp
gpu-tasks:
- diatomics

eSCN(OC20):
module: externals
class: eSCN
username: cyrusyc
last-update: 2024-07-08T00:00:00
datetime: 2024-07-08T00:00:00
datasets:
- ocp
gpu-tasks:
- diatomics


# CHGNet:
# module: chgnet
# username: cyrusyc
Expand Down
Loading

0 comments on commit 7cbf186

Please sign in to comment.