Support Apple MPS acceleration #1129

Open
wants to merge 150 commits into base: main
Changes from all commits (150 commits)
4e9ed3a
Support Apple MPS, first pass
ClaudiaComito Nov 26, 2022
50297d7
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Mar 18, 2023
febdcfc
Include torch 2.0 in device check
ClaudiaComito Mar 18, 2023
a5642e9
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Mar 29, 2023
8445476
reinstate quick_start.md
ClaudiaComito Mar 29, 2023
8763146
Merge branch 'docs/reinstate-quick-start' into features/1053-support-…
ClaudiaComito Mar 29, 2023
92306e1
[skip ci] edits
ClaudiaComito Mar 29, 2023
b0d7f0f
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Apr 17, 2023
ffa014e
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Apr 18, 2023
6015ebf
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Apr 24, 2023
f3a5ad8
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Apr 27, 2023
a96441b
fix tolerance for torch 2
ClaudiaComito Apr 27, 2023
c7b70c6
implement __array__ method
ClaudiaComito May 18, 2023
ff8af94
test __array__ method
ClaudiaComito May 18, 2023
fc059c2
test __array__ method
ClaudiaComito May 18, 2023
ebf4c51
Merge branch 'features/1153_array_method' into features/1053-support-…
ClaudiaComito May 18, 2023
5c471e2
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito May 22, 2023
34c9347
Merge branch 'features/1053-support-Apple-silicon-GPUs' of github.com…
ClaudiaComito May 22, 2023
f4e3afd
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jun 16, 2023
681086f
Merge branch 'features/#1117-array-copy-None' into features/1053-supp…
ClaudiaComito Jun 19, 2023
3ad2718
dtype changes for MPS device
ClaudiaComito Jun 19, 2023
b50d9f6
accomodate single prec dtypes for MPS
ClaudiaComito Jun 20, 2023
8ff410a
torch.linalg.inv workaround on MPS
ClaudiaComito Jun 20, 2023
e3bd1d2
Implement MPS-friendly choice of dtypes
ClaudiaComito Jun 20, 2023
7564bac
skip hSVD tests on MPS, torch.norm unstable
ClaudiaComito Jun 20, 2023
cd49de9
cast operands to single precision on MPS
ClaudiaComito Jun 20, 2023
3559295
do not cast to double precision on MPS
ClaudiaComito Jun 20, 2023
cec0e75
skip complex dtype tests on MPS
ClaudiaComito Jun 20, 2023
2361713
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jun 20, 2023
3fb3667
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Aug 13, 2023
7fbbc45
skip MPI op tests on Apple MPS
ClaudiaComito Aug 16, 2023
6f00be8
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Oct 18, 2023
859b844
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Nov 27, 2023
9414360
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Apr 13, 2024
4cc7401
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito May 28, 2024
e7a8c39
test on float32 only on MPS
ClaudiaComito May 28, 2024
6187804
skip test on MPS, ComplexFloat not supported
ClaudiaComito May 28, 2024
0d464a3
test float32 only on MPS
ClaudiaComito May 28, 2024
b858a89
test float32 only on MPS
ClaudiaComito May 28, 2024
c887a91
edit docs
ClaudiaComito May 28, 2024
a58be6c
non-distr in-place cumprod
ClaudiaComito May 28, 2024
2b71ed2
no float64 on MPS
ClaudiaComito May 28, 2024
83d0136
skip float64 tests on MPS
ClaudiaComito May 28, 2024
a72678d
implement non-distr in-place cumsum
ClaudiaComito May 28, 2024
30cfa8a
catch MPS hypot crash on int64 early
ClaudiaComito May 28, 2024
9e128d6
do not test non-supported dtypes on MPS
ClaudiaComito May 28, 2024
95dcd0e
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito May 28, 2024
cdef743
replace device check with getattr to accomodate scalars
ClaudiaComito May 28, 2024
e6db9ae
skip complex-math tests on MPS and MacOS<14
ClaudiaComito May 29, 2024
00f6791
skip float64 tests on MPS
ClaudiaComito May 29, 2024
6c532f5
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jun 4, 2024
8e7e2a6
do not test float64 on MPS
ClaudiaComito Jun 4, 2024
6202f3b
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jun 4, 2024
113e562
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jun 6, 2024
bf3bbb9
do not test float64 on MPS
ClaudiaComito Jun 6, 2024
47b3253
skip float64 tests on MPS
ClaudiaComito Jun 7, 2024
3e8c8aa
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jun 12, 2024
ad165fa
do not cast to float64 on MPS
ClaudiaComito Jun 12, 2024
d71c9c1
do not test float64 and complex on mps
ClaudiaComito Jun 19, 2024
84e4942
early exit for non-distr flip
ClaudiaComito Jun 19, 2024
1b1d18d
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jul 8, 2024
2587b89
int32 cumsum on MPS
ClaudiaComito Jul 8, 2024
1db5cd6
skip reduce ops on 4+-dim arrays on MPS
ClaudiaComito Jul 8, 2024
7859f27
skip float64 tests on MPS
ClaudiaComito Jul 8, 2024
92fcafe
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jul 10, 2024
391d3e8
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jul 12, 2024
34192f9
skip int64 cumsum test on MPS
ClaudiaComito Jul 12, 2024
0085388
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Jul 17, 2024
f596b73
indexing tests on MPS
ClaudiaComito Jul 17, 2024
015ba20
skip partitioned tests on MPS
ClaudiaComito Jul 18, 2024
4dda4ea
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Sep 9, 2024
0df738a
add is_mps property
ClaudiaComito Sep 10, 2024
e096d33
refactor roll
ClaudiaComito Sep 10, 2024
32d20f2
refactor tests
ClaudiaComito Sep 10, 2024
52b1f14
refactor tests
ClaudiaComito Sep 10, 2024
88740b9
remove unnecessary try/except
ClaudiaComito Sep 12, 2024
79a7e44
allow double precision int input
ClaudiaComito Sep 12, 2024
ead4ac3
simplify MPS heuristics
ClaudiaComito Sep 12, 2024
9a71d7e
update tests
ClaudiaComito Sep 12, 2024
a3a16c4
cum ops now supported on int64
ClaudiaComito Sep 12, 2024
493c877
update tests
ClaudiaComito Sep 12, 2024
8ca15dd
remove ref to torch < 2
ClaudiaComito Sep 12, 2024
26c003d
allow cum ops on int64
ClaudiaComito Sep 12, 2024
cd23eaf
update tests
ClaudiaComito Sep 12, 2024
784d5a1
update tests
ClaudiaComito Sep 12, 2024
2330492
cum ops now supported
ClaudiaComito Sep 13, 2024
fc9b03a
update tests
ClaudiaComito Sep 13, 2024
06b2af7
update tests
ClaudiaComito Sep 13, 2024
c81300a
update tests
ClaudiaComito Sep 30, 2024
7b1fe5b
update test_svd
ClaudiaComito Sep 30, 2024
fb75a2e
update test_svdtools
ClaudiaComito Sep 30, 2024
da63ce8
simplify is_mps
ClaudiaComito Sep 30, 2024
35cea1c
WIP - update test_statistics
ClaudiaComito Sep 30, 2024
b23c6b7
bypass allreduce call for non-distr histc
ClaudiaComito Oct 1, 2024
ae3a96d
adapt test_statistics
ClaudiaComito Oct 1, 2024
d998e25
percentile output dtype on MPS
ClaudiaComito Oct 1, 2024
1a7ac24
adapt test_manipulations to MPS
ClaudiaComito Oct 1, 2024
bffd3db
early out in non-distr cases
ClaudiaComito Oct 1, 2024
22d6e0d
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Oct 1, 2024
5628ea4
update test_complex_math
ClaudiaComito Oct 2, 2024
727cadd
update test_exp
ClaudiaComito Oct 2, 2024
aaff882
update test_factories
ClaudiaComito Oct 2, 2024
8e44f89
update test_io
ClaudiaComito Oct 2, 2024
386cd68
update test_logical
ClaudiaComito Oct 2, 2024
8df2649
skip tests on MPS
ClaudiaComito Oct 2, 2024
550e638
update cumops for MPS
ClaudiaComito Oct 2, 2024
00c6aab
skip print_GPU on MPS
ClaudiaComito Oct 2, 2024
c31f584
support device setting for randperm
ClaudiaComito Oct 9, 2024
6aec0b4
expand randperm docs on device
ClaudiaComito Oct 10, 2024
fcf7a9e
adapt tests to MPS
ClaudiaComito Oct 10, 2024
7ca9548
adapt test_rounding to MPS
ClaudiaComito Oct 10, 2024
3c94c88
do not cast to float64 on MPS
ClaudiaComito Oct 10, 2024
9d49269
adapt test_signal to MPS
ClaudiaComito Oct 10, 2024
827e231
adatpt basic_tests to MPS
ClaudiaComito Oct 11, 2024
222e8f0
adapt test_trigonometrics to MPS
ClaudiaComito Oct 14, 2024
e9d4eff
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Nov 11, 2024
61daa63
adapt TestRSVD to MPS
ClaudiaComito Nov 11, 2024
f7db54e
adapt test_randint to MPS
ClaudiaComito Nov 11, 2024
2e0b686
do not cast to float64 on MPS
ClaudiaComito Nov 11, 2024
21de13e
adapt test_types to MPS
ClaudiaComito Nov 11, 2024
509cb3b
skip MPS FFT tests on MacOS < 14
ClaudiaComito Nov 26, 2024
2bd212a
skip Threefry tests on MPS
ClaudiaComito Nov 26, 2024
e9db00e
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Nov 26, 2024
10ec173
adatpt test_fft to MPS
ClaudiaComito Nov 29, 2024
2993c11
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Nov 29, 2024
a20fab9
skip float64 test_isvd on MPS
ClaudiaComito Nov 29, 2024
b3b72c5
adapt test_pca to MPS
ClaudiaComito Nov 29, 2024
718b47e
no sparse tests on MPS
ClaudiaComito Nov 29, 2024
6177f94
adapt to MPS
ClaudiaComito Nov 29, 2024
667f436
adapt to MPS
ClaudiaComito Nov 29, 2024
11d87b1
remove print statement
ClaudiaComito Nov 29, 2024
06db453
skip float64 tests on MPS
ClaudiaComito Nov 29, 2024
a2cfa29
skip complex128 on MPS
ClaudiaComito Nov 29, 2024
0875b5c
fix CPU tests
ClaudiaComito Dec 2, 2024
cd1b2b4
fix CPU tests
ClaudiaComito Dec 2, 2024
5147c7b
update test_solver
ClaudiaComito Dec 3, 2024
0589f93
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Dec 3, 2024
0528481
skip float64 batch-qr test on MPS
ClaudiaComito Dec 3, 2024
0efad5e
adapt test_permutation
ClaudiaComito Dec 10, 2024
0a6b6ed
Merge branch 'main' into features/1053-support-Apple-silicon-GPUs
ClaudiaComito Dec 10, 2024
164b7b7
return inv as DNDarray, not Tensor
ClaudiaComito Dec 10, 2024
c7cdf34
skip DMD tests on MPS
ClaudiaComito Dec 11, 2024
dc2ab84
increase allclose tolerance for test_inv
ClaudiaComito Dec 11, 2024
6907c71
skip line formatting
ClaudiaComito Dec 11, 2024
58cc44f
skip line formatting
ClaudiaComito Dec 11, 2024
1e902d1
skip line formatting
ClaudiaComito Dec 11, 2024
71e83c5
debugging test_sort on AMD
ClaudiaComito Dec 12, 2024
fa3e900
update test_iris
ClaudiaComito Dec 12, 2024
103dc7e
indices sorting workaround for CUDA
ClaudiaComito Dec 12, 2024
80a867e
update PR template, docs
ClaudiaComito Dec 12, 2024
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -6,6 +6,7 @@
- Implementation:
- [ ] unit tests: all split configurations tested
- [ ] unit tests: multiple dtypes tested
- [ ] **NEW** unit tests: MPS tested (1 MPI process, 1 GPU)
- [ ] benchmarks: created for new functionality
- [ ] benchmarks: performance improved or maintained
- [ ] documentation updated where needed
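The new checklist item assumes a single MPI process on a single Apple GPU. A quick way to confirm that the local PyTorch build can actually use MPS before running the tests (a hedged sketch; the test-runner invocation is an assumption, not something prescribed by this PR):

import torch

# Both checks must be True for Heat to pick the MPS-backed "gpu" device (see devices.py below)
print("MPS built:    ", torch.backends.mps.is_built())
print("MPS available:", torch.backends.mps.is_available())
# then run the suite on one process, e.g.: mpirun -n 1 pytest heat/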
1 change: 0 additions & 1 deletion heat/classification/kneighborsclassifier.py
@@ -122,7 +122,6 @@ def predict(self, x: DNDarray) -> DNDarray:
"""
distances = self.effective_metric_(x, self.x)
_, indices = ht.topk(distances, self.n_neighbors, largest=False)

predictions = self.y[indices.flatten()]
predictions.balance_()
predictions = ht.reshape(predictions, (indices.gshape + (self.y.gshape[1],)))
7 changes: 6 additions & 1 deletion heat/cluster/tests/test_batchparallelclustering.py
@@ -84,14 +84,19 @@ def test_get_and_set_params(self):
self.assertEqual(10, parallelclusterer.n_clusters)

def test_spherical_clusters(self):
if self.is_mps:
dtypes = [ht.float32]
else:
dtypes = [ht.float32, ht.float64]

Review comment (Member) on lines +87 to +91:
This (and all subsequent tests that have to filter by system) would be a great target for parametrization (now that we talked about introducing hypothesis and parametrized tests).

A good example of how to skip certain parameters based on the OS can be found here.
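A minimal sketch of what that parametrization could look like, assuming pytest-style tests and a module-level MPS flag (the names here are hypothetical; the PR itself exposes a similar is_mps attribute on the shared TestCase):

import pytest
import torch
import heat as ht

# hypothetical module-level flag mirroring TestCase.is_mps
ON_MPS = torch.backends.mps.is_built() and torch.backends.mps.is_available()

@pytest.mark.parametrize(
    "dtype",
    [
        ht.float32,
        pytest.param(
            ht.float64,
            marks=pytest.mark.skipif(ON_MPS, reason="MPS does not support float64"),
        ),
    ],
)
def test_spherical_clusters(dtype):
    ...  # build the dataset with `dtype` and run the clusterer as above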

for ParallelClusterer in [ht.cluster.BatchParallelKMeans, ht.cluster.BatchParallelKMedians]:
if ParallelClusterer is ht.cluster.BatchParallelKMeans:
ppinitkws = ["k-means++"]
elif ParallelClusterer is ht.cluster.BatchParallelKMedians:
ppinitkws = ["k-medians++"]
for seed in [1, None]:
n = 20 * ht.MPI_WORLD.size
for dtype in [ht.float32, ht.float64]:
for dtype in dtypes:
data = create_spherical_dataset(
num_samples_cluster=n,
radius=1.0,
9 changes: 7 additions & 2 deletions heat/cluster/tests/test_kmeans.py
@@ -100,15 +100,20 @@ def test_spherical_clusters(self):

# different datatype
n = 20 * ht.MPI_WORLD.size
if self.is_mps:
# MPS does not support float64
dtype = ht.float32
else:
dtype = ht.float64
data = create_spherical_dataset(
num_samples_cluster=n, radius=1.0, offset=4.0, dtype=ht.float64, random_state=seed
num_samples_cluster=n, radius=1.0, offset=4.0, dtype=dtype, random_state=seed
)
kmeans = ht.cluster.KMeans(n_clusters=4, init="kmeans++")
kmeans.fit(data)
self.assertIsInstance(kmeans.cluster_centers_, ht.DNDarray)
self.assertEqual(kmeans.cluster_centers_.shape, (4, 3))

# on Ints (different radius, offset and datatype
# on Ints (different radius, offset and datatype)
data = create_spherical_dataset(
num_samples_cluster=n, radius=10.0, offset=40.0, dtype=ht.int32, random_state=seed
)
7 changes: 6 additions & 1 deletion heat/cluster/tests/test_kmedians.py
@@ -100,8 +100,13 @@ def test_spherical_clusters(self):

# different datatype
n = 20 * ht.MPI_WORLD.size
# MPS does not support float64
if self.is_mps:
dtype = ht.float32
else:
dtype = ht.float64
data = create_spherical_dataset(
num_samples_cluster=n, radius=1.0, offset=4.0, dtype=ht.float64, random_state=seed
num_samples_cluster=n, radius=1.0, offset=4.0, dtype=dtype, random_state=seed
)
kmedians = ht.cluster.KMedians(n_clusters=4, init="kmedians++")
kmedians.fit(data)
7 changes: 6 additions & 1 deletion heat/cluster/tests/test_kmedoids.py
@@ -103,8 +103,13 @@ def test_spherical_clusters(self):

# different datatype
n = 20 * ht.MPI_WORLD.size
# MPS does not support float64
if self.is_mps:
dtype = ht.float32
else:
dtype = ht.float64
data = create_spherical_dataset(
num_samples_cluster=n, radius=1.0, offset=4.0, dtype=ht.float64, random_state=seed
num_samples_cluster=n, radius=1.0, offset=4.0, dtype=dtype, random_state=seed
)
kmedoid = ht.cluster.KMedoids(n_clusters=4, init="kmedoids++")
kmedoid.fit(data)
85 changes: 44 additions & 41 deletions heat/cluster/tests/test_spectral.py
@@ -2,6 +2,7 @@
import unittest

import heat as ht
import torch

from ...core.tests.test_suites.basic_test import TestCase

@@ -35,49 +36,51 @@ def test_get_and_set_params(self):
self.assertEqual(10, spectral.n_clusters)

def test_fit_iris(self):
# get some test data
iris = ht.load("heat/datasets/iris.csv", sep=";", split=0)
m = 10
# fit the clusters
spectral = ht.cluster.Spectral(
n_clusters=3, gamma=1.0, metric="rbf", laplacian="fully_connected", n_lanczos=m
)
spectral.fit(iris)
self.assertIsInstance(spectral.labels_, ht.DNDarray)
# skip on MPS, matmul on ComplexFloat not supported as of PyTorch 2.5
if not self.is_mps:
# get some test data
iris = ht.load("heat/datasets/iris.csv", sep=";", split=0)
m = 10
# fit the clusters
spectral = ht.cluster.Spectral(
n_clusters=3, gamma=1.0, metric="rbf", laplacian="fully_connected", n_lanczos=m
)
spectral.fit(iris)
self.assertIsInstance(spectral.labels_, ht.DNDarray)

spectral = ht.cluster.Spectral(
metric="euclidean",
laplacian="eNeighbour",
threshold=0.5,
boundary="upper",
n_lanczos=m,
)
labels = spectral.fit_predict(iris)
self.assertIsInstance(labels, ht.DNDarray)
spectral = ht.cluster.Spectral(
metric="euclidean",
laplacian="eNeighbour",
threshold=0.5,
boundary="upper",
n_lanczos=m,
)
labels = spectral.fit_predict(iris)
self.assertIsInstance(labels, ht.DNDarray)

spectral = ht.cluster.Spectral(
gamma=0.1,
metric="rbf",
laplacian="eNeighbour",
threshold=0.5,
boundary="upper",
n_lanczos=m,
)
labels = spectral.fit_predict(iris)
self.assertIsInstance(labels, ht.DNDarray)
spectral = ht.cluster.Spectral(
gamma=0.1,
metric="rbf",
laplacian="eNeighbour",
threshold=0.5,
boundary="upper",
n_lanczos=m,
)
labels = spectral.fit_predict(iris)
self.assertIsInstance(labels, ht.DNDarray)

kmeans = {"kmeans++": "kmeans++", "max_iter": 30, "tol": -1}
spectral = ht.cluster.Spectral(
n_clusters=3, gamma=1.0, normalize=True, n_lanczos=m, params=kmeans
)
labels = spectral.fit_predict(iris)
self.assertIsInstance(labels, ht.DNDarray)
kmeans = {"kmeans++": "kmeans++", "max_iter": 30, "tol": -1}
spectral = ht.cluster.Spectral(
n_clusters=3, gamma=1.0, normalize=True, n_lanczos=m, params=kmeans
)
labels = spectral.fit_predict(iris)
self.assertIsInstance(labels, ht.DNDarray)

# Errors
with self.assertRaises(NotImplementedError):
spectral = ht.cluster.Spectral(metric="ahalanobis", n_lanczos=m)
# Errors
with self.assertRaises(NotImplementedError):
spectral = ht.cluster.Spectral(metric="ahalanobis", n_lanczos=m)

iris_split = ht.load("heat/datasets/iris.csv", sep=";", split=1)
spectral = ht.cluster.Spectral(n_lanczos=20)
with self.assertRaises(NotImplementedError):
spectral.fit(iris_split)
iris_split = ht.load("heat/datasets/iris.csv", sep=";", split=1)
spectral = ht.cluster.Spectral(n_lanczos=20)
with self.assertRaises(NotImplementedError):
spectral.fit(iris_split)
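Wrapping the whole test body in `if not self.is_mps:` keeps the test green on MPS but reports it as passed rather than skipped. A hedged alternative sketch using unittest's skip machinery, assuming the is_mps attribute is available on the test instance as in the other test files of this PR:

import heat as ht
from heat.core.tests.test_suites.basic_test import TestCase

class TestSpectralOnMPS(TestCase):
    def test_fit_iris(self):
        if self.is_mps:
            # matmul on ComplexFloat is not supported on MPS as of PyTorch 2.5
            self.skipTest("ComplexFloat matmul not supported on MPS")
        iris = ht.load("heat/datasets/iris.csv", sep=";", split=0)
        spectral = ht.cluster.Spectral(
            n_clusters=3, gamma=1.0, metric="rbf", laplacian="fully_connected", n_lanczos=10
        )
        spectral.fit(iris)
        self.assertIsInstance(spectral.labels_, ht.DNDarray)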
9 changes: 9 additions & 0 deletions heat/core/_operations.py
@@ -197,6 +197,10 @@ def __get_out_params(target, other=None, map=None):
sanitation.sanitize_out(out, output_shape, output_split, output_device, output_comm)
t1, t2 = sanitation.sanitize_distribution(t1, t2, target=out)

# MPS does not support float64
if t1.larray.is_mps and promoted_type == torch.float64:
promoted_type = torch.float32

result = operation(t1.larray.to(promoted_type), t2.larray.to(promoted_type), **fn_kwargs)

if out is None and where is True:
@@ -282,6 +286,9 @@

if dtype is not None:
dtype = types.canonical_heat_type(dtype)
if x.larray.is_mps and dtype == types.float64:
warnings.warn("MPS does not support float64, will cast to float32")
dtype = types.float32

if out is not None:
sanitation.sanitize_out(out, x.shape, x.split, x.device)
@@ -369,6 +376,8 @@
# we need floating point numbers here, due to PyTorch only providing sqrt() implementation for float32/64
if not no_cast:
promoted_type = types.promote_types(x.dtype, types.float32)
if promoted_type is types.float64 and x.device.torch_device.startswith("mps"):
promoted_type = types.float32
torch_type = promoted_type.torch_type()
else:
torch_type = x.larray.dtype
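The float64-to-float32 demotion above appears in __binary_op, __cum_op and __local_op. A stand-alone sketch of the same pattern as a hypothetical helper (not part of this PR), useful for reasoning about what the promoted dtype ends up being on MPS:

import torch

def demote_for_mps(promoted_type: torch.dtype, tensor: torch.Tensor) -> torch.dtype:
    # MPS has no double-precision support; fall back to single precision
    if tensor.is_mps:
        if promoted_type == torch.float64:
            return torch.float32
        if promoted_type == torch.complex128:
            return torch.complex64
    return promoted_type

# e.g.: promoted = demote_for_mps(torch.result_type(a, b), a)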
53 changes: 45 additions & 8 deletions heat/core/arithmetics.py
@@ -821,6 +821,14 @@ def wrap_cumprod_(a: torch.Tensor, b: int, out=None, dtype=None) -> torch.Tensor
def wrap_mul_(a: torch.Tensor, b: torch.Tensor, out=None) -> torch.Tensor:
return a.mul_(b)

axis = stride_tricks.sanitize_axis(t.shape, axis)
if axis is None:
raise NotImplementedError("cumprod_ is not implemented for axis=None")

if not t.is_distributed():
t.larray.cumprod_(dim=axis)
return t

return _operations.__cum_op(t, wrap_cumprod_, MPI.PROD, wrap_mul_, 1, axis, dtype=None, out=t)


@@ -891,6 +899,14 @@ def wrap_cumsum_(a: torch.Tensor, b: int, out=None, dtype=None) -> torch.Tensor:
def wrap_add_(a: torch.Tensor, b: torch.Tensor, out=None) -> torch.Tensor:
return a.add_(b)

axis = stride_tricks.sanitize_axis(t.shape, axis)
if axis is None:
raise NotImplementedError("cumsum_ is not implemented for axis=None")

if not t.is_distributed():
t.larray.cumsum_(dim=axis)
return t

return _operations.__cum_op(t, wrap_cumsum_, MPI.SUM, wrap_add_, 0, axis, dtype=None, out=t)
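A short usage sketch of the new non-distributed fast path (assuming the in-place cumsum_ is exposed as a DNDarray method, as hypot_ is further down in this file):

import heat as ht

t = ht.arange(6, dtype=ht.float32).reshape((2, 3))  # split=None, so not distributed
t.cumsum_(1)  # in-place cumulative sum along axis 1; takes the local torch fast path above
print(t)      # DNDarray([[0., 1., 3.], [3., 7., 12.]], ...)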


@@ -1622,8 +1638,8 @@ def wrap_gcd_(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:


def hypot(
a: DNDarray,
b: DNDarray,
t1: DNDarray,
t2: DNDarray,
/,
out: Optional[DNDarray] = None,
*,
@@ -1635,9 +1651,9 @@

Parameters
----------
a: DNDarray
t1: DNDarray
The first input array
b: DNDarray
t2: DNDarray
the second input array
out: DNDarray, optional
The output array. It must have a shape that the inputs broadcast to and matching split axis.
@@ -1656,12 +1672,22 @@
>>> ht.hypot(a,b)
DNDarray([2.2361, 3.6056, 3.6056], dtype=ht.float32, device=cpu:0, split=None)
"""
# catch int64 operation crash on MPS. TODO: issue still persists in 2.3.0, check 2.4, report to PyTorch
t1_ismps = getattr(getattr(t1, "device", "cpu"), "torch_device", "cpu").startswith("mps")
t2_ismps = getattr(getattr(t2, "device", "cpu"), "torch_device", "cpu").startswith("mps")
if t1_ismps or t2_ismps:
t1_isint64 = getattr(t1, "dtype", None) == types.int64
t2_isint64 = getattr(t2, "dtype", None) == types.int64
if t1_isint64 or t2_isint64:
raise TypeError(
f"hypot on MPS does not support int64 dtype, got {t1.dtype}, {t2.dtype}"
)

try:
res = _operations.__binary_op(torch.hypot, a, b, out, where)
res = _operations.__binary_op(torch.hypot, t1, t2, out, where)
except RuntimeError:
# every other possibility is caught by __binary_op
raise TypeError(f"Not implemented for array dtype, got {a.dtype}, {b.dtype}")

raise TypeError(f"hypot on CPU does not support Int dtype, got {t1.dtype}, {t2.dtype}")
return res
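A hedged illustration of the new guard on an Apple-silicon machine (the "gpu" device string maps to mps per the devices.py changes below):

import heat as ht

a = ht.array([1, 2, 3], dtype=ht.int64, device="gpu")  # backed by mps:0 on Apple silicon
b = ht.array([4, 5, 6], dtype=ht.int64, device="gpu")
try:
    ht.hypot(a, b)
except TypeError as err:
    print(err)  # "hypot on MPS does not support int64 dtype, ..."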


@@ -1704,14 +1730,25 @@ def hypot_(t1: DNDarray, t2: DNDarray) -> DNDarray:
def wrap_hypot_(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
return a.hypot_(b)

# catch int64 operation crash on MPS
t1_ismps = getattr(getattr(t1, "device", "cpu"), "torch_device", "cpu").startswith("mps")
t2_ismps = getattr(getattr(t2, "device", "cpu"), "torch_device", "cpu").startswith("mps")
if t1_ismps or t2_ismps:
t1_isint64 = getattr(t1, "dtype", None) == types.int64
t2_isint64 = getattr(t2, "dtype", None) == types.int64
if t1_isint64 or t2_isint64:
raise TypeError(
f"hypot_ on MPS does not support int64 dtype, got {t1.dtype}, {t2.dtype}"
)

try:
return _operations.__binary_op(wrap_hypot_, t1, t2, out=t1)
except NotImplementedError:
raise ValueError(
f"In-place operation not allowed: operands are distributed along different axes. \n Operand 1 with shape {t1.shape} is split along axis {t1.split}. \n Operand 2 with shape {t2.shape} is split along axis {t2.split}."
)
except RuntimeError:
raise TypeError(f"Not implemented for array dtype, got {t1.dtype}, {t2.dtype}")
raise TypeError(f"hypot on CPU does not support Int dtype, got {t1.dtype}, {t2.dtype}")


DNDarray.hypot_ = hypot_
28 changes: 26 additions & 2 deletions heat/core/devices.py
@@ -16,13 +16,13 @@

class Device:
"""
Implements a compute device. HeAT can run computations on different compute devices or backends.
Implements a compute device. Heat can run computations on different compute devices or backends.
A device describes the device type and id on which said computation should be carried out.

Parameters
----------
device_type : str
Represents HeAT's device name
Represents Heat's device name
device_id : int
The device id
torch_device : str
@@ -34,6 +34,8 @@ class Device:
device(cpu:0)
>>> ht.Device("gpu", 0, "cuda:0")
device(gpu:0)
>>> ht.Device("gpu", 0, "mps:0") # on Apple M1/M2
device(gpu:0)
"""

def __init__(self, device_type: str, device_id: int, torch_device: str):
@@ -133,6 +135,28 @@ def __eq__(self, other: Any) -> bool:
# the GPU device should be exported as global symbol
__all__.append("gpu")

elif torch.backends.mps.is_built() and torch.backends.mps.is_available():
# Apple MPS available
gpu_id = 0
# create a new GPU device
gpu = Device("gpu", gpu_id, "mps:{}".format(gpu_id))
"""
The standard GPU Device on Apple M1/M2

Examples
--------
>>> ht.cpu
device(cpu:0)
>>> ht.ones((2, 3), device=ht.gpu)
DNDarray([[1., 1., 1.],
[1., 1., 1.]], dtype=ht.float32, device=mps:0, split=None)
"""
# add a GPU device string
__device_mapping[gpu.device_type] = gpu
__device_mapping["mps"] = gpu
# the GPU device should be exported as global symbol
__all__.append("gpu")


def get_device() -> Device:
"""
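For downstream code that needs to branch on the backend, a hedged sketch of an MPS check built on the mapping above (it mirrors the is_mps checks used throughout this PR's tests):

import heat as ht

def default_device_is_mps() -> bool:
    # Device.torch_device is a string such as "mps:0" once the mapping above has run
    return ht.get_device().torch_device.startswith("mps")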