diff --git a/.github/ISSUE_TEMPLATE/vulnerability.yml b/.github/ISSUE_TEMPLATE/vulnerability.yml
index be07e18a75..e264c89fbb 100644
--- a/.github/ISSUE_TEMPLATE/vulnerability.yml
+++ b/.github/ISSUE_TEMPLATE/vulnerability.yml
@@ -1,4 +1,4 @@
-name: "Vulnerability Report"
+name: "\U0001F6A8 Vulnerability Report"
description: Report a security vulnerability in our project.
title: "[VULNERABILITY]: "
labels: ["security, High Priority"]
@@ -14,50 +14,50 @@ body:
attributes:
label: Affected Version(s)
description: List the affected versions of the library.
- validations:
- required: true
+ validations:
+ required: true
- type: textarea
id: severity
attributes:
label: Severity
description: Specify the severity of the vulnerability (e.g., Low/Medium/High/Critical).
- validations:
- required: true
+ validations:
+ required: true
- type: textarea
id: description
attributes:
label: Description
description: Provide a clear and concise description of the security vulnerability.
- validations:
- required: true
+ validations:
+ required: true
- type: textarea
id: steps-to-reproduce
attributes:
label: Steps to Reproduce
description: Outline the steps to reproduce the vulnerability, including any relevant code snippets or configuration settings.
- validations:
- required: true
+ validations:
+ required: true
- type: textarea
id: expected-behavior
attributes:
label: Expected Behavior
description: Explain what you expected to happen when following the steps above.
- validations:
- required: true
+ validations:
+ required: true
- type: textarea
id: actual-behavior
attributes:
label: Actual Behavior
description: Describe what actually happened when you followed the steps above, highlighting the security issue.
- validations:
- required: true
+ validations:
+ required: true
- type: textarea
id: impact
attributes:
label: Impact
description: Discuss the potential impact of this vulnerability, including any possible consequences or risks associated with its exploitation.
- validations:
- required: true
+ validations:
+ required: true
- type: textarea
id: proof-of-concept
attributes:
diff --git a/.github/workflows/bench_trigger.yml b/.github/workflows/bench_trigger.yml
index deb06e8acf..be12d13c7c 100644
--- a/.github/workflows/bench_trigger.yml
+++ b/.github/workflows/bench_trigger.yml
@@ -28,7 +28,6 @@ jobs:
-F "variables[PR]=${{ github.event.pull_request.number }}" \
-F "variables[AUTHOR]=${{ github.event.pull_request.assignee.login }}" \
https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline
- echo sha
- name: Trigger benchmarks (Push main)
id: setup_push
if: ${{ github.event_name == 'push' }}
@@ -40,9 +39,11 @@ jobs:
-F "ref=main" \
-F "variables[SHA]=$GITHUB_SHA" \
-F "variables[SHORT_SHA]=${SHORT_SHA}" \
+ -F "variables[BRANCH]=main" \
-F "variables[AUTHOR]=${{ github.event.head_commit.committer.username }}" \
https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline
- name: Create status
+ if: ${{ steps.setup_pr.outcome == 'success' || steps.setup_push.outcome == 'success' }}
run: |
curl -L -X POST \
-H "Accept: application/vnd.github+json" \
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 97ffcaa70f..befc6b0336 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -5,17 +5,17 @@ on:
heat_version:
description: 'Heat version'
required: true
- default: '1.2.2'
+ default: 'latest'
type: string
pytorch_img:
description: 'Base PyTorch Img'
required: true
- default: '23.03-py3'
+ default: '23.05-py3'
type: string
name:
description: 'Output Image name'
required: true
- default: 'heat:1.2.2_torch1.13_cu12.1'
+ default: 'heat:1.3.0_torch2.0.0_cu12.1'
type: string
jobs:
build-and-push-img:
@@ -43,7 +43,7 @@ jobs:
name: Build
uses: docker/build-push-action@v4
with:
- context: docker/
+ file: docker/Dockerfile.release
build-args: |
HEAT_VERSION=${{ inputs.heat_version }}
PYTORCH_IMG=${{ inputs.pytorch_img}}
@@ -59,7 +59,7 @@ jobs:
name: Build and push
uses: docker/build-push-action@v4
with:
- context: docker/
+ file: docker/Dockerfile.release
build-args: |
HEAT_VERSION=${{ inputs.heat_version }}
PYTORCH_IMG=${{ inputs.pytorch_img}}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ce57d1537e..c10c063e92 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,17 +2,21 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v2.0.0
+ rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- - id: flake8
-- repo: https://github.com/psf/black
- rev: 23.9.1
+ - id: check-toml
+- repo: https://github.com/psf/black-pre-commit-mirror
+ rev: 23.10.1
hooks:
- id: black
+- repo: https://github.com/PyCQA/flake8
+ rev: 6.1.0
+ hooks:
+ - id: flake8
- repo: https://github.com/pycqa/pydocstyle
rev: 6.3.0 # pick a git hash / tag to point to
hooks:
diff --git a/README.md b/README.md
index 062dc9b415..f19e8dd99c 100644
--- a/README.md
+++ b/README.md
@@ -14,151 +14,165 @@ Heat is a distributed tensor framework for high performance data analytics.
[![license: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![PyPI Version](https://img.shields.io/pypi/v/heat)](https://pypi.org/project/heat/)
[![Downloads](https://pepy.tech/badge/heat)](https://pepy.tech/project/heat)
+[![Anaconda-Server Badge](https://anaconda.org/conda-forge/heat/badges/version.svg)](https://anaconda.org/conda-forge/heat)
[![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu)
[![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7688/badge)](https://bestpractices.coreinfrastructure.org/projects/7688)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2531472.svg)](https://doi.org/10.5281/zenodo.2531472)
[![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
-# Goals
+# Table of Contents
+ - [What is Heat for?](#what-is-heat-for)
+ - [Features](#features)
+ - [Getting Started](#getting-started)
+ - [Installation](#installation)
+ - [Requirements](#requirements)
+ - [pip](#pip)
+ - [conda](#conda)
+ - [Support Channels](#support-channels)
+ - [Contribution guidelines](#contribution-guidelines)
+ - [Resources](#resources)
+ - [License](#license)
+ - [Citing Heat](#citing-heat)
+ - [FAQ](#faq)
+ - [Acknowledgements](#acknowledgements)
-Heat is a flexible and seamless open-source software for high performance data
-analytics and machine learning. It provides highly optimized algorithms and data
-structures for tensor computations using CPUs, GPUs, and distributed cluster
-systems on top of MPI. The goal of Heat is to fill the gap between data
-analytics and machine learning libraries with a strong focus on single-node
-performance, and traditional high-performance computing (HPC). Heat's generic
-Python-first programming interface integrates seamlessly with the existing data
-science ecosystem and makes it as effortless as using numpy to write scalable
-scientific and data science applications.
-Heat allows you to tackle your actual Big Data challenges that go beyond the
-computational and memory needs of your laptop and desktop.
+# What is Heat for?
+
+Heat builds on [PyTorch](https://pytorch.org/) and [mpi4py](https://mpi4py.readthedocs.io) to provide high-performance computing infrastructure for memory-intensive applications within the NumPy/SciPy ecosystem.
+
+
+With Heat you can:
+- port existing NumPy/SciPy code from single-CPU to multi-node clusters with minimal coding effort;
+- exploit the entire, cumulative RAM of your many nodes for memory-intensive operations and algorithms;
+- run your NumPy/SciPy code on GPUs (CUDA, ROCm, coming up: Apple MPS).
+
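+Porting a NumPy-style workflow is mostly a matter of choosing a `split` axis along which the data is distributed (a minimal sketch):
+
+```python
+import heat as ht
+
+# a 1-D array, distributed along axis 0 over all MPI processes
+x = ht.arange(10, split=0)
+print(x)
+# DNDarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=ht.int32, device=cpu:0, split=0)
+
+# element-wise operations act on the distributed data transparently
+y = ht.ones(10, split=0)
+print(x + y)
+# DNDarray([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.], dtype=ht.float32, device=cpu:0, split=0)
+```
+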
+For an example that highlights the benefits of multi-node parallelism and hardware acceleration, and how easily this can be achieved with Heat, see, e.g., our [blog post on truncated SVD of a 200GB data set](https://helmholtz-analytics.github.io/heat/2023/06/16/new-feature-hsvd.html).
+
+Check out our [coverage tables](coverage_tables.md) to see which NumPy, SciPy, and scikit-learn functions are already supported.
+
+ If you need a functionality that is not yet supported:
+ - [search existing issues](https://github.com/helmholtz-analytics/heat/issues) and make sure to leave a comment if someone else already requested it;
+ - [open a new issue](https://github.com/helmholtz-analytics/heat/issues/new/choose).
+
+
+Check out our [features](#features) and the [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) for a complete list of functionalities.
# Features
-* High-performance n-dimensional tensors
+* High-performance n-dimensional arrays
* CPU, GPU, and distributed computation using MPI
* Powerful data analytics and machine learning methods
-* Abstracted communication via split tensors
-* Python API
+* Seamless integration with the NumPy/SciPy ecosystem
+* Python array API (work in progress)
-# Support Channels
-We use [GitHub Discussions](https://github.com/helmholtz-analytics/heat/discussions) as a forum for questions about Heat.
-If you found a bug or miss a feature, then please file a new [issue](https://github.com/helmholtz-analytics/heat/issues/new/choose).
+# Getting Started
-# Requirements
+Go to [Quick Start](quick_start.md) for a quick overview. For more details, see [Installation](#installation).
-Heat requires Python 3.7 or newer.
-Heat is based on [PyTorch](https://pytorch.org/). Specifically, we are exploiting
-PyTorch's support for GPUs *and* MPI parallelism. For MPI support we utilize
-[mpi4py](https://mpi4py.readthedocs.io). Both packages can be installed via pip
-or automatically using the setup.py.
+**You can test your setup** by running the [`heat_test.py`](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_test.py) script:
-# Installation
+```shell
+mpirun -n 2 python heat_test.py
+```
-Tagged releases are made available on the
-[Python Package Index (PyPI)](https://pypi.org/project/heat/). You can typically
-install the latest version with
+It should print something like this:
-```
-$ pip install heat[hdf5,netcdf]
+```shell
+x is distributed: True
+Global DNDarray x: DNDarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=ht.int32, device=cpu:0, split=0)
+Global DNDarray x:
+Local torch tensor on rank 0 : tensor([0, 1, 2, 3, 4], dtype=torch.int32)
+Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32)
```
-where the part in brackets is a list of optional dependencies. You can omit
-it, if you do not need HDF5 or NetCDF support.
+Our Jupyter Notebook [**Tutorial**](https://github.com/helmholtz-analytics/heat/blob/main/scripts/) illustrates Heat's basics. More tutorials [here](https://heat.readthedocs.io/en/latest/tutorials.html).
-**It is recommended to use the most recent supported version of PyTorch!**
+The complete documentation of the latest version is always deployed on
+[Read the Docs](https://heat.readthedocs.io/).
-**It is also very important to ensure that the PyTorch version is compatible with the local CUDA installation.**
-More information can be found [here](https://pytorch.org/get-started/locally/).
-# Hacking
+
+# Installation
-The installation can then be done from the checked-out sources with
+## Requirements
-```
-$ pip install heat[hdf5,netcdf,dev]
-```
+### Basics
+- python >= 3.8
+- MPI (OpenMPI, MPICH, Intel MPI, etc.)
+- mpi4py >= 3.0.0
+- pytorch >= 1.8.0
-# Getting Started
+### Parallel I/O
+- h5py
+- netCDF4
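+
+With `h5py` installed, HDF5 files can be read in parallel directly into a distributed array. A minimal sketch (the file name and dataset name below are placeholders):
+
+```python
+import heat as ht
+
+# each MPI process reads only its local slice of the dataset
+X = ht.load_hdf5("data.h5", dataset="data", split=0)
+```
+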
-TL;DR: [Quick Start](quick_start.md) (Read this to get a quick overview of Heat).
+### GPU support
+In order to do computations on your GPU(s):
+- your CUDA or ROCm installation must match your hardware and its drivers;
+- your [PyTorch installation](https://pytorch.org/get-started/locally/) must be compiled with CUDA/ROCm support.
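+
+With both in place, arrays can be allocated directly on the GPU (a minimal sketch; `device="gpu"` selects the GPU backend your PyTorch build supports):
+
+```python
+import heat as ht
+
+# allocate a distributed array on the GPU(s); subsequent operations stay on the device
+x = ht.zeros((1000, 1000), split=0, device="gpu")
+print(x.device)
+```
+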
-Check out our Jupyter Notebook [**Tutorial**](https://github.com/helmholtz-analytics/heat/blob/main/scripts/)
-right here on GitHub or in the /scripts directory, to learn and understand about the basics and working of Heat.
+### HPC systems
+On most HPC systems you will not be able to install/compile MPI or CUDA/ROCm yourself. Instead, you will most likely need to load a pre-installed MPI and/or CUDA/ROCm module from the module system. You may even find PyTorch, h5py, or mpi4py provided as (part of) such a module. Note that for optimal performance on GPU, you need to use an MPI library that has been compiled with CUDA/ROCm support (so-called "CUDA-aware MPI").
-The complete documentation of the latest version is always deployed on
-[Read the Docs](https://heat.readthedocs.io/).
-***Try your first Heat program***
+## pip
+Install the latest version with
-```shell
-$ python
+```bash
+pip install heat[hdf5,netcdf]
```
+where the part in brackets is a list of optional dependencies. You can omit
+it if you do not need HDF5 or NetCDF support.
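+
+A quick sanity check after installation (a minimal sketch; `ht.get_device()` reports the current default device):
+
+```python
+import heat as ht
+
+print(ht.__version__)   # the version that was just installed
+print(ht.get_device())  # cpu unless a GPU device has been set as default
+```
+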
-```python
->>> import heat as ht
->>> x = ht.arange(10,split=0)
->>> print(x)
-DNDarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=ht.int32, device=cpu:0, split=0)
->>> y = ht.ones(10,split=0)
->>> print(y)
-DNDarray([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=ht.float32, device=cpu:0, split=0)
->>> print(x + y)
-DNDarray([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.], dtype=ht.float32, device=cpu:0, split=0)
-```
+## conda
-### Also, you can test your setup by running the [`heat_test.py`](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_test.py) script:
+The conda build includes all dependencies, **including OpenMPI**.
+```bash
+conda install -c conda-forge heat
+```
-```shell
-mpirun -n 2 python heat_test.py
-```
+# Support Channels
-### It should print something like this:
+Go ahead and ask questions on [GitHub Discussions](https://github.com/helmholtz-analytics/heat/discussions). If you find a bug or are missing a feature, please file a new [issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo) (sign up with your GitHub credentials). Once you log in, you can introduce yourself on the `Town Square` channel.
-```shell
-x is distributed: True
-Global DNDarray x: DNDarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=ht.int32, device=cpu:0, split=0)
-Global DNDarray x:
-Local torch tensor on rank 0 : tensor([0, 1, 2, 3, 4], dtype=torch.int32)
-Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32)
-```
-## Resources:
+# Contribution guidelines
+
+**We welcome contributions from the community. If you want to contribute to Heat, be sure to review the [Contribution Guidelines](contributing.md) and [Resources](#resources) before getting started!**
+
+We use [GitHub issues](https://github.com/helmholtz-analytics/heat/issues) for tracking requests and bugs; please see [Discussions](https://github.com/helmholtz-analytics/heat/discussions) for general questions and discussion. You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo) (sign up with your GitHub credentials). Once you log in, you can introduce yourself on the `Town Square` channel.
+
+If you’re unsure where to start or how your skills fit in, reach out! You can ask us here on GitHub by leaving a comment on a relevant issue that is already open.
+
+**If you are new to contributing to open source, [this guide](https://opensource.guide/how-to-contribute/) helps explain why, what, and how to get involved.**
+
+
+## Resources
* [Heat Tutorials](https://heat.readthedocs.io/en/latest/tutorials.html)
* [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html)
### Parallel Computing and MPI:
-* @davidhenty's [course](https://www.archer2.ac.uk/training/courses/200514-mpi/)
+* David Henty's [course](https://www.archer2.ac.uk/training/courses/200514-mpi/)
* Wes Kendall's [Tutorials](https://mpitutorial.com/tutorials/)
+* Rolf Rabenseifner's [MPI course material](https://www.hlrs.de/training/self-study-materials/mpi-course-material) (including C, Fortran **and** Python via `mpi4py`)
### mpi4py
* [mpi4py docs](https://mpi4py.readthedocs.io/en/stable/tutorial.html)
* [Tutorial](https://www.kth.se/blogs/pdc/2019/08/parallel-programming-in-python-mpi4py-part-1/)
-
-# Contribution guidelines
-
-**We welcome contributions from the community, if you want to contribute to Heat, be sure to review the [Contribution Guidelines](contributing.md) before getting started!**
-
-We use [GitHub issues](https://github.com/helmholtz-analytics/heat/issues) for tracking requests and bugs, please see [Discussions](https://github.com/helmholtz-analytics/heat/discussions) for general questions and discussion, and You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo). You can sign up with your GitHub credentials. Once you log in, you can introduce yourself on the `Town Square` channel.
-
-Small improvements or fixes are always appreciated; issues labeled as **"good first issue"** may be a good starting point.
-
-If you’re unsure where to start or how your skills fit in, reach out! You can ask us here on GitHub, by leaving a comment on a relevant issue that is already open.
-
-**If you are new to contributing to open source, [this guide](https://opensource.guide/how-to-contribute/) helps explain why, what, and how to get involved.**
-
# License
Heat is distributed under the MIT license, see our
@@ -166,7 +180,9 @@ Heat is distributed under the MIT license, see our
# Citing Heat
-If you find Heat helpful for your research, please mention it in your publications. You can cite:
+
+
+Please do mention Heat in your publications if it helped your research. You can cite:
* Götz, M., Debus, C., Coquelin, D., Krajsek, K., Comito, C., Knechtges, P., Hagemeier, B., Tarnawa, M., Hanselmann, S., Siggel, S., Basermann, A. & Streit, A. (2020). HeAT - a Distributed and GPU-accelerated Tensor Framework for Data Analytics. In 2020 IEEE International Conference on Big Data (Big Data) (pp. 276-287). IEEE, DOI: 10.1109/BigData50022.2020.9378050.
@@ -195,6 +211,13 @@ If you find Heat helpful for your research, please mention it in your publicatio
doi={10.1109/BigData50022.2020.9378050}
}
```
+# FAQ
+Work in progress...
+
+
## Acknowledgements
@@ -202,8 +225,11 @@ If you find Heat helpful for your research, please mention it in your publicatio
Networking Fund](https://www.helmholtz.de/en/about_us/the_association/initiating_and_networking/)
under project number ZT-I-0003 and the Helmholtz AI platform grant.*
+*This project has received funding from Google Summer of Code (GSoC) in 2022.*
+
+
---
diff --git a/benchmarks/2020/distance_matrix/dask-chunks-cpu.py b/benchmarks/2020/distance_matrix/dask-chunks-cpu.py
index 38485e6fb2..d0c94369ed 100644
--- a/benchmarks/2020/distance_matrix/dask-chunks-cpu.py
+++ b/benchmarks/2020/distance_matrix/dask-chunks-cpu.py
@@ -31,4 +31,4 @@
start = time.perf_counter()
dist = dmm.euclidean_distances(data, data).compute()
end = time.perf_counter()
- print("\t{}s".format(end - start))
+ print(f"\t{end - start}s")
diff --git a/benchmarks/2020/generate_jobscripts.py b/benchmarks/2020/generate_jobscripts.py
index 50051bc492..59d419cacf 100755
--- a/benchmarks/2020/generate_jobscripts.py
+++ b/benchmarks/2020/generate_jobscripts.py
@@ -76,7 +76,7 @@ def jobscripts_from(
if key in SKIP:
continue
- parameters.append("--{}".format(key))
+ parameters.append(f"--{key}")
parameters.append(str(value))
for script, benchmark in configuration["benchmarks"].items():
diff --git a/benchmarks/cb/main.py b/benchmarks/cb/main.py
index 1e69683f07..52cd18d76f 100644
--- a/benchmarks/cb/main.py
+++ b/benchmarks/cb/main.py
@@ -9,7 +9,9 @@
from linalg import run_linalg_benchmarks
from cluster import run_cluster_benchmarks
from manipulations import run_manipulation_benchmarks
+from preprocessing import run_preprocessing_benchmarks
run_linalg_benchmarks()
run_cluster_benchmarks()
run_manipulation_benchmarks()
+run_preprocessing_benchmarks()
diff --git a/benchmarks/cb/preprocessing.py b/benchmarks/cb/preprocessing.py
new file mode 100644
index 0000000000..99b7737ed8
--- /dev/null
+++ b/benchmarks/cb/preprocessing.py
@@ -0,0 +1,55 @@
+# flake8: noqa
+import heat as ht
+from mpi4py import MPI
+from perun import monitor
+
+# we benchmark the in-place versions (`copy=False`) of the preprocessing functions
+# for each function, both the forward and the inverse transformation are applied
+
+
+@monitor()
+def apply_inplace_standard_scaler_and_inverse(X):
+ scaler = ht.preprocessing.StandardScaler(copy=False)
+ Y = scaler.fit_transform(X)
+ X = scaler.inverse_transform(Y)
+
+
+@monitor()
+def apply_inplace_min_max_scaler_and_inverse(X):
+ scaler = ht.preprocessing.MinMaxScaler(copy=False)
+ Y = scaler.fit_transform(X)
+ X = scaler.inverse_transform(Y)
+
+
+@monitor()
+def apply_inplace_max_abs_scaler_and_inverse(X):
+ scaler = ht.preprocessing.MaxAbsScaler(copy=False)
+ Y = scaler.fit_transform(X)
+ X = scaler.inverse_transform(Y)
+
+
+@monitor()
+def apply_inplace_robust_scaler_and_inverse(X):
+ scaler = ht.preprocessing.RobustScaler(copy=False)
+ Y = scaler.fit_transform(X)
+ X = scaler.inverse_transform(Y)
+
+
+@monitor()
+def apply_inplace_normalizer(X):
+ scaler = ht.preprocessing.Normalizer(copy=False)
+ scaler.fit_transform(X)
+
+
+def run_preprocessing_benchmarks():
+ n_data_points = 5000
+ n_features = 50
+ X = ht.random.randn(n_data_points, n_features, split=0)
+
+ apply_inplace_standard_scaler_and_inverse(X)
+ apply_inplace_min_max_scaler_and_inverse(X)
+ apply_inplace_max_abs_scaler_and_inverse(X)
+ apply_inplace_robust_scaler_and_inverse(X)
+ apply_inplace_normalizer(X)
+
+ del X
diff --git a/coverage_tables.md b/coverage_tables.md
new file mode 100644
index 0000000000..f90dadfba4
--- /dev/null
+++ b/coverage_tables.md
@@ -0,0 +1,407 @@
+# NumPy Coverage Tables
+This file is automatically generated by `./scripts/numpy_coverage_tables.py`.
+Please do not edit this file directly, but instead edit `./scripts/numpy_coverage_tables.py` and run it to generate this file.
+The following tables show the NumPy functions supported by Heat.
+## Table of Contents
+1. [NumPy Mathematical Functions](#numpy--mathematical-functions)
+2. [NumPy Array Creation](#numpy-array-creation)
+3. [NumPy Array Manipulation](#numpy-array-manipulation)
+4. [NumPy Binary Operations](#numpy-binary-operations)
+5. [NumPy IO Operations](#numpy-io-operations)
+6. [NumPy LinAlg Operations](#numpy-linalg-operations)
+7. [NumPy Logic Functions](#numpy-logic-functions)
+8. [NumPy Sorting Operations](#numpy-sorting-operations)
+9. [NumPy Statistical Operations](#numpy-statistical-operations)
+10. [NumPy Random Operations](#numpy-random-operations)
+
+## NumPy Mathematical Functions
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Mathematical Functions | Heat |
+|---|---|
+| sin | ✅ |
+| cos | ✅ |
+| tan | ✅ |
+| arcsin | ✅ |
+| arccos | ✅ |
+| arctan | ✅ |
+| hypot | ✅ |
+| arctan2 | ✅ |
+| degrees | ✅ |
+| radians | ✅ |
+| unwrap | ❌ |
+| deg2rad | ✅ |
+| rad2deg | ✅ |
+| sinh | ✅ |
+| cosh | ✅ |
+| tanh | ✅ |
+| arcsinh | ✅ |
+| arccosh | ✅ |
+| arctanh | ✅ |
+| round | ✅ |
+| around | ❌ |
+| rint | ❌ |
+| fix | ❌ |
+| floor | ✅ |
+| ceil | ✅ |
+| trunc | ✅ |
+| prod | ✅ |
+| sum | ✅ |
+| nanprod | ✅ |
+| nansum | ✅ |
+| cumprod | ✅ |
+| cumsum | ✅ |
+| nancumprod | ❌ |
+| nancumsum | ❌ |
+| diff | ✅ |
+| ediff1d | ❌ |
+| gradient | ❌ |
+| cross | ✅ |
+| trapz | ❌ |
+| exp | ✅ |
+| expm1 | ✅ |
+| exp2 | ✅ |
+| log | ✅ |
+| log10 | ✅ |
+| log2 | ✅ |
+| log1p | ✅ |
+| logaddexp | ✅ |
+| logaddexp2 | ✅ |
+| i0 | ❌ |
+| sinc | ❌ |
+| signbit | ✅ |
+| copysign | ✅ |
+| frexp | ❌ |
+| ldexp | ❌ |
+| nextafter | ❌ |
+| spacing | ❌ |
+| lcm | ✅ |
+| gcd | ✅ |
+| add | ✅ |
+| reciprocal | ❌ |
+| positive | ✅ |
+| negative | ✅ |
+| multiply | ✅ |
+| divide | ✅ |
+| power | ✅ |
+| subtract | ✅ |
+| true_divide | ❌ |
+| floor_divide | ✅ |
+| float_power | ❌ |
+| fmod | ✅ |
+| mod | ✅ |
+| modf | ✅ |
+| remainder | ✅ |
+| divmod | ❌ |
+| angle | ✅ |
+| real | ✅ |
+| imag | ✅ |
+| conj | ✅ |
+| conjugate | ✅ |
+| maximum | ✅ |
+| max | ✅ |
+| amax | ❌ |
+| fmax | ❌ |
+| nanmax | ❌ |
+| minimum | ✅ |
+| min | ✅ |
+| amin | ❌ |
+| fmin | ❌ |
+| nanmin | ❌ |
+| convolve | ✅ |
+| clip | ✅ |
+| sqrt | ✅ |
+| cbrt | ❌ |
+| square | ✅ |
+| absolute | ✅ |
+| fabs | ✅ |
+| sign | ✅ |
+| heaviside | ❌ |
+| nan_to_num | ✅ |
+| real_if_close | ❌ |
+| interp | ❌ |
+## NumPy Array Creation
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Array Creation | Heat |
+|---|---|
+| empty | ✅ |
+| empty_like | ✅ |
+| eye | ✅ |
+| identity | ❌ |
+| ones | ✅ |
+| ones_like | ✅ |
+| zeros | ✅ |
+| zeros_like | ✅ |
+| full | ✅ |
+| full_like | ✅ |
+| array | ✅ |
+| asarray | ✅ |
+| asanyarray | ❌ |
+| ascontiguousarray | ❌ |
+| asmatrix | ❌ |
+| copy | ✅ |
+| frombuffer | ❌ |
+| from_dlpack | ❌ |
+| fromfile | ❌ |
+| fromfunction | ❌ |
+| fromiter | ❌ |
+| fromstring | ❌ |
+| loadtxt | ❌ |
+| arange | ✅ |
+| linspace | ✅ |
+| logspace | ✅ |
+| geomspace | ❌ |
+| meshgrid | ✅ |
+| mgrid | ❌ |
+| ogrid | ❌ |
+| diag | ✅ |
+| diagflat | ❌ |
+| tri | ❌ |
+| tril | ✅ |
+| triu | ✅ |
+| vander | ❌ |
+| mat | ❌ |
+| bmat | ❌ |
+## NumPy Array Manipulation
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Array Manipulation | Heat |
+|---|---|
+| copyto | ❌ |
+| shape | ✅ |
+| reshape | ✅ |
+| ravel | ✅ |
+| flat | ❌ |
+| flatten | ✅ |
+| moveaxis | ✅ |
+| rollaxis | ❌ |
+| swapaxes | ✅ |
+| T | ❌ |
+| transpose | ✅ |
+| atleast_1d | ❌ |
+| atleast_2d | ❌ |
+| atleast_3d | ❌ |
+| broadcast | ❌ |
+| broadcast_to | ✅ |
+| broadcast_arrays | ✅ |
+| expand_dims | ✅ |
+| squeeze | ✅ |
+| asarray | ✅ |
+| asanyarray | ❌ |
+| asmatrix | ❌ |
+| asfarray | ❌ |
+| asfortranarray | ❌ |
+| ascontiguousarray | ❌ |
+| asarray_chkfinite | ❌ |
+| require | ❌ |
+| concatenate | ✅ |
+| stack | ✅ |
+| block | ❌ |
+| vstack | ✅ |
+| hstack | ✅ |
+| dstack | ❌ |
+| column_stack | ✅ |
+| row_stack | ✅ |
+| split | ✅ |
+| array_split | ❌ |
+| dsplit | ✅ |
+| hsplit | ✅ |
+| vsplit | ✅ |
+| tile | ✅ |
+| repeat | ✅ |
+| delete | ❌ |
+| insert | ❌ |
+| append | ❌ |
+| resize | ❌ |
+| trim_zeros | ❌ |
+| unique | ✅ |
+| flip | ✅ |
+| fliplr | ✅ |
+| flipud | ✅ |
+| reshape | ✅ |
+| roll | ✅ |
+| rot90 | ✅ |
+## NumPy Binary Operations
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Binary Operations | Heat |
+|---|---|
+| bitwise_and | ✅ |
+| bitwise_or | ✅ |
+| bitwise_xor | ✅ |
+| invert | ✅ |
+| left_shift | ✅ |
+| right_shift | ✅ |
+| packbits | ❌ |
+| unpackbits | ❌ |
+| binary_repr | ❌ |
+## NumPy IO Operations
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy IO Operations | Heat |
+|---|---|
+| load | ✅ |
+| save | ✅ |
+| savez | ❌ |
+| savez_compressed | ❌ |
+| loadtxt | ❌ |
+| savetxt | ❌ |
+| genfromtxt | ❌ |
+| fromregex | ❌ |
+| fromstring | ❌ |
+| tofile | ❌ |
+| tolist | ❌ |
+| array2string | ❌ |
+| array_repr | ❌ |
+| array_str | ❌ |
+| format_float_positional | ❌ |
+| format_float_scientific | ❌ |
+| memmap | ❌ |
+| open_memmap | ❌ |
+| set_printoptions | ✅ |
+| get_printoptions | ✅ |
+| set_string_function | ❌ |
+| printoptions | ❌ |
+| binary_repr | ❌ |
+| base_repr | ❌ |
+| DataSource | ❌ |
+| format | ❌ |
+## NumPy LinAlg Operations
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy LinAlg Operations | Heat |
+|---|---|
+| dot | ✅ |
+| linalg.multi_dot | ❌ |
+| vdot | ✅ |
+| inner | ❌ |
+| outer | ✅ |
+| matmul | ✅ |
+| tensordot | ❌ |
+| einsum | ❌ |
+| einsum_path | ❌ |
+| linalg.matrix_power | ❌ |
+| kron | ❌ |
+| linalg.cholesky | ❌ |
+| linalg.qr | ✅ |
+| linalg.svd | ❌ |
+| linalg.eig | ❌ |
+| linalg.eigh | ❌ |
+| linalg.eigvals | ❌ |
+| linalg.eigvalsh | ❌ |
+| linalg.norm | ✅ |
+| linalg.cond | ❌ |
+| linalg.det | ✅ |
+| linalg.matrix_rank | ❌ |
+| linalg.slogdet | ❌ |
+| trace | ✅ |
+| linalg.solve | ❌ |
+| linalg.tensorsolve | ❌ |
+| linalg.lstsq | ❌ |
+| linalg.inv | ✅ |
+| linalg.pinv | ❌ |
+| linalg.tensorinv | ❌ |
+## NumPy Logic Functions
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Logic Functions | Heat |
+|---|---|
+| all | ✅ |
+| any | ✅ |
+| isfinite | ✅ |
+| isinf | ✅ |
+| isnan | ✅ |
+| isnat | ❌ |
+| isneginf | ✅ |
+| isposinf | ✅ |
+| iscomplex | ✅ |
+| iscomplexobj | ❌ |
+| isfortran | ❌ |
+| isreal | ✅ |
+| isrealobj | ❌ |
+| isscalar | ❌ |
+| logical_and | ✅ |
+| logical_or | ✅ |
+| logical_not | ✅ |
+| logical_xor | ✅ |
+| allclose | ✅ |
+| isclose | ✅ |
+| array_equal | ❌ |
+| array_equiv | ❌ |
+| greater | ✅ |
+| greater_equal | ✅ |
+| less | ✅ |
+| less_equal | ✅ |
+| equal | ✅ |
+| not_equal | ✅ |
+## NumPy Sorting Operations
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Sorting Operations | Heat |
+|---|---|
+| sort | ✅ |
+| lexsort | ❌ |
+| argsort | ❌ |
+| sort | ✅ |
+| sort_complex | ❌ |
+| partition | ❌ |
+| argpartition | ❌ |
+| argmax | ✅ |
+| nanargmax | ❌ |
+| argmin | ✅ |
+| nanargmin | ❌ |
+| argwhere | ❌ |
+| nonzero | ✅ |
+| flatnonzero | ❌ |
+| where | ✅ |
+| searchsorted | ❌ |
+| extract | ❌ |
+| count_nonzero | ❌ |
+## NumPy Statistical Operations
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Statistical Operations | Heat |
+|---|---|
+| ptp | ❌ |
+| percentile | ✅ |
+| nanpercentile | ❌ |
+| quantile | ❌ |
+| nanquantile | ❌ |
+| median | ✅ |
+| average | ✅ |
+| mean | ✅ |
+| std | ✅ |
+| var | ✅ |
+| nanmedian | ❌ |
+| nanmean | ❌ |
+| nanstd | ❌ |
+| nanvar | ❌ |
+| corrcoef | ❌ |
+| correlate | ❌ |
+| cov | ✅ |
+| histogram | ✅ |
+| histogram2d | ❌ |
+| histogramdd | ❌ |
+| bincount | ✅ |
+| histogram_bin_edges | ❌ |
+| digitize | ✅ |
+## NumPy Random Operations
+[Back to Table of Contents](#table-of-contents)
+
+| NumPy Random Operations | Heat |
+|---|---|
+| random.rand | ✅ |
+| random.randn | ✅ |
+| random.randint | ✅ |
+| random.random_integers | ❌ |
+| random.random_sample | ✅ |
+| random.ranf | ✅ |
+| random.sample | ✅ |
+| random.choice | ❌ |
+| random.bytes | ❌ |
+| random.shuffle | ❌ |
+| random.permutation | ✅ |
+| random.seed | ✅ |
+| random.get_state | ✅ |
+| random.set_state | ✅ |
diff --git a/doc/images/fzj_logo.svg b/doc/images/fzj_logo.svg
index 53868ecb83..3b765373b7 100644
--- a/doc/images/fzj_logo.svg
+++ b/doc/images/fzj_logo.svg
@@ -1,86 +1,14 @@
-[SVG markup omitted: previous FZJ logo graphic]
+[SVG markup omitted: simplified FZJ logo graphic, "Logo_FZ_Juellich_RGB_schutzzone_weiss"]
diff --git a/doc/requirements.txt b/doc/requirements.txt
index 20a75c89b7..785ccf1c95 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1,5 +1,5 @@
-Sphinx==3.0.3
-sphinx-autoapi===1.3.0
-sphinx_rtd_theme==0.4.3
+Sphinx==7.2.6
+sphinx-autoapi===3.0.0
+sphinx_rtd_theme==1.3.0
sphinxcontrib-napoleon==0.7
-sphinx-copybutton==0.3.3
+sphinx-copybutton==0.5.2
diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst
index f2ab8d1097..1749c0eb52 100644
--- a/doc/source/getting_started.rst
+++ b/doc/source/getting_started.rst
@@ -1,7 +1,7 @@
.. _Installation:
Getting Started
-===============
+==============
Heat is a Python package for accelerated and distributed tensor computations. Internally, it is based on `PyTorch `_. Consequently, all operating systems that support Python and PyTorch also support a Heat installation. Currently, this list contains at least Linux, MacOS and Windows. However, most of our development is done under Linux and interoperability should therefore be optimal.
@@ -31,12 +31,12 @@ If you do not have a recent installation on you system, you may want to upgrade
sudo dnf update python3
-If you have new administrator privileges on your system, because you are working on a cluster for example, make sure to check its *user guide*, the module system (``module spider python``) or get in touch with the administrators.
+If you have no administrator privileges on your system, because you are working on a cluster for example, make sure to check its *user guide*, the module system (``module spider python``) or get in touch with the administrators.
Optional Dependencies
^^^^^^^^^^^^^^^^^^^^^
-You can accelerate computations with Heat in different ways. For GPU acceleration ensure that you have a `CUDA `_ installation on your system. Distributed computations require an MPI stack on you computer. We recommend `MVAPICH `_ or `OpenMPI `_. Finally, for parallel data I/O, Heat offers interface to `HDF5 `_ and `NetCDF `_. You can obtain these packages using your operating system's package manager.
+You can accelerate computations with Heat in different ways. For GPU acceleration ensure that you have a `CUDA `_ installation on your system. Distributed computations require an MPI stack on your computer. We recommend `MVAPICH `_ or `OpenMPI `_. Finally, for parallel data I/O, Heat offers interface to `HDF5 `_ and `NetCDF `_. You can obtain these packages using your operating system's package manager.
Installation
------------
diff --git a/doc/source/tutorial_clustering.rst b/doc/source/tutorial_clustering.rst
index ce6aa61c6b..21b4157065 100644
--- a/doc/source/tutorial_clustering.rst
+++ b/doc/source/tutorial_clustering.rst
@@ -68,8 +68,8 @@ initial centroids.
c1.balance_()
c2.balance_()
- print(f"Number of points assigned to c1: {c1.shape[0]} "
- f"Number of points assigned to c2: {c2.shape[0]} "
+ print(f"Number of points assigned to c1: {c1.shape[0]} \n"
+ f"Number of points assigned to c2: {c2.shape[0]} \n"
f"Centroids = {centroids}")
.. code:: text
@@ -95,7 +95,7 @@ Let's plot the assigned clusters and the respective centroids:
.. image:: ../images/clustering.png
-We can also cluster the data with kmedians. The respective advanced initial centroid sampling is called 'kmedians++'
+We can also cluster the data with kmedians. The respective advanced initial centroid sampling is called 'kmedians++'.
.. code:: python
@@ -110,8 +110,9 @@ We can also cluster the data with kmedians. The respective advanced initial cent
c1.balance_()
c2.balance_()
- print(f"Number of points assigned to c1: {c1.shape[0]}"
- f"Number of points assigned to c2: {c2.shape[0]}")
+ print(f"Number of points assigned to c1: {c1.shape[0]} \n"
+ f"Number of points assigned to c2: {c2.shape[0]} \n"
+ f"Centroids = {centroids}")
Plotting the assigned clusters and the respective centroids:
@@ -132,7 +133,7 @@ The Iris Dataset
------------------------------
The _iris_ dataset is a well known example for clustering analysis. It contains 4 measured features for samples from
three different types of iris flowers. A subset of 150 samples is included in formats h5, csv and netcdf in Heat,
-located under 'heat/heat/datasets/iris.h5', and can be loaded in a distributed manner with Heat's parallel
+located under 'heat/heat/datasets', and can be loaded in a distributed manner with Heat's parallel
dataloader
.. code:: python
diff --git a/doc/source/tutorial_parallel_computation.rst b/doc/source/tutorial_parallel_computation.rst
index 3dde428861..684e775cea 100644
--- a/doc/source/tutorial_parallel_computation.rst
+++ b/doc/source/tutorial_parallel_computation.rst
@@ -70,7 +70,7 @@ Distributed Computing
---------------------
.. warning::
- For the following code examples, make sure to you have `MPI `_ installed.
+ For the following code examples, make sure you have `MPI `_ installed.
With Heat you can even compute in distributed memory environments with multiple computation nodes, like modern high-performance cluster systems. For this, Heat makes use of the fact that operations performed on multi-dimensional arrays tend to be identical for all data items. Hence, they can be processed in data-parallel manner. Heat partitions the total number of data items equally among all processing nodes. A ``DNDarray`` assumes the role of a virtual overlay over these node-local data portions and manages them for you while offering the same interface. Consequently, operations can now be executed in parallel. Each processing node applies them locally to their own data chunk. If necessary, partial results are communicated and automatically combined behind the scenes for correct global results.
@@ -174,7 +174,7 @@ Output:
.. code:: text
- DNDarray([12.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(12., dtype=ht.float32, device=cpu:0, split=None)
The previously ``split=0`` matrix is ``split=None`` after the reduction operation. Obviously, we can also perform operations between (differently) split ``DNDarrays``.
@@ -191,7 +191,7 @@ Output:
DNDarray([[1., 2., 3., 4.],
[1., 2., 3., 4.],
- [1., 2., 3., 4.]], dtype=ht.float32, device=cpu:0, split=0)
+ [1., 2., 3., 4.]], dtype=ht.float32, device=cpu:0, split=1)
[0/3] DNDarray([1., 2., 3., 4.], dtype=ht.int32, device=cpu:0, split=None)
[1/3] DNDarray([1., 2., 3., 4.], dtype=ht.int32, device=cpu:0, split=None)
@@ -200,7 +200,7 @@ Output:
Technical Details
^^^^^^^^^^^^^^^^^
-On a technical level, Heat is inspired by the so-called `Bulk Synchronous Parallel (BSP) `_ processing model. Computations proceed in a series of hierarchical supersteps, each consisting of a number of node-local computations and subsequent communications. In contrast to the classical BSP model, communicated data is available immediately, rather than after the next global synchronization. In Heat, global synchronizations only occurs for collective MPI calls as well as at the program start and termination.
+On a technical level, Heat is inspired by the so-called `Bulk Synchronous Parallel (BSP) `_ processing model. Computations proceed in a series of hierarchical supersteps, each consisting of a number of node-local computations and subsequent communications. In contrast to the classical BSP model, communicated data is available immediately, rather than after the next global synchronization. In Heat, global synchronization only occurs for collective MPI calls as well as at the program start and termination.
.. image:: ../images/bsp.svg
:align: center
@@ -223,13 +223,13 @@ You can start the distributed interactive interpreter by invoking the following
.. note::
- The interactive interpreter does only support a subset of all controls commands.
+ The interactive interpreter only supports a subset of all control commands.
Parallel Performance
--------------------
-When working with parallel and distributed computation in Heat there are some best practices for you may to know about. The following list covers the major ones.
+When working with parallel and distributed computation in Heat there are some best practices for you to know about. The following list covers the major ones.
Dos
^^^
diff --git a/docker/Dockerfile b/docker/Dockerfile
deleted file mode 100644
index 2ce45a5f16..0000000000
--- a/docker/Dockerfile
+++ /dev/null
@@ -1,21 +0,0 @@
-ARG PACKAGE_NAME=heat
-ARG HEAT_VERSION=1.2.2
-ARG PYTORCH_IMG=22.05-py3
-ARG HEAT_BRANCH=main
-ARG INSTALL_TYPE=release
-
-FROM nvcr.io/nvidia/pytorch:${PYTORCH_IMG} AS base
-COPY ./tzdata.seed /tmp/tzdata.seed
-RUN debconf-set-selections /tmp/tzdata.seed
-RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential openssh-client python3-dev git && apt clean && rm -rf /var/lib/apt/lists/*
-
-FROM base AS source-install
-ARG HEAT_BRANCH
-RUN git clone -b ${HEAT_BRANCH} https://github.com/helmholtz-analytics/heat.git ; cd heat; pip install mpi4py --no-binary :all: ; pip install .[hdf5,netcdf]; pip cache purge ; cd ..; rm -rf heat
-
-FROM base AS release-install
-ARG PACKAGE_NAME
-ARG HEAT_VERSION
-RUN pip install mpi4py --no-binary :all: ; if [ "x${HEAT_VERSION}" = "x" ]; then pip install ${PACKAGE_NAME}[hdf5,netcdf]; else pip install ${PACKAGE_NAME}[hdf5,netcdf]==${HEAT_VERSION}; fi ; pip cache purge ; true
-
-FROM ${INSTALL_TYPE}-install AS final
diff --git a/docker/Dockerfile.release b/docker/Dockerfile.release
new file mode 100644
index 0000000000..3aa43fde14
--- /dev/null
+++ b/docker/Dockerfile.release
@@ -0,0 +1,18 @@
+ARG HEAT_VERSION=latest
+ARG PYTORCH_IMG=23.05-py3
+
+FROM nvcr.io/nvidia/pytorch:${PYTORCH_IMG} AS base
+COPY ./tzdata.seed /tmp/tzdata.seed
+RUN debconf-set-selections /tmp/tzdata.seed
+RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential openssh-client python3-dev git && apt clean && rm -rf /var/lib/apt/lists/*
+
+FROM base AS release-install
+ARG HEAT_VERSION
+RUN pip install --upgrade pip
+RUN pip install mpi4py --no-binary :all:
+RUN echo ${HEAT_VERSION}
+RUN if [[ ${HEAT_VERSION} =~ ^([1-9]\d*|0)(\.(([1-9]\d*)|0)){2}$ ]]; then \
+ pip install heat[hdf5,netcdf]==${HEAT_VERSION}; \
+ else \
+ pip install heat[hdf5,netcdf]; \
+ fi
diff --git a/docker/Dockerfile.source b/docker/Dockerfile.source
new file mode 100644
index 0000000000..2765d1cc41
--- /dev/null
+++ b/docker/Dockerfile.source
@@ -0,0 +1,13 @@
+ARG PYTORCH_IMG=23.05-py3
+ARG HEAT_BRANCH=main
+
+FROM nvcr.io/nvidia/pytorch:${PYTORCH_IMG} AS base
+COPY ./tzdata.seed /tmp/tzdata.seed
+RUN debconf-set-selections /tmp/tzdata.seed
+RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential openssh-client python3-dev git && apt clean && rm -rf /var/lib/apt/lists/*
+
+FROM base AS source-install
+ARG HEAT_BRANCH
+RUN pip install --upgrade pip
+RUN git clone -b ${HEAT_BRANCH} https://github.com/helmholtz-analytics/heat.git
+RUN pip install mpi4py --no-binary :all: && pushd heat && pip install .[hdf5,netcdf] && popd && rm -rf heat
diff --git a/docker/README.md b/docker/README.md
index 9202c974c5..89b11ebf95 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -2,29 +2,35 @@
There is some flexibility to building the Docker images of Heat.
-Firstly, one can build from the released version taken from PyPI. This will either be
-the latest release or the version set through the `--build-arg=HEAT_VERSION=1.2.0`
+Firstly, one can build from the released version taken from PyPI using `Dockerfile.release`. This will either be
+the latest release or the version set through the `--build-arg HEAT_VERSION=X.Y.Z`
argument.
-Secondly one can build a docker image from the GitHub sources, selected through
-`--build-arg=INSTALL_TYPE=source`. The default branch to be built is main, other
-branches can be specified using `--build-arg=HEAT_BRANCH=branchname`.
+Secondly, one can build a Docker image from the GitHub sources using `Dockerfile.source`. The default branch to be built is `main`; other
+branches can be specified using `--build-arg HEAT_BRANCH=<branch-name>`.
## General build
### Docker
-The [Dockerfile](./Dockerfile) guiding the build of the Docker image is located in this
-directory. It is typically most convenient to `cd` over here and run the Docker build as:
+The Dockerfiles guiding the build of the Docker image, [Dockerfile.release](./Dockerfile.release) and [Dockerfile.source](./Dockerfile.source), are located in this directory. It is typically most convenient to `cd` to the `docker` directory and run the build command as:
```console
-$ docker build --build-args HEAT_VERSION=1.2.2 --PYTORCH_IMG=22.05-py3 -t heat:local .
+$ docker build -t heat:latest -f Dockerfile.source .
```
+Or optionally, using a particular version and pytorch base image:
+
+```console
+$ docker build --build-arg HEAT_VERSION=X.Y.Z --build-arg PYTORCH_IMG=<nvidia-pytorch-tag> -t heat:X.Y.Z -f Dockerfile.release .
+```
+
+The Heat image is based on the NVIDIA PyTorch container. You can find existing tags in the [NVIDIA container catalog](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags).
+
We also offer prebuilt images in our [Package registry](https://github.com/helmholtz-analytics/heat/pkgs/container/heat) from which you can pull existing images:
```console
-$ docker pull ghcr.io/helmholtz-analytics/heat:1.2.0-dev_torch1.12_cuda11.7_py3.8
+$ docker pull ghcr.io/helmholtz-analytics/heat:<tag>
```
### Building for HPC
@@ -37,24 +43,24 @@ image also for HPC systems, such as the ones available at [Jülich Supercomputin
To use one of the existing images from our registry:
- $ apptainer build heat.sif docker://ghcr.io/helmholtz-analytics/heat:1.2.0-dev_torch1.12_cuda11.7_py3.8
+ $ apptainer build heat.sif docker://ghcr.io/helmholtz-analytics/heat:<tag>
Building the image can require root access in some systems. If that is the case, we recommend building the image on a local machine, and then upload it to the desired HPC system.
If you see an error indicating that there is not enough space, use the --tmpdir flag of the build command. [Apptainer docs](https://apptainer.org/docs/user/latest/build_a_container.html)
-#### SIB (Singularity Image Builder)
+#### SIB (Singularity Image Builder) for Apptainer images
A simple `Dockerfile` (in addition to the one above) to be used with SIB could look like
this:
- FROM ghcr.io/helmholtz-analytics/heat:1.2.0_torch1.12_cuda11.7_py3.8
+ FROM ghcr.io/helmholtz-analytics/heat:<tag>
The invocation to build the image would be:
- $ sib upload ./Dockerfile heat_1.2.0_torch1.12_cuda11.7_py3.8
- $ sib build --recipe-name heat_1.2.0_torch1.12_cuda11.7_py3.8
- $ sib download --recipe-name heat_1.2.0_torch1.12_cuda11.7_py3.8
+ $ sib upload ./Dockerfile heat
+ $ sib build --recipe-name heat
+ $ sib download --recipe-name heat
However, SIB is capable of using just about any available Docker image from any
registry, such that a specific Singularity image can be built by simply referencing the
@@ -62,7 +68,7 @@ available image. SIB is thus used as a conversion tool.
## Running on HPC
- $ singularity run --nv heat_1.2.0_torch.11_cuda11.5_py3.9.sif /bin/bash
+ $ apptainer run --nv heat.sif /bin/bash
$ python
Python 3.8.13 (default, Mar 28 2022, 11:38:47)
[GCC 7.5.0] :: Anaconda, Inc. on linux
@@ -70,12 +76,12 @@ available image. SIB is thus used as a conversion tool.
>>> import heat as ht
...
-The `--nv` argument to `singularity`enables NVidia GPU support, which is desired for
+The `--nv` argument to `apptainer` enables NVidia GPU support, which is desired for
Heat.
### Multi-node example
-The following file can be used as an example to use the singularity file together with SLURM, which allows heat to work in a multi-node environment.
+The following file can be used as an example of using the Apptainer image together with SLURM, which allows Heat to work in a multi-node environment.
```bash
#!/bin/bash
@@ -85,5 +91,9 @@ The following file can be used as an example to use the singularity file togethe
...
-srun --mpi="pmi2" singularity exec --nv heat_1.2.0_torch.11_cuda11.5_py3.9.sif bash -c "cd ~/code/heat/examples/lasso; python demo.py"
+srun --mpi="pmi2" apptainer exec --nv heat_1.2.0_torch.11_cuda11.5_py3.9.sif bash -c "cd ~/code/heat/examples/lasso; python demo.py"
```
+
+## Scripts
+
+The `scripts` folder has a small collection of helper scripts to automate certain tasks, primarily meant for Heat developers. Explanations are given at the top of each script.
diff --git a/docker/scripts/build_and_push.sh b/docker/scripts/build_and_push.sh
new file mode 100755
index 0000000000..10895596ab
--- /dev/null
+++ b/docker/scripts/build_and_push.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+### As the name suggests, this script is meant for Heat developers to quickly build a new Docker image with the specified Heat version and PyTorch image version. The arguments TORCH_VERSION, CUDA_VERSION, and PYTHON_VERSION should indicate the versions of those libraries found in the NVIDIA PyTorch image; they are used only to create the image tag.
+# If you want to upload the image to the GitHub package registry, use the '--upload' option. You need to be logged in to the registry. Instructions here: https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#authenticating-to-the-container-registry
+
+GHCR_UPLOAD=false
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --heat-version)
+ HEAT_VERSION="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --pytorch-img)
+ PYTORCH_IMG="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --torch-version)
+ TORCH_VERSION="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --cuda-version)
+ CUDA_VERSION="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --python-version)
+ PYTHON_VERSION="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --upload)
+ GHCR_UPLOAD=true
+ shift
+ shift
+ ;;
+ -*|--*)
+ echo "Unknown option $1"
+ exit 1
+ ;;
+ *)
+ esac
+done
+
+echo "HEAT_VERSION=$HEAT_VERSION"
+echo "PYTORCH_IMG=$PYTORCH_IMG"
+echo "TORCH_VERSION=$TORCH_VERSION"
+echo "CUDA_VERSION=$CUDA_VERSION"
+echo "PYTHON_VERSION=$PYTHON_VERSION"
+
+
+ghcr_tag="ghcr.io/helmholtz-analytics/heat:${HEAT_VERSION}_torch${TORCH_VERSION}_cu${CUDA_VERSION}_py${PYTHON_VERSION}"
+
+echo "Building image $ghcr_tag"
+
+docker build --file ../Dockerfile.release \
+ --build-arg HEAT_VERSION=$HEAT_VERSION \
+ --build-arg PYTORCH_IMG=$PYTORCH_IMG \
+ --tag $ghcr_tag \
+ .
+
+if [ $GHCR_UPLOAD = true ]; then
+ echo "Push image"
+ echo "You might need to log in into ghcr.io (https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#authenticating-to-the-container-registry)"
+ docker push $ghcr_tag
+fi
diff --git a/docker/scripts/install_print_test.sh b/docker/scripts/install_print_test.sh
new file mode 100755
index 0000000000..9103be9562
--- /dev/null
+++ b/docker/scripts/install_print_test.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Script to quickly obtain all relevant information from a new NVIDIA PyTorch container. Run it inside a PyTorch container from NVIDIA: it first prints the software stack (CUDA version, torch version, ...), then installs Heat from source, and finally runs the Heat unit tests. Useful to quickly check whether a container is compatible with Heat.
+
+# Container setup
+apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential openssh-client python3-dev git && apt clean && rm -rf /var/lib/apt/lists/*
+
+# Print environment
+pip list | grep torch
+python --version
+nvcc --version
+mpirun --version
+
+# Install heat from source.
+git clone https://github.com/helmholtz-analytics/heat.git
+cd heat
+pip install --upgrade pip
+pip install mpi4py --no-binary :all:
+pip install .[netcdf,hdf5,dev]
+
+# Run tests
+HEAT_TEST_USE_DEVICE=gpu mpirun -n 1 pytest heat/
diff --git a/docker/scripts/test_nvidia_image_haicore_enroot.sh b/docker/scripts/test_nvidia_image_haicore_enroot.sh
new file mode 100755
index 0000000000..7b052b22ea
--- /dev/null
+++ b/docker/scripts/test_nvidia_image_haicore_enroot.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Example SLURM/ENROOT script. It will mount the container using enroot, and then run the test script to test the compatibility of the image with the source version of heat.
+
+# Clear environment, else mpi4py will fail to install.
+ml purge
+
+SBATCH_PARAMS=(
+ --partition normal
+ --time 00:10:00
+ --nodes 1
+ --tasks-per-node 1
+ --gres gpu:1
+ --container-image ~/containers/nvidia+pytorch+23.05-py3.sqsh
+ --container-writable
+ --container-mounts /etc/slurm/task_prolog.hk:/etc/slurm/task_prolog.hk,/scratch:/scratch
+ --container-mount-home
+)
+
+sbatch "${SBATCH_PARAMS[@]}" ./install_print_test.sh
diff --git a/docker/singularity-dockerfile.sample b/docker/singularity-dockerfile.sample
index be90ce3e90..107d202090 100644
--- a/docker/singularity-dockerfile.sample
+++ b/docker/singularity-dockerfile.sample
@@ -1,2 +1,2 @@
# This is a sample file to use with the Singularity image builder
-FROM ghcr.io/helmholtz-analytics/heat:1.2.0_torch1.11_cuda11.5_py3.9
+FROM ghcr.io/helmholtz-analytics/heat:1.3.0_torch1.12_cuda11.7_py3.8
diff --git a/examples/nn/imagenet-DASO.py b/examples/nn/imagenet-DASO.py
index d1b02261ac..cd29fab782 100644
--- a/examples/nn/imagenet-DASO.py
+++ b/examples/nn/imagenet-DASO.py
@@ -330,8 +330,8 @@ def main():
print0("Test mode - no DDP, no apex, RN50, 10 iterations")
args.distributed = True # TODO: DDDP: if ht.MPI_WORLD.size > 1 else False
- print0("loss_scale = {}".format(args.loss_scale), type(args.loss_scale))
- print0("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))
+ print0(f"loss_scale = {args.loss_scale}", type(args.loss_scale))
+ print0(f"\nCUDNN VERSION: {torch.backends.cudnn.version()}\n")
cudnn.benchmark = True
best_prec1 = 0
@@ -379,10 +379,10 @@ def main():
# create model
if args.pretrained:
- print0("=> using pre-trained model '{}'".format(args.arch))
+ print0(f"=> using pre-trained model '{args.arch}'")
model = models.__dict__[args.arch](pretrained=True)
else:
- print0("=> creating model '{}'".format(args.arch))
+ print0(f"=> creating model '{args.arch}'")
model = models.__dict__[args.arch]()
if (
@@ -426,7 +426,7 @@ def main():
# Use a local scope to avoid dangling references
def resume():
if os.path.isfile(args.resume):
- print0("=> loading checkpoint '{}'".format(args.resume))
+ print0(f"=> loading checkpoint '{args.resume}'")
checkpoint = torch.load(
args.resume, map_location=lambda storage, loc: storage.cuda(args.gpu)
)
@@ -440,7 +440,7 @@ def resume():
else:
try:
resfile = "imgnet-checkpoint-" + str(args.world_size) + ".pth.tar"
- print0("=> loading checkpoint '{}'".format(resfile))
+ print0(f"=> loading checkpoint '{resfile}'")
checkpoint = torch.load(
resfile, map_location=lambda storage, loc: storage.cuda(args.gpu)
)
@@ -636,11 +636,11 @@ def train(dev, train_loader, model, criterion, optimizer, epoch):
target = data[0]["label"].squeeze().cuda(dev).long()
if 0 <= args.prof == i:
- print("Profiling begun at iteration {}".format(i))
+ print(f"Profiling begun at iteration {i}")
torch.cuda.cudart().cudaProfilerStart()
if args.prof >= 0:
- torch.cuda.nvtx.range_push("Body of iteration {}".format(i))
+ torch.cuda.nvtx.range_push(f"Body of iteration {i}")
lr_warmup(optimizer, epoch, i, train_loader_len)
@@ -719,7 +719,7 @@ def train(dev, train_loader, model, criterion, optimizer, epoch):
torch.cuda.nvtx.range_pop()
if args.prof >= 0 and i == args.prof + 10:
- print0("Profiling ended at iteration {}".format(i))
+ print0(f"Profiling ended at iteration {i}")
torch.cuda.cudart().cudaProfilerStop()
quit()
# todo average loss, and top1 and top5
diff --git a/examples/nn/imagenet.py b/examples/nn/imagenet.py
index f3e29cf379..53e415d921 100644
--- a/examples/nn/imagenet.py
+++ b/examples/nn/imagenet.py
@@ -324,7 +324,7 @@ def validate(val_loader, model, criterion, args):
progress.display(i)
# TODO: this should also be done with the ProgressMeter
- print(" * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5))
+ print(f" * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}")
return top1.avg
diff --git a/heat/cluster/_kcluster.py b/heat/cluster/_kcluster.py
index 5ee65a284e..d3f0bdae19 100644
--- a/heat/cluster/_kcluster.py
+++ b/heat/cluster/_kcluster.py
@@ -132,7 +132,7 @@ def _initialize_cluster_centers(self, x: DNDarray):
elif isinstance(self.init, DNDarray):
if len(self.init.shape) != 2:
raise ValueError(
- "passed centroids need to be two-dimensional, but are {}".format(len(self.init))
+ f"passed centroids need to be two-dimensional, but are {len(self.init)}"
)
if self.init.shape[0] != self.n_clusters or self.init.shape[1] != x.shape[1]:
raise ValueError("passed centroids do not match cluster count or data shape")
diff --git a/heat/core/_operations.py b/heat/core/_operations.py
index 7b2b2d5b85..1a9d6766e5 100644
--- a/heat/core/_operations.py
+++ b/heat/core/_operations.py
@@ -24,7 +24,7 @@ def __binary_op(
t1: Union[DNDarray, int, float],
t2: Union[DNDarray, int, float],
out: Optional[DNDarray] = None,
- where: Optional[DNDarray] = True,
+ where: Union[bool, DNDarray] = True,
fn_kwargs: Optional[Dict] = {},
) -> DNDarray:
"""
diff --git a/heat/core/arithmetics.py b/heat/core/arithmetics.py
index fd387987a4..a38f10c3a2 100644
--- a/heat/core/arithmetics.py
+++ b/heat/core/arithmetics.py
@@ -38,6 +38,7 @@
"cumsum",
"diff",
"div",
+ "divmod",
"divide",
"floordiv",
"floor_divide",
@@ -68,7 +69,14 @@
]
-def add(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def add(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise addition of values from two operands, commutative.
Takes the first and second operand (scalar or :class:`~heat.core.dndarray.DNDarray`) whose elements are to be added
@@ -80,12 +88,21 @@ def add(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
The first operand involved in the addition
t2: DNDarray or scalar
The second operand involved in the addition
+ out: DNDarray, optional
+ The output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the added value. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> import heat as ht
>>> ht.add(1.0, 4.0)
- DNDarray([5.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(5., dtype=ht.float32, device=cpu:0, split=None)
>>> T1 = ht.float32([[1, 2], [3, 4]])
>>> T2 = ht.float32([[2, 2], [2, 2]])
>>> ht.add(T1, T2)
@@ -96,16 +113,30 @@ def add(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
DNDarray([[3., 4.],
[5., 6.]], dtype=ht.float32, device=cpu:0, split=None)
"""
- return _operations.__binary_op(torch.add, t1, t2)
+ return _operations.__binary_op(torch.add, t1, t2, out, where)
+
+
+def _add(self, other):
+ try:
+ return add(self, other)
+ except TypeError:
+ return NotImplemented
-DNDarray.__add__ = lambda self, other: add(self, other)
+DNDarray.__add__ = _add
DNDarray.__add__.__doc__ = add.__doc__
-DNDarray.__radd__ = lambda self, other: add(self, other)
+DNDarray.__radd__ = lambda self, other: _add(other, self)
DNDarray.__radd__.__doc__ = add.__doc__
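Illustrative sketch of the new `out`/`where` keywords on `ht.add` (not part of the patch; it assumes the semantics documented above, where entries with a False condition keep whatever is already in `out`):

    import heat as ht

    a = ht.ones((2, 2), split=0)
    b = ht.ones((2, 2), split=0)
    out = ht.zeros((2, 2), split=0)
    mask = ht.array([[True, False], [False, True]], split=0)

    # Entries where mask is True receive a + b; the rest keep the value already in out.
    ht.add(a, b, out=out, where=mask)
    # expected per the documented semantics: [[2., 0.], [0., 2.]]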
-def bitwise_and(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def bitwise_and(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Compute the bit-wise AND of two :class:`~heat.core.dndarray.DNDarray` ``t1`` and ``t2`` element-wise.
Only integer and boolean types are handled. If ``x1.shape!=x2.shape``, they must be broadcastable to a common shape
@@ -117,13 +148,22 @@ def bitwise_and(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDar
Input tensor
t2: DNDarray or scalar
Input tensor
+ out: DNDarray, optional
+ The output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the bitwise AND of the inputs. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.bitwise_and(13, 17)
- DNDarray([1], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(1, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_and(14, 13)
- DNDarray([12], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(12, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_and(ht.array([14,3]), 13)
DNDarray([12, 1], dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_and(ht.array([11,7]), ht.array([4,25]))
@@ -139,14 +179,30 @@ def bitwise_and(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDar
if heat_type_is_inexact(dt):
raise TypeError("Operation is not supported for float types")
- return _operations.__binary_op(torch.bitwise_and, t1, t2)
+ return _operations.__binary_op(torch.bitwise_and, t1, t2, out, where)
-DNDarray.__and__ = lambda self, other: bitwise_and(self, other)
+def _and(self, other):
+ try:
+ return bitwise_and(self, other)
+ except TypeError:
+ return NotImplemented
+
+
+DNDarray.__and__ = _and
DNDarray.__and__.__doc__ = bitwise_and.__doc__
+DNDarray.__rand__ = lambda self, other: _and(other, self)
+DNDarray.__rand__.__doc__ = bitwise_and.__doc__
-def bitwise_or(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def bitwise_or(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Compute the bit-wise OR of two :class:`~heat.core.dndarray.DNDarray` ``t1`` and ``t2`` element-wise.
Only integer and boolean types are handled. If ``x1.shape!=x2.shape``, they must be broadcastable to a common shape
@@ -158,13 +214,22 @@ def bitwise_or(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarr
Input tensor
t2: DNDarray or scalar
Input tensor
+ out: DNDarray, optional
+ The output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the bitwise OR of the inputs. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.bitwise_or(13, 16)
- DNDarray([29], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(29, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_or(32, 2)
- DNDarray([34], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(34, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_or(ht.array([33, 4]), 1)
DNDarray([33, 5], dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_or(ht.array([33, 4]), ht.array([1, 2]))
@@ -183,14 +248,30 @@ def bitwise_or(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarr
if heat_type_is_inexact(dt):
raise TypeError("Operation is not supported for float types")
- return _operations.__binary_op(torch.bitwise_or, t1, t2)
+ return _operations.__binary_op(torch.bitwise_or, t1, t2, out, where)
+
+
+def _or(self, other):
+ try:
+ return bitwise_or(self, other)
+ except TypeError:
+ return NotImplemented
-DNDarray.__or__ = lambda self, other: bitwise_or(self, other)
+DNDarray.__or__ = _or
DNDarray.__or__.__doc__ = bitwise_or.__doc__
+DNDarray.__ror__ = lambda self, other: _or(other, self)
+DNDarray.__ror__.__doc__ = bitwise_or.__doc__
-def bitwise_xor(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def bitwise_xor(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Compute the bit-wise XOR of two arrays element-wise ``t1`` and ``t2``.
Only integer and boolean types are handled. If ``x1.shape!=x2.shape``, they must be broadcastable to a common shape
@@ -202,13 +283,22 @@ def bitwise_xor(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDar
Input tensor
t2: DNDarray or scalar
Input tensor
+ out: DNDarray, optional
+ The output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the bitwise XOR of the inputs. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.bitwise_xor(13, 17)
- DNDarray([28], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(28, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_xor(31, 5)
- DNDarray([26], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(26, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_xor(ht.array([31,3]), 5)
DNDarray([26, 6], dtype=ht.int64, device=cpu:0, split=None)
>>> ht.bitwise_xor(ht.array([31,3]), ht.array([5,6]))
@@ -222,11 +312,20 @@ def bitwise_xor(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDar
if heat_type_is_inexact(dt):
raise TypeError("Operation is not supported for float types")
- return _operations.__binary_op(torch.bitwise_xor, t1, t2)
+ return _operations.__binary_op(torch.bitwise_xor, t1, t2, out, where)
-DNDarray.__xor__ = lambda self, other: bitwise_xor(self, other)
+def _xor(self, other):
+ try:
+ return bitwise_xor(self, other)
+ except TypeError:
+ return NotImplemented
+
+
+DNDarray.__xor__ = _xor
DNDarray.__xor__.__doc__ = bitwise_xor.__doc__
+DNDarray.__rxor__ = lambda self, other: _xor(other, self)
+DNDarray.__rxor__.__doc__ = bitwise_xor.__doc__
def copysign(
@@ -235,17 +334,17 @@ def copysign(
/,
out: Optional[DNDarray] = None,
*,
- where: DNDarray = True,
+ where: Union[bool, DNDarray] = True,
) -> DNDarray:
"""
Create a new floating-point tensor with the magnitude of 'a' and the sign of 'b', elementwise
Parameters
----------
- a: DNDarray
- The input array
- b: DNDarray or Number
- value(s) whose signbit(s) are applied to the magnitudes in 'a'
+ a: DNDarray
+ The input array
+ b: DNDarray or Number
+ value(s) whose signbit(s) are applied to the magnitudes in 'a'
out: DNDarray, optional
The output array. It must have a shape that the inputs broadcast to and matching split axis.
If not provided, a freshly allocated array is returned.
@@ -479,8 +578,10 @@ def diff(
def div(
t1: Union[DNDarray, float],
t2: Union[DNDarray, float],
+ /,
out: Optional[DNDarray] = None,
- where: DNDarray = True,
+ *,
+ where: Union[bool, DNDarray] = True,
) -> DNDarray:
"""
Element-wise true division of values of operand ``t1`` by values of operands ``t2`` (i.e ``t1/t2``).
@@ -505,7 +606,7 @@ def div(
Example
---------
>>> ht.div(2.0, 2.0)
- DNDarray([1.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(1., dtype=ht.float32, device=cpu:0, split=None)
>>> T1 = ht.float32([[1, 2], [3, 4]])
>>> T2 = ht.float32([[2, 2], [2, 2]])
>>> ht.div(T1, T2)
@@ -519,9 +620,16 @@ def div(
return _operations.__binary_op(torch.true_divide, t1, t2, out, where)
-DNDarray.__truediv__ = lambda self, other: div(self, other)
+def _truediv(self, other):
+ try:
+ return div(self, other)
+ except TypeError:
+ return NotImplemented
+
+
+DNDarray.__truediv__ = _truediv
DNDarray.__truediv__.__doc__ = div.__doc__
-DNDarray.__rtruediv__ = lambda self, other: div(other, self)
+DNDarray.__rtruediv__ = lambda self, other: _truediv(other, self)
DNDarray.__rtruediv__.__doc__ = div.__doc__
# Alias in compliance with numpy API
@@ -529,7 +637,104 @@ def div(
"""Alias for :py:func:`div`"""
-def fmod(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def divmod(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ out1: Optional[DNDarray] = None,
+ out2: Optional[DNDarray] = None,
+ /,
+ out: Tuple[Optional[DNDarray], Optional[DNDarray]] = (None, None),
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> Tuple[DNDarray, DNDarray]:
+ """
+ Element-wise quotient and remainder from the floor division of values of operand ``t1`` by values of operand ``t2``, analogous to the Python built-in ``divmod``.
+ The quotient follows ``floordiv`` and the remainder follows ``mod``, i.e. the remainder has the same sign as the divisor ``t2``. Operation is not commutative.
+
+ Parameters
+ ----------
+ t1: DNDarray or scalar
+ The first operand whose values are divided (may be floats)
+ t2: DNDarray or scalar
+ The second operand by whose values is divided (may be floats)
+ out1: DNDarray, optional
+ The output array for the quotient. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned. If provided, it must be of the same shape as the
+ expected output. Only one of out1 and out can be provided.
+ out2: DNDarray, optional
+ The output array for the remainder. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned. If provided, it must be of the same shape as the
+ expected output. Only one of out2 and out can be provided.
+ out: tuple of two DNDarrays, optional
+ Tuple of two output arrays (quotient, remainder). Both must have a shape that the inputs broadcast to and a matching split axis.
+ If not provided, freshly allocated arrays are returned. If provided, they must be of the same shape as the
+ expected output. ``out`` cannot be combined with ``out1`` or ``out2``.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out1` array
+ will be set to the quotient and the `out2` array will be set to the remainder. Elsewhere, the `out1` and `out2` arrays will retain their original values. If
+ uninitialized `out1` and `out2` arrays are created via the defaults `out1=None` and `out2=None`, locations within them where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out1` and `out2` arrays.
+
+ Examples
+ --------
+ >>> ht.divmod(2.0, 2.0)
+ (DNDarray(1., dtype=ht.float32, device=cpu:0, split=None), DNDarray(0., dtype=ht.float32, device=cpu:0, split=None))
+ >>> T1 = ht.float32([[1, 2], [3, 4]])
+ >>> T2 = ht.float32([[2, 2], [2, 2]])
+ >>> ht.divmod(T1, T2)
+ (DNDarray([[0., 1.],
+ [1., 2.]], dtype=ht.float32, device=cpu:0, split=None), DNDarray([[1., 0.],
+ [1., 0.]], dtype=ht.float32, device=cpu:0, split=None))
+ >>> s = 2.0
+ >>> ht.divmod(s, T1)
+ (DNDarray([[2., 1.],
+ [0., 0.]], dtype=ht.float32, device=cpu:0, split=None), DNDarray([[0., 0.],
+ [2., 2.]], dtype=ht.float32, device=cpu:0, split=None))
+ """
+ if not isinstance(out, tuple):
+ raise TypeError("out must be a tuple of two DNDarrays")
+ if len(out) != 2:
+ raise ValueError("out must be a tuple of two DNDarrays")
+ if out[0] is not None:
+ if out1 is None:
+ out1 = out[0]
+ else:
+ raise TypeError("out[0] and out1 cannot be used at the same time")
+ if out[1] is not None:
+ if out2 is None:
+ out2 = out[1]
+ else:
+ raise TypeError("out[1] and out2 cannot be used at the same time")
+
+ # PyTorch has no divmod function
+ d = floordiv(t1, t2, out1, where=where)
+ m = mod(t1, t2, out2, where=where)
+
+ return (d, m)
+
+
+def _divmod(self, other):
+ try:
+ return divmod(self, other)
+ except TypeError:
+ return NotImplemented
+
+
+DNDarray.__divmod__ = _divmod
+DNDarray.__divmod__.__doc__ = divmod.__doc__
+DNDarray.__rdivmod__ = lambda self, other: _divmod(other, self)
+DNDarray.__rdivmod__.__doc__ = divmod.__doc__
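Illustrative sketch of how the new `ht.divmod` composes floor division and remainder, since PyTorch offers no divmod primitive (not part of the patch; it uses only the public heat functions named in the hunk above):

    import heat as ht

    T1 = ht.float32([[1, 2], [3, 4]])
    T2 = ht.float32([[2, 2], [2, 2]])

    q, r = ht.divmod(T1, T2)
    # divmod is floor division plus remainder computed in one call
    assert ht.equal(q, ht.floor_divide(T1, T2))
    assert ht.equal(r, ht.mod(T1, T2))

    # the Python built-in now dispatches to DNDarray.__divmod__ as well
    q2, r2 = divmod(T1, T2)
    assert ht.equal(q, q2) and ht.equal(r, r2)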
+
+
+def fmod(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise division remainder of values of operand ``t1`` by values of operand ``t2`` (i.e. C Library function fmod).
Result has the sign as the dividend ``t1``. Operation is not commutative.
@@ -540,11 +745,21 @@ def fmod(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
The first operand whose values are divided (may be floats)
t2: DNDarray or scalar
The second operand by whose values is divided (may be floats)
+ out: DNDarray, optional
+ The output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned. If provided, it must be of the same shape as the
+ expected output.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the remainder. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.fmod(2.0, 2.0)
- DNDarray([0.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(0., dtype=ht.float32, device=cpu:0, split=None)
>>> T1 = ht.float32([[1, 2], [3, 4]])
>>> T2 = ht.float32([[2, 2], [2, 2]])
>>> ht.fmod(T1, T2)
@@ -555,10 +770,17 @@ def fmod(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
DNDarray([[0., 0.],
[2., 2.]], dtype=ht.float32, device=cpu:0, split=None)
"""
- return _operations.__binary_op(torch.fmod, t1, t2)
+ return _operations.__binary_op(torch.fmod, t1, t2, out, where)
-def floordiv(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def floordiv(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise floor division of value of operand ``t1`` by values of operands ``t2`` (i.e. ``t1//t2``), not commutative.
@@ -568,6 +790,15 @@ def floordiv(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray
The first operand whose values are divided
t2: DNDarray or scalar
The second operand by whose values is divided
+ out: DNDarray, optional
+ The output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the quotient. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
@@ -580,12 +811,21 @@ def floordiv(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray
DNDarray([[1., 0.],
[1., 1.]], dtype=ht.float32, device=cpu:0, split=None)
"""
- return _operations.__binary_op(torch.div, t1, t2, fn_kwargs={"rounding_mode": "floor"})
+ return _operations.__binary_op(
+ torch.div, t1, t2, out, where, fn_kwargs={"rounding_mode": "floor"}
+ )
-DNDarray.__floordiv__ = lambda self, other: floordiv(self, other)
+def _floordiv(self, other):
+ try:
+ return floordiv(self, other)
+ except TypeError:
+ return NotImplemented
+
+
+DNDarray.__floordiv__ = _floordiv
DNDarray.__floordiv__.__doc__ = floordiv.__doc__
-DNDarray.__rfloordiv__ = lambda self, other: floordiv(other, self)
+DNDarray.__rfloordiv__ = lambda self, other: _floordiv(other, self)
DNDarray.__rfloordiv__.__doc__ = floordiv.__doc__
# Alias in compliance with numpy API
@@ -593,7 +833,14 @@ def floordiv(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray
"""Alias for :py:func:`floordiv`"""
-def gcd(a: DNDarray, b: DNDarray, /, out: Optional[DNDarray] = None, *, where=True) -> DNDarray:
+def gcd(
+ a: DNDarray,
+ b: DNDarray,
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Returns the greatest common divisor of |a| and |b|
@@ -623,7 +870,12 @@ def gcd(a: DNDarray, b: DNDarray, /, out: Optional[DNDarray] = None, *, where=Tr
def hypot(
- a: DNDarray, b: DNDarray, /, out: Optional[DNDarray] = None, *, where: DNDarray = True
+ a: DNDarray,
+ b: DNDarray,
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
) -> DNDarray:
r"""
Given the 'legs' of a right triangle, return its hypotenuse. Equivalent to :math:`\sqrt{a^2 + b^2}`, element-wise.
@@ -660,7 +912,7 @@ def hypot(
return res
-def invert(a: DNDarray, out: DNDarray = None) -> DNDarray:
+def invert(a: DNDarray, /, out: Optional[DNDarray] = None) -> DNDarray:
"""
Computes the bitwise NOT of the given input :class:`~heat.core.dndarray.DNDarray`. The input array must be of integral
or Boolean types. For boolean arrays, it computes the logical NOT. Bitwise_not is an alias for invert.
@@ -671,6 +923,8 @@ def invert(a: DNDarray, out: DNDarray = None) -> DNDarray:
The input array to invert. Must be of integral or Boolean types
out : DNDarray, optional
Alternative output array in which to place the result. It must have the same shape as the expected output.
+ The dtype of the output will be that of the input array, unless it is logical, in which case it will be
+ cast to int8. If not provided or None, a freshly-allocated array is returned.
Examples
--------
@@ -687,7 +941,7 @@ def invert(a: DNDarray, out: DNDarray = None) -> DNDarray:
return _operations.__local_op(torch.bitwise_not, a, out, no_cast=True)
-DNDarray.__invert__ = lambda self, out=None: invert(self, out)
+DNDarray.__invert__ = lambda self: invert(self)
DNDarray.__invert__.__doc__ = invert.__doc__
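A minimal usage sketch after this change (not part of the patch): `__invert__` no longer accepts an `out` argument because Python never passes one to the unary `~` operator, while the functional API keeps it.

    import heat as ht

    a = ht.array([1, 2, 3])
    print(~a)            # bitwise NOT via DNDarray.__invert__
    print(ht.invert(a))  # the functional API still accepts out= if needed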
# alias for invert
@@ -696,7 +950,12 @@ def invert(a: DNDarray, out: DNDarray = None) -> DNDarray:
def lcm(
- a: DNDarray, b: DNDarray, /, out: Optional[DNDarray] = None, *, where: DNDarray = True
+ a: DNDarray,
+ b: DNDarray,
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
) -> DNDarray:
"""
Returns the lowest common multiple of |a| and |b|
@@ -733,7 +992,14 @@ def lcm(
return res
-def left_shift(t1: DNDarray, t2: Union[DNDarray, float]) -> DNDarray:
+def left_shift(
+ t1: DNDarray,
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Shift the bits of an integer to the left.
@@ -743,6 +1009,16 @@ def left_shift(t1: DNDarray, t2: Union[DNDarray, float]) -> DNDarray:
Input array
t2: DNDarray or float
Integer number of zero bits to add
+ out: DNDarray, optional
+ Output array for the result. Must have the same shape as the expected output. The dtype of the output will be
+ that of the input array, unless it is logical, in which case it will be cast to int8. If not provided or
+ None, a freshly-allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the shifted value. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
@@ -757,14 +1033,37 @@ def left_shift(t1: DNDarray, t2: Union[DNDarray, float]) -> DNDarray:
elif dtypes[dt] == types.bool:
arrs[dt] = types.int(arrs[dt])
- return _operations.__binary_op(torch.Tensor.__lshift__, t1, t2)
+ try:
+ result = _operations.__binary_op(torch.bitwise_left_shift, t1, t2, out, where)
+ except AttributeError: # pragma: no cover
+ result = _operations.__binary_op(
+ torch.Tensor.__lshift__, t1, t2, out, where
+ ) # pytorch < 1.10
+
+ return result
-DNDarray.__lshift__ = lambda self, other: left_shift(self, other)
+def _lshift(self, other):
+ try:
+ return left_shift(self, other)
+ except TypeError:
+ return NotImplemented
+
+
+DNDarray.__lshift__ = _lshift
DNDarray.__lshift__.__doc__ = left_shift.__doc__
+DNDarray.__rlshift__ = lambda self, other: _lshift(other, self)
+DNDarray.__rlshift__.__doc__ = left_shift.__doc__
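The hunk above prefers torch.bitwise_left_shift (added in PyTorch 1.10) and falls back to torch.Tensor.__lshift__ on older releases via try/except AttributeError. A standalone sketch of the same feature detection, written with an equivalent getattr-based check (illustrative only, not code from the patch):

    import torch

    # Prefer the public function introduced in PyTorch 1.10, otherwise fall back
    # to the Tensor dunder that older releases still provide.
    lshift = getattr(torch, "bitwise_left_shift", torch.Tensor.__lshift__)

    x = torch.tensor([1, 2, 4])
    print(lshift(x, 1))  # tensor([2, 4, 8])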
-def mod(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def mod(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise division remainder of values of operand ``t1`` by values of operand ``t2`` (i.e. ``t1%t2``).
Operation is not commutative. Result has the same sign as the devisor ``t2``.
@@ -776,11 +1075,20 @@ def mod(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
The first operand whose values are divided
t2: DNDarray or scalar
The second operand by whose values is divided
+ out: DNDarray, optional
+ The output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the remainder. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.mod(2, 2)
- DNDarray([0], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(0, dtype=ht.int64, device=cpu:0, split=None)
>>> T1 = ht.int32([[1, 2], [3, 4]])
>>> T2 = ht.int32([[2, 2], [2, 2]])
>>> ht.mod(T1, T2)
@@ -791,16 +1099,30 @@ def mod(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
DNDarray([[0, 0],
[2, 2]], dtype=ht.int32, device=cpu:0, split=None)
"""
- return remainder(t1, t2)
+ return remainder(t1, t2, out, where=where)
+
+
+def _mod(self, other):
+ try:
+ return mod(self, other)
+ except TypeError:
+ return NotImplemented
-DNDarray.__mod__ = lambda self, other: mod(self, other)
+DNDarray.__mod__ = _mod
DNDarray.__mod__.__doc__ = mod.__doc__
-DNDarray.__rmod__ = lambda self, other: mod(other, self)
+DNDarray.__rmod__ = lambda self, other: _mod(other, self)
DNDarray.__rmod__.__doc__ = mod.__doc__
-def mul(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def mul(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise multiplication (NOT matrix multiplication) of values from two operands, commutative.
Takes the first and second operand (scalar or :class:`~heat.core.dndarray.DNDarray`) whose elements are to be
@@ -812,11 +1134,20 @@ def mul(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
The first operand involved in the multiplication
t2: DNDarray or scalar
The second operand involved in the multiplication
+ out: DNDarray, optional
+ Output array. It must have a shape that the inputs broadcast to and matching split axis. If not provided or
+ None, a freshly-allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the multiplied value. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.mul(2.0, 4.0)
- DNDarray([8.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(8., dtype=ht.float32, device=cpu:0, split=None)
>>> T1 = ht.float32([[1, 2], [3, 4]])
>>> s = 3.0
>>> ht.mul(T1, s)
@@ -831,12 +1162,19 @@ def mul(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
DNDarray([[2., 4.],
[6., 8.]], dtype=ht.float32, device=cpu:0, split=None)
"""
- return _operations.__binary_op(torch.mul, t1, t2)
+ return _operations.__binary_op(torch.mul, t1, t2, out, where)
+
+
+def _mul(self, other):
+ try:
+ return mul(self, other)
+ except TypeError:
+ return NotImplemented
-DNDarray.__mul__ = lambda self, other: mul(self, other)
+DNDarray.__mul__ = _mul
DNDarray.__mul__.__doc__ = mul.__doc__
-DNDarray.__rmul__ = lambda self, other: mul(self, other)
+DNDarray.__rmul__ = lambda self, other: _mul(other, self)
DNDarray.__rmul__.__doc__ = mul.__doc__
# Alias in compliance with numpy API
@@ -845,7 +1183,11 @@ def mul(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
def nan_to_num(
- a: DNDarray, nan: float = 0.0, posinf: float = None, neginf: float = None, out: DNDarray = None
+ a: DNDarray,
+ nan: float = 0.0,
+ posinf: Optional[float] = None,
+ neginf: Optional[float] = None,
+ out: Optional[DNDarray] = None,
) -> DNDarray:
"""
Replaces NaNs, positive infinity values, and negative infinity values in the input 'a' with the values specified by
@@ -908,11 +1250,11 @@ def nanprod(
Examples
--------
>>> ht.nanprod(ht.array([4.,ht.nan]))
- DNDarray([4.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(4., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.nanprod(ht.array([
[1.,ht.nan],
[3.,4.]]))
- DNDarray([24.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(24., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.nanprod(ht.array([
[1.,ht.nan],
[ht.nan,4.]
@@ -955,11 +1297,11 @@ def nansum(
Examples
--------
>>> ht.sum(ht.ones(2))
- DNDarray([2.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(2., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.sum(ht.ones((3,3)))
- DNDarray([9.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(9., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.sum(ht.ones((3,3)).astype(ht.int))
- DNDarray([9], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(9, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.sum(ht.ones((3,2,1)), axis=-3)
DNDarray([[3.],
[3.]], dtype=ht.float32, device=cpu:0, split=None)
@@ -1043,7 +1385,14 @@ def torch_pos(torch_tensor, out=None):
"""Alias for :py:func:`pos`"""
-def pow(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def pow(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise exponential function of values of operand ``t1`` to the power of values of operand ``t2`` (i.e ``t1**t2``).
Operation is not commutative.
@@ -1054,11 +1403,20 @@ def pow(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
The first operand whose values represent the base
t2: DNDarray or scalar
The second operand by whose values represent the exponent
+ out: DNDarray, optional
+ Output array. It must have a shape that the inputs broadcast to and matching split axis. If not provided or
+ None, a freshly-allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the exponentiated value. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.pow (3.0, 2.0)
- DNDarray([9.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(9., dtype=ht.float32, device=cpu:0, split=None)
>>> T1 = ht.float32([[1, 2], [3, 4]])
>>> T2 = ht.float32([[3, 3], [2, 2]])
>>> ht.pow(T1, T2)
@@ -1100,12 +1458,22 @@ def pow(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
except AttributeError:
# t2 is no DNDarray
pass
- return _operations.__binary_op(torch.pow, t1, t2)
+ return _operations.__binary_op(torch.pow, t1, t2, out, where)
+
+
+def _pow(self, other, modulo=None):
+ if modulo is not None:
+ return NotImplemented
+
+ try:
+ return pow(self, other)
+ except TypeError:
+ return NotImplemented
-DNDarray.__pow__ = lambda self, other: pow(self, other)
+DNDarray.__pow__ = _pow
DNDarray.__pow__.__doc__ = pow.__doc__
-DNDarray.__rpow__ = lambda self, other: pow(other, self)
+DNDarray.__rpow__ = lambda self, other, modulo=None: _pow(other, self, modulo)
DNDarray.__rpow__.__doc__ = pow.__doc__
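`_pow` takes an optional `modulo` argument because Python passes a third operand when the built-in three-argument `pow(base, exp, mod)` is used; returning NotImplemented in that case makes the call fail with a TypeError instead of silently ignoring the modulus. A small sketch of the resulting behaviour (assumed from the code above, not asserted by the tests in this diff):

    import heat as ht

    a = ht.float32([[1, 2], [3, 4]])

    print(a ** 2)  # element-wise power via DNDarray.__pow__

    try:
        pow(a, 2, 5)  # three-argument pow is not supported for DNDarrays
    except TypeError:
        print("three-argument pow rejected, as intended")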
@@ -1142,11 +1510,11 @@ def prod(
Examples
--------
>>> ht.prod(ht.array([1.,2.]))
- DNDarray([2.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(2., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.prod(ht.array([
[1.,2.],
[3.,4.]]))
- DNDarray([24.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(24., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.prod(ht.array([
[1.,2.],
[3.,4.]
@@ -1162,7 +1530,14 @@ def prod(
DNDarray.prod.__doc__ = prod.__doc__
-def remainder(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def remainder(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise division remainder of values of operand ``t1`` by values of operand ``t2`` (i.e. ``t1%t2``).
Operation is not commutative. Result has the same sign as the devisor ``t2``.
@@ -1173,11 +1548,20 @@ def remainder(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarra
The first operand whose values are divided
t2: DNDarray or scalar
The second operand by whose values is divided
+ out: DNDarray, optional
+ Output array. It must have a shape that the inputs broadcast to and matching split axis.
+ If not provided, a freshly allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the remainder. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.remainder(2, 2)
- DNDarray([0], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(0, dtype=ht.int64, device=cpu:0, split=None)
>>> T1 = ht.int32([[1, 2], [3, 4]])
>>> T2 = ht.int32([[2, 2], [2, 2]])
>>> ht.remainder(T1, T2)
@@ -1188,10 +1572,17 @@ def remainder(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarra
DNDarray([[0, 0],
[2, 2]], dtype=ht.int32, device=cpu:0, split=None)
"""
- return _operations.__binary_op(torch.remainder, t1, t2)
+ return _operations.__binary_op(torch.remainder, t1, t2, out, where)
-def right_shift(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def right_shift(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Shift the bits of an integer to the right.
@@ -1201,6 +1592,16 @@ def right_shift(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDar
Input array
t2: DNDarray or scalar
Integer number of bits to remove
+ out: DNDarray, optional
+ Output array for the result. Must have the same shape as the expected output. The dtype of the output will be
+ that of the input array, unless it is logical, in which case it will be cast to int8. If not provided or
+ None, a freshly-allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the shifted value. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
@@ -1215,14 +1616,37 @@ def right_shift(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDar
elif dtypes[dt] == types.bool:
arrs[dt] = types.int(arrs[dt])
- return _operations.__binary_op(torch.Tensor.__rshift__, t1, t2)
+ try:
+ result = _operations.__binary_op(torch.bitwise_right_shift, t1, t2, out, where)
+ except AttributeError: # pragma: no cover
+ result = _operations.__binary_op(
+ torch.Tensor.__rshift__, t1, t2, out, where
+ ) # pytorch < 1.10
+
+ return result
+
+
+def _rshift(self, other):
+ try:
+ return right_shift(self, other)
+ except TypeError:
+ return NotImplemented
-DNDarray.__rshift__ = lambda self, other: right_shift(self, other)
+DNDarray.__rshift__ = _rshift
DNDarray.__rshift__.__doc__ = right_shift.__doc__
+DNDarray.__rrshift__ = lambda self, other: _rshift(other, self)
+DNDarray.__rrshift__.__doc__ = right_shift.__doc__
-def sub(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
+def sub(
+ t1: Union[DNDarray, float],
+ t2: Union[DNDarray, float],
+ /,
+ out: Optional[DNDarray] = None,
+ *,
+ where: Union[bool, DNDarray] = True,
+) -> DNDarray:
"""
Element-wise subtraction of values of operand ``t2`` from values of operands ``t1`` (i.e ``t1-t2``)
Operation is not commutative.
@@ -1233,11 +1657,20 @@ def sub(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
The first operand from which values are subtracted
t2: DNDarray or scalar
The second operand whose values are subtracted
+ out: DNDarray, optional
+ Output array. It must have a shape that the inputs broadcast to and matching split axis. If not provided or
+ None, a freshly-allocated array is returned.
+ where: DNDarray, optional
+ Condition to broadcast over the inputs. At locations where the condition is True, the `out` array
+ will be set to the subtracted value. Elsewhere, the `out` array will retain its original value. If
+ an uninitialized `out` array is created via the default `out=None`, locations within it where the
+ condition is False will remain uninitialized. If distributed, the split axis (after broadcasting
+ if required) must match that of the `out` array.
Examples
--------
>>> ht.sub(4.0, 1.0)
- DNDarray([3.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(3., dtype=ht.float32, device=cpu:0, split=None)
>>> T1 = ht.float32([[1, 2], [3, 4]])
>>> T2 = ht.float32([[2, 2], [2, 2]])
>>> ht.sub(T1, T2)
@@ -1248,12 +1681,19 @@ def sub(t1: Union[DNDarray, float], t2: Union[DNDarray, float]) -> DNDarray:
DNDarray([[ 1., 0.],
[-1., -2.]], dtype=ht.float32, device=cpu:0, split=None)
"""
- return _operations.__binary_op(torch.sub, t1, t2)
+ return _operations.__binary_op(torch.sub, t1, t2, out, where)
+
+
+def _sub(self, other):
+ try:
+ return sub(self, other)
+ except TypeError:
+ return NotImplemented
-DNDarray.__sub__ = lambda self, other: sub(self, other)
+DNDarray.__sub__ = _sub
DNDarray.__sub__.__doc__ = sub.__doc__
-DNDarray.__rsub__ = lambda self, other: sub(other, self)
+DNDarray.__rsub__ = lambda self, other: _sub(other, self)
DNDarray.__rsub__.__doc__ = sub.__doc__
@@ -1292,11 +1732,11 @@ def sum(
Examples
--------
>>> ht.sum(ht.ones(2))
- DNDarray([2.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(2., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.sum(ht.ones((3,3)))
- DNDarray([9.], dtype=ht.float32, device=cpu:0, split=None)
+ DNDarray(9., dtype=ht.float32, device=cpu:0, split=None)
>>> ht.sum(ht.ones((3,3)).astype(ht.int))
- DNDarray([9], dtype=ht.int64, device=cpu:0, split=None)
+ DNDarray(9, dtype=ht.int64, device=cpu:0, split=None)
>>> ht.sum(ht.ones((3,2,1)), axis=-3)
DNDarray([[3.],
[3.]], dtype=ht.float32, device=cpu:0, split=None)
diff --git a/heat/core/base.py b/heat/core/base.py
index 13e4d777c2..9c1233ce4b 100644
--- a/heat/core/base.py
+++ b/heat/core/base.py
@@ -63,7 +63,7 @@ def __repr__(self, indent: int = 1) -> str:
indent : int, default: 1
Indicates the indentation for the top-level output.
"""
- return "{}({})".format(self.__class__.__name__, json.dumps(self.get_params(), indent=4))
+ return f"{self.__class__.__name__}({json.dumps(self.get_params(), indent=4)})"
def set_params(self, **params: Dict[str, object]) -> self:
"""
diff --git a/heat/core/communication.py b/heat/core/communication.py
index ce87593dbd..d505364b03 100644
--- a/heat/core/communication.py
+++ b/heat/core/communication.py
@@ -803,18 +803,43 @@ def __reduce_like(
dummy = (
sendbuf.contiguous()
) # make a contiguous copy and reassign the storage, old will be collected
- sendbuf.set_(
- dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride()
- )
+ # In PyTorch >= 2.0.0 we can use untyped_storage() instead of storage().
+ # To keep backward compatibility with earlier PyTorch versions (which have no untyped_storage()) we use a try/except
+ # (this applies to all places in Heat where untyped_storage() is used without further comment).
+ try:
+ sendbuf.set_(
+ dummy.untyped_storage(),
+ dummy.storage_offset(),
+ size=dummy.shape,
+ stride=dummy.stride(),
+ )
+ except AttributeError:
+ sendbuf.set_(
+ dummy.storage(),
+ dummy.storage_offset(),
+ size=dummy.shape,
+ stride=dummy.stride(),
+ )
sbuf = sendbuf if CUDA_AWARE_MPI else sendbuf.cpu()
sendbuf = self.as_buffer(sbuf)
if isinstance(recvbuf, torch.Tensor):
buf = recvbuf
# nothing matches, the buffers have to be made contiguous
dummy = recvbuf.contiguous()
- recvbuf.set_(
- dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride()
- )
+ try:
+ recvbuf.set_(
+ dummy.untyped_storage(),
+ dummy.storage_offset(),
+ size=dummy.shape,
+ stride=dummy.stride(),
+ )
+ except AttributeError:
+ recvbuf.set_(
+ dummy.storage(),
+ dummy.storage_offset(),
+ size=dummy.shape,
+ stride=dummy.stride(),
+ )
rbuf = recvbuf if CUDA_AWARE_MPI else recvbuf.cpu()
if sendbuf is MPI.IN_PLACE:
recvbuf = self.as_buffer(rbuf)
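The same try/except shim recurs throughout this patch wherever tensor storages are touched. A condensed standalone illustration of the idea; the helper name `_get_storage` is hypothetical and not introduced by this diff:

    import torch

    def _get_storage(t: torch.Tensor):
        # PyTorch >= 2.0 exposes untyped_storage(); older releases only offer storage().
        try:
            return t.untyped_storage()
        except AttributeError:
            return t.storage()

    buf = torch.arange(6).reshape(2, 3).contiguous()
    print(type(_get_storage(buf)).__name__)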
@@ -1340,7 +1365,7 @@ def __alltoall_like(
mpi_recvbuf = self.alltoall_recvbuffer(rbuf)
exit_code = self.handle.Alltoallw(mpi_sendbuf, mpi_recvbuf, **kwargs)
- # original_recvbuf.set_(recvbuf.storage(), recvbuf.storage_offset(), original_recvbuf.shape, original_recvbuf.stride())
+ # original_recvbuf.set_(recvbuf.untyped_storage(), recvbuf.storage_offset(), original_recvbuf.shape, original_recvbuf.stride())
recv_axis_permutation = list(np.argsort(np.array(axis_permutation)))
return exit_code, sbuf, rbuf, original_recvbuf, recv_axis_permutation
@@ -1570,7 +1595,7 @@ def __gather_like(
# undo the recvbuf permutation and assign the temporary buffer to the original recvbuf
# if recv_axis != 0:
# recvbuf = recvbuf.permute(*recv_axis_permutation)
- # original_recvbuf.set_(recvbuf.storage(), recvbuf.storage_offset(), recvbuf.shape, recvbuf.stride())
+ # original_recvbuf.set_(recvbuf.untyped_storage(), recvbuf.storage_offset(), recvbuf.shape, recvbuf.stride())
return exit_code, sbuf, rbuf, original_recvbuf, recv_axis_permutation
@@ -1812,7 +1837,7 @@ def __scatter_like(
# undo the recvbuf permutation and assign the temporary buffer to the original recvbuf
# if recv_axis != 0:
# recvbuf = recvbuf.permute(*recv_axis_permutation)
- # original_recvbuf.set_(recvbuf.storage(), recvbuf.storage_offset(), recvbuf.shape, recvbuf.stride())
+ # original_recvbuf.set_(recvbuf.untyped_storage(), recvbuf.storage_offset(), recvbuf.shape, recvbuf.stride())
return exit_code, sbuf, rbuf, original_recvbuf, recv_axis_permutation
diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py
index a4c42c4e71..fca6be20b4 100644
--- a/heat/core/dndarray.py
+++ b/heat/core/dndarray.py
@@ -340,7 +340,10 @@ def strides(self) -> Tuple[int]:
Returns bytes to step in each dimension when traversing a ``DNDarray``. numpy-like usage: ``self.strides()``
"""
steps = list(self.larray.stride())
- itemsize = self.larray.storage().element_size()
+ try:
+ itemsize = self.larray.untyped_storage().element_size()
+ except AttributeError:
+ itemsize = self.larray.storage().element_size()
strides = tuple(step * itemsize for step in steps)
return strides
diff --git a/heat/core/factories.py b/heat/core/factories.py
index 2d6850ce7e..34213eefba 100644
--- a/heat/core/factories.py
+++ b/heat/core/factories.py
@@ -237,7 +237,7 @@ def array(
[3, 4, 5]], dtype=ht.int64, device=cpu:0, split=None)
>>> b.strides
(24, 8)
- >>> b.larray.storage()
+ >>> b.larray.untyped_storage()
0
1
2
@@ -251,7 +251,7 @@ def array(
[3, 4, 5]], dtype=ht.int64, device=cpu:0, split=None)
>>> c.strides
(8, 16)
- >>> c.larray.storage()
+ >>> c.larray.untyped_storage()
0
3
1
@@ -271,7 +271,7 @@ def array(
>>> b.strides
[0/2] (8, 16)
[1/2] (8, 16)
- >>> b.larray.storage()
+ >>> b.larray.untyped_storage()
[0/2] 0
3
1
@@ -323,7 +323,7 @@ def array(
else devices.get_device().torch_device,
)
except RuntimeError:
- raise TypeError("invalid data of type {}".format(type(obj)))
+ raise TypeError(f"invalid data of type {type(obj)}")
else:
if copy is False and not np.isscalar(obj) and not isinstance(obj, (Tuple, List)):
# Python array-API compliance, cf. https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.asarray.html
@@ -346,7 +346,7 @@ def array(
else devices.get_device().torch_device,
)
except RuntimeError:
- raise TypeError("invalid data of type {}".format(type(obj)))
+ raise TypeError(f"invalid data of type {type(obj)}")
# infer dtype from obj if not explicitly given
if dtype is None:
diff --git a/heat/core/io.py b/heat/core/io.py
index 735e885388..c615a821b5 100644
--- a/heat/core/io.py
+++ b/heat/core/io.py
@@ -110,7 +110,7 @@ def load_hdf5(
if not isinstance(path, str):
raise TypeError(f"path must be str, not {type(path)}")
elif not isinstance(dataset, str):
- raise TypeError("dataset must be str, not {}".format(type(dataset)))
+ raise TypeError(f"dataset must be str, not {type(dataset)}")
elif split is not None and not isinstance(split, int):
raise TypeError(f"split must be None or int, not {type(split)}")
@@ -410,11 +410,11 @@ def save_netcdf(
>>> ht.save_netcdf(x, 'data.nc', dataset='DATA')
"""
if not isinstance(data, DNDarray):
- raise TypeError("data must be heat tensor, not {}".format(type(data)))
+ raise TypeError(f"data must be heat tensor, not {type(data)}")
if not isinstance(path, str):
- raise TypeError("path must be str, not {}".format(type(path)))
+ raise TypeError(f"path must be str, not {type(path)}")
if not isinstance(variable, str):
- raise TypeError("variable must be str, not {}".format(type(path)))
+ raise TypeError(f"variable must be str, not {type(path)}")
if dimension_names is None:
dimension_names = [
__NETCDF_DIM_TEMPLATE.format(variable, dim) for dim, _ in enumerate(data.shape)
@@ -430,15 +430,11 @@ def save_netcdf(
)
)
elif not len(dimension_names) == len(data.shape):
- raise ValueError(
- "{0} names given for {1} dimensions".format(len(dimension_names), len(data.shape))
- )
+ raise ValueError(f"{len(dimension_names)} names given for {len(data.shape)} dimensions")
# we only support a subset of possible modes
if mode not in __VALID_WRITE_MODES:
- raise ValueError(
- "mode was {}, not in possible modes {}".format(mode, __VALID_WRITE_MODES)
- )
+ raise ValueError(f"mode was {mode}, not in possible modes {__VALID_WRITE_MODES}")
failed = 0
excep = None
@@ -468,9 +464,7 @@ def __get_expanded_split(
If resulting shapes do not match.
"""
if np.prod(shape) != np.prod(expanded_shape):
- raise ValueError(
- "Shapes %s and %s do not have the same size" % (shape, expanded_shape)
- )
+ raise ValueError(f"Shapes {shape} and {expanded_shape} do not have the same size")
if np.prod(shape) == 1: # size 1 array
return split
if len(shape) == len(expanded_shape): # actually not expanded at all
@@ -484,7 +478,7 @@ def __get_expanded_split(
ex_ind_nonempty, sq_ex = list(zip(*enumerated)) # transpose
if not sq_shape == sq_ex:
raise ValueError(
- "Shapes %s and %s differ in non-empty dimensions" % (shape, expanded_shape)
+ f"Shapes {shape} and {expanded_shape} differ in non-empty dimensions"
)
if split in ind_nonempty: # split along non-empty dimension
split_sq = ind_nonempty.index(split) # split-axis in squeezed shape
@@ -664,7 +658,7 @@ def __merge_slices(
raise excep
elif failed:
excep = data.comm.bcast(excep, root=failed - 1)
- excep.args = "raised by process rank {}".format(failed - 1), *excep.args
+ excep.args = f"raised by process rank {failed - 1}", *excep.args
raise excep from None # raise the same error but without traceback
# because that is on a different process
diff --git a/heat/core/linalg/basics.py b/heat/core/linalg/basics.py
index 874b4cfc0f..f71bf4b2d4 100644
--- a/heat/core/linalg/basics.py
+++ b/heat/core/linalg/basics.py
@@ -484,6 +484,9 @@ def matmul(a: DNDarray, b: DNDarray, allow_resplit: bool = False) -> DNDarray:
[11., 12., 13.],
[12., 13., 14.]])
"""
+ sanitation.sanitize_in(a)
+ sanitation.sanitize_in(b)
+
if a.gshape[-1] != b.gshape[0]:
raise ValueError(
f"If the last dimension of a ({a.gshape[-1]}) is not the same size as the second-to-last dimension of b. ({b.gshape[-2]})"
@@ -1094,7 +1097,17 @@ def matmul(a: DNDarray, b: DNDarray, allow_resplit: bool = False) -> DNDarray:
return c
-DNDarray.__matmul__ = lambda self, other: matmul(self, other)
+def _matmul(self, other):
+ try:
+ return matmul(self, other)
+ except TypeError:
+ return NotImplemented
+
+
+DNDarray.__matmul__ = _matmul
+DNDarray.__matmul__.__doc__ = matmul.__doc__
+DNDarray.__rmatmul__ = lambda self, other: _matmul(other, self)
+DNDarray.__rmatmul__.__doc__ = matmul.__doc__
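With both __matmul__ and __rmatmul__ returning NotImplemented for unsupported operands, Python raises the usual TypeError for the @ operator, which is what the added test in test_basics.py checks further down. A minimal sketch of the dispatch (illustrative only):

    import heat as ht

    a = ht.zeros((3, 3))
    try:
        "T" @ a  # str has no __matmul__; DNDarray.__rmatmul__ returns NotImplemented
    except TypeError:
        print("unsupported operand type for @, as expected")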
def matrix_norm(
diff --git a/heat/core/linalg/svdtools.py b/heat/core/linalg/svdtools.py
index ef72ccfc3b..fb90406384 100644
--- a/heat/core/linalg/svdtools.py
+++ b/heat/core/linalg/svdtools.py
@@ -85,7 +85,7 @@ def hsvd_rank(
[2] Himpe, Leibner, Rave. Hierarchical approximate proper orthogonal decomposition. SIAM J. Sci. Comput., 40 (5), 2018.
"""
if not isinstance(A, DNDarray):
- raise TypeError("Argument needs to be a DNDarray but is {}.".format(type(A)))
+ raise TypeError(f"Argument needs to be a DNDarray but is {type(A)}.")
if not A.ndim == 2:
raise ValueError("A needs to be a 2D matrix")
if not A.dtype == types.float32 and not A.dtype == types.float64:
@@ -197,7 +197,7 @@ def hsvd_rtol(
[2] Himpe, Leibner, Rave. Hierarchical approximate proper orthogonal decomposition. SIAM J. Sci. Comput., 40 (5), 2018.
"""
if not isinstance(A, DNDarray):
- raise TypeError("Argument needs to be a DNDarray but is {}.".format(type(A)))
+ raise TypeError(f"Argument needs to be a DNDarray but is {type(A)}.")
if not A.ndim == 2:
raise ValueError("A needs to be a 2D matrix")
if not A.dtype == types.float32 and not A.dtype == types.float64:
diff --git a/heat/core/linalg/tests/test_basics.py b/heat/core/linalg/tests/test_basics.py
index 6ae0038f6c..a9e8f291a1 100644
--- a/heat/core/linalg/tests/test_basics.py
+++ b/heat/core/linalg/tests/test_basics.py
@@ -809,6 +809,8 @@ def test_matmul(self):
a = ht.zeros((3, 3, 3), split=2)
b = a.copy()
a @ b
+ with self.assertRaises(TypeError):
+ "T" @ ht.zeros((3, 3, 3))
def test_matrix_norm(self):
a = ht.arange(9, dtype=ht.float) - 4
diff --git a/heat/core/manipulations.py b/heat/core/manipulations.py
index c35d16faf5..aca95db058 100644
--- a/heat/core/manipulations.py
+++ b/heat/core/manipulations.py
@@ -2062,7 +2062,7 @@ def reshape(a: DNDarray, *shape: Union[int, Tuple[int, ...]], **kwargs) -> DNDar
(2/2) tensor([[ 8., 10., 12., 14.]])
"""
if not isinstance(a, DNDarray):
- raise TypeError("'a' must be a DNDarray, currently {}".format(type(a)))
+ raise TypeError(f"'a' must be a DNDarray, currently {type(a)}")
# use numpys _ShapeLike but expand to handle torch and heat Tensors
np_proxy = np.lib.stride_tricks.as_strided(np.ones(1), a.gshape, [0] * a.ndim, writeable=False)
@@ -4061,7 +4061,7 @@ def topk(
)
)
if out[1].dtype != types.int64:
- raise RuntimeError("dtype of 'out[1]' is not ht.int64, found {}".format(out[1].dtype))
+ raise RuntimeError(f"dtype of 'out[1]' is not ht.int64, found {out[1].dtype}")
dim = stride_tricks.sanitize_axis(a.gshape, dim)
@@ -4150,8 +4150,12 @@ def local_topk(*args, **kwargs):
gres.shape, gindices.shape, out[0].shape, out[1].shape
)
)
- out[0].larray.storage().copy_(final_array.larray.storage())
- out[1].larray.storage().copy_(final_indices.larray.storage())
+ try:
+ out[0].larray.untyped_storage().copy_(final_array.larray.untyped_storage())
+ out[1].larray.untyped_storage().copy_(final_indices.larray.untyped_storage())
+ except AttributeError:
+ out[0].larray.storage().copy_(final_array.larray.storage())
+ out[1].larray.storage().copy_(final_indices.larray.storage())
out[0]._DNDarray__dtype = a.dtype
out[1]._DNDarray__dtype = types.int64
diff --git a/heat/core/memory.py b/heat/core/memory.py
index cd3aa927b3..72b8cc7d9b 100644
--- a/heat/core/memory.py
+++ b/heat/core/memory.py
@@ -74,12 +74,20 @@ def sanitize_memory_layout(x: torch.Tensor, order: str = "C") -> torch.Tensor:
dims = tuple(reversed(dims))
y = torch.empty_like(x)
permutation = x.permute(dims).contiguous()
- y = y.set_(
- permutation.storage(),
- x.storage_offset(),
- x.shape,
- tuple(reversed(permutation.stride())),
- )
+ try:
+ y = y.set_(
+ permutation.untyped_storage(),
+ x.storage_offset(),
+ x.shape,
+ tuple(reversed(permutation.stride())),
+ )
+ except AttributeError:
+ y = y.set_(
+ permutation.storage(),
+ x.storage_offset(),
+ x.shape,
+ tuple(reversed(permutation.stride())),
+ )
del permutation, dims, column_major, row_major, x
return y
else:
diff --git a/heat/core/sanitation.py b/heat/core/sanitation.py
index 5f6821996f..6485e4139d 100644
--- a/heat/core/sanitation.py
+++ b/heat/core/sanitation.py
@@ -347,7 +347,7 @@ def scalar_to_1d(x: DNDarray) -> DNDarray:
if x.ndim == 1 and x.gnumel == 1:
return x
raise ValueError(
- "Input needs to be a scalar DNDarray,but was found to be {}d DNDarray".format(x.ndim)
+ f"Input needs to be a scalar DNDarray,but was found to be {x.ndim}d DNDarray"
)
return DNDarray(
x.larray.unsqueeze(0),
diff --git a/heat/core/statistics.py b/heat/core/statistics.py
index e0927d0b55..bdb6765a1e 100644
--- a/heat/core/statistics.py
+++ b/heat/core/statistics.py
@@ -99,7 +99,7 @@ def local_argmax(*args, **kwargs):
# axis sanitation
if axis is not None and not isinstance(axis, int):
- raise TypeError("axis must be None or int, was {}".format(type(axis)))
+ raise TypeError(f"axis must be None or int, was {type(axis)}")
# perform the global reduction
smallest_value = -sanitation.sanitize_infinity(x)
@@ -171,7 +171,7 @@ def local_argmin(*args, **kwargs):
# axis sanitation
if axis is not None and not isinstance(axis, int):
- raise TypeError("axis must be None or int, was {}".format(type(axis)))
+ raise TypeError(f"axis must be None or int, was {type(axis)}")
# perform the global reduction
largest_value = sanitation.sanitize_infinity(x)
@@ -1492,7 +1492,7 @@ def _local_percentile(data: torch.Tensor, axis: int, indices: torch.Tensor) -> t
# SANITATION
# sanitize input
if not isinstance(x, DNDarray):
- raise TypeError("expected x to be a DNDarray, but was {}".format(type(x)))
+ raise TypeError(f"expected x to be a DNDarray, but was {type(x)}")
if isinstance(axis, (list, tuple)):
raise NotImplementedError("ht.percentile(), tuple axis not implemented yet")
@@ -1519,7 +1519,7 @@ def _local_percentile(data: torch.Tensor, axis: int, indices: torch.Tensor) -> t
t_q = q.larray
t_perc_dtype = torch.promote_types(t_q.dtype, torch.float32)
else:
- raise TypeError("DNDarray, list or tuple supported, but q was {}".format(type(q)))
+ raise TypeError(f"DNDarray, list or tuple supported, but q was {type(q)}")
nperc = t_q.numel()
perc_dtype = types.canonical_heat_type(t_perc_dtype)
@@ -1537,17 +1537,13 @@ def _local_percentile(data: torch.Tensor, axis: int, indices: torch.Tensor) -> t
# sanitize out
if out is not None:
if not isinstance(out, DNDarray):
- raise TypeError("out must be DNDarray, was {}".format(type(out)))
+ raise TypeError(f"out must be DNDarray, was {type(out)}")
if out.dtype is not perc_dtype:
- raise TypeError(
- "Wrong datatype for out: expected {}, got {}".format(perc_dtype, out.dtype)
- )
+ raise TypeError(f"Wrong datatype for out: expected {perc_dtype}, got {out.dtype}")
if out.gshape != output_shape:
- raise ValueError("out must have shape {}, got {}".format(output_shape, out.gshape))
+ raise ValueError(f"out must have shape {output_shape}, got {out.gshape}")
if out.split is not None:
- raise ValueError(
- "Split dimension mismatch for out: expected {}, got {}".format(None, out.split)
- )
+ raise ValueError(f"Split dimension mismatch for out: expected {None}, got {out.split}")
# END OF SANITATION
# edge-case: x is a scalar. Return x
diff --git a/heat/core/tests/test_arithmetics.py b/heat/core/tests/test_arithmetics.py
index c2ece5a744..7c2f9c8aa8 100644
--- a/heat/core/tests/test_arithmetics.py
+++ b/heat/core/tests/test_arithmetics.py
@@ -35,6 +35,9 @@ def test_add(self):
self.assertTrue(ht.equal(ht.add(self.a_tensor, self.an_int_scalar), result))
self.assertTrue(ht.equal(ht.add(self.a_split_tensor, self.a_tensor), result))
+ self.assertTrue(ht.equal(self.a_tensor + self.a_scalar, result))
+ self.assertTrue(ht.equal(self.a_scalar + self.a_tensor, result))
+
# Single element split
a = ht.array([1], split=0)
b = ht.array([1, 2], split=0)
@@ -77,12 +80,23 @@ def test_add(self):
self.assertTrue((c == 1).all())
self.assertTrue(c.lshape == b.lshape)
+ # out parameter, where parameter
+ a = ht.ones((2, 2), split=0)
+ b = out = ht.ones((2, 2), split=0)
+ where = ht.array([[True, False], [False, True]], split=0)
+ ht.add(a, b, out=out, where=where)
+ self.assertTrue(ht.equal(out, ht.array([[2, 1], [1, 2]])))
+ self.assertEqual(out.split, 0)
+ self.assertIs(out, b)
+
with self.assertRaises(ValueError):
ht.add(self.a_tensor, self.another_vector)
with self.assertRaises(TypeError):
ht.add(self.a_tensor, self.erroneous_type)
with self.assertRaises(TypeError):
ht.add("T", "s")
+ with self.assertRaises(TypeError):
+ self.a_tensor + "s"
def test_bitwise_and(self):
an_int_tensor = ht.array([[1, 2], [3, 4]])
@@ -103,6 +117,15 @@ def test_bitwise_and(self):
ht.equal(ht.bitwise_and(an_int_tensor.copy().resplit_(0), an_int_vector), int_result)
)
+ self.assertTrue(ht.equal(an_int_tensor & self.an_int_scalar, int_result))
+ self.assertTrue(ht.equal(self.an_int_scalar & an_int_tensor, int_result))
+
+ # out parameter, where parameter
+ out = ht.zeros_like(an_int_tensor)
+ where = ht.array([[True, False], [False, True]])
+ ht.bitwise_and(an_int_tensor, self.an_int_scalar, out=out, where=where)
+ self.assertTrue(ht.equal(out, ht.array([[0, 0], [0, 0]])))
+
with self.assertRaises(TypeError):
ht.bitwise_and(self.a_tensor, self.another_tensor)
with self.assertRaises(ValueError):
@@ -119,6 +142,8 @@ def test_bitwise_and(self):
ht.bitwise_and("s", self.an_int_scalar)
with self.assertRaises(TypeError):
ht.bitwise_and(self.an_int_scalar, self.a_scalar)
+ with self.assertRaises(TypeError):
+ self.a_tensor & "s"
def test_bitwise_or(self):
an_int_tensor = ht.array([[1, 2], [3, 4]])
@@ -139,6 +164,15 @@ def test_bitwise_or(self):
ht.equal(ht.bitwise_or(an_int_tensor.copy().resplit_(0), an_int_vector), int_result)
)
+ self.assertTrue(ht.equal(an_int_tensor | self.an_int_scalar, int_result))
+ self.assertTrue(ht.equal(self.an_int_scalar | an_int_tensor, int_result))
+
+ # out parameter, where parameter
+ out = ht.zeros_like(an_int_tensor)
+ where = ht.array([[True, False], [False, True]])
+ ht.bitwise_or(an_int_tensor, self.an_int_scalar, out=out, where=where)
+ self.assertTrue(ht.equal(out, ht.array([[3, 0], [0, 6]])))
+
with self.assertRaises(TypeError):
ht.bitwise_or(self.a_tensor, self.another_tensor)
with self.assertRaises(ValueError):
@@ -155,6 +189,8 @@ def test_bitwise_or(self):
ht.bitwise_or("s", self.an_int_scalar)
with self.assertRaises(TypeError):
ht.bitwise_or(self.an_int_scalar, self.a_scalar)
+ with self.assertRaises(TypeError):
+ self.a_tensor | "s"
def test_bitwise_xor(self):
an_int_tensor = ht.array([[1, 2], [3, 4]])
@@ -175,6 +211,15 @@ def test_bitwise_xor(self):
ht.equal(ht.bitwise_xor(an_int_tensor.copy().resplit_(0), an_int_vector), int_result)
)
+ self.assertTrue(ht.equal(an_int_tensor ^ self.an_int_scalar, int_result))
+ self.assertTrue(ht.equal(self.an_int_scalar ^ an_int_tensor, int_result))
+
+ # out parameter, where parameter
+ out = ht.zeros_like(an_int_tensor)
+ where = ht.array([[True, False], [False, True]])
+ ht.bitwise_xor(an_int_tensor, self.an_int_scalar, out=out, where=where)
+ self.assertTrue(ht.equal(out, ht.array([[3, 0], [0, 6]])))
+
with self.assertRaises(TypeError):
ht.bitwise_xor(self.a_tensor, self.another_tensor)
with self.assertRaises(ValueError):
@@ -191,6 +236,8 @@ def test_bitwise_xor(self):
ht.bitwise_xor("s", self.an_int_scalar)
with self.assertRaises(TypeError):
ht.bitwise_xor(self.an_int_scalar, self.a_scalar)
+ with self.assertRaises(TypeError):
+ self.a_tensor ^ "s"
def test_copysign(self):
a = ht.array([3, 2, -8, -2, 4])
@@ -384,6 +431,9 @@ def test_div(self):
self.assertTrue(ht.equal(ht.div(self.a_tensor, self.an_int_scalar), result))
self.assertTrue(ht.equal(ht.div(self.a_split_tensor, self.a_tensor), commutated_result))
+ self.assertTrue(ht.equal(self.a_tensor / self.a_scalar, result))
+ self.assertTrue(ht.equal(self.a_scalar / self.a_tensor, commutated_result))
+
a = out = ht.empty((2, 2))
ht.div(self.a_tensor, self.a_scalar, out=out)
self.assertTrue(ht.equal(out, result))
@@ -442,6 +492,84 @@ def test_div(self):
self.a_tensor,
where=ht.array([[True, False], [False, True]], split=1),
)
+ with self.assertRaises(TypeError):
+ self.a_tensor / "T"
+
+ def test_divmod(self):
+ # basic tests only, as floor_divide and mod are tested separately
+ result = (
+ ht.array([[0.0, 1.0], [1.0, 2.0]]),
+ ht.array([[1.0, 0.0], [1.0, 0.0]]),
+ )
+ dm = ht.divmod(self.a_tensor, self.a_scalar)
+
+ self.assertIsInstance(dm, tuple)
+ self.assertTrue(ht.equal(ht.divmod(self.a_tensor, self.a_scalar)[0], result[0]))
+ self.assertTrue(ht.equal(ht.divmod(self.a_tensor, self.a_scalar)[1], result[1]))
+
+ result = (ht.array([1.0, 1.0]), ht.array([0.0, 0.0]))
+ dm = divmod(self.a_scalar, self.a_vector)
+ self.assertTrue(ht.equal(dm[0], result[0]))
+ self.assertTrue(ht.equal(dm[1], result[1]))
+
+ # out parameter
+ out = (ht.empty((2, 2), split=0), ht.empty((2, 2), split=0))
+ ht.divmod(self.a_split_tensor, self.a_scalar, out=out)
+ self.assertTrue(ht.equal(out[0], ht.array([[1.0, 1.0], [1.0, 1.0]])))
+ self.assertTrue(ht.equal(out[1], ht.array([[0.0, 0.0], [0.0, 0.0]])))
+
+ with self.assertRaises(TypeError):
+ divmod(self.another_tensor, self.erroneous_type)
+ with self.assertRaises(TypeError):
+ ht.divmod(ht.zeros((2, 2)), ht.zeros((2, 2)), out=1)
+ with self.assertRaises(ValueError):
+ ht.divmod(ht.zeros((2, 2)), ht.zeros((2, 2)), out=(1, 2, 3))
+ with self.assertRaises(TypeError):
+ ht.divmod(
+ ht.zeros((2, 2)),
+ ht.zeros((2, 2)),
+ ht.empty((2, 2)),
+ ht.empty((2, 2)),
+ out=(ht.empty((2, 2)), None),
+ )
+ with self.assertRaises(TypeError):
+ ht.divmod(
+ ht.zeros((2, 2)),
+ ht.zeros((2, 2)),
+ ht.empty((2, 2)),
+ ht.empty((2, 2)),
+ out=(None, ht.empty((2, 2))),
+ )
+ with self.assertRaises(TypeError):
+ divmod(ht.zeros((2, 2)), "T")
+
+ def test_floordiv(self):
+ result = ht.array([[0.0, 1.0], [1.0, 2.0]])
+ commutated_result = ht.array([[2.0, 1.0], [0.0, 0.0]])
+
+ self.assertTrue(ht.equal(ht.floordiv(self.a_scalar, self.a_scalar), ht.float32(1.0)))
+ self.assertTrue(ht.equal(ht.floordiv(self.a_tensor, self.a_scalar), result))
+ self.assertTrue(ht.equal(ht.floordiv(self.a_scalar, self.a_tensor), commutated_result))
+ self.assertTrue(ht.equal(ht.floordiv(self.a_tensor, self.another_tensor), result))
+ self.assertTrue(ht.equal(ht.floordiv(self.a_tensor, self.a_vector), result))
+ self.assertTrue(ht.equal(ht.floordiv(self.a_tensor, self.an_int_scalar), result))
+ self.assertTrue(
+ ht.equal(ht.floordiv(self.a_split_tensor, self.a_tensor), commutated_result)
+ )
+
+ self.assertTrue(ht.equal(self.a_tensor // self.a_scalar, result))
+ self.assertTrue(ht.equal(self.a_scalar // self.a_tensor, commutated_result))
+
+ with self.assertRaises(TypeError):
+ "T" // self.a_tensor
def test_fmod(self):
result = ht.array([[1.0, 0.0], [1.0, 0.0]])
@@ -515,7 +643,7 @@ def test_invert(self):
self.assertTrue(ht.equal(ht.invert(int8_tensor), int8_result))
self.assertTrue(ht.equal(ht.invert(int8_tensor.copy().resplit_(0)), int8_result))
self.assertTrue(ht.equal(ht.invert(uint8_tensor), uint8_result))
- self.assertTrue(ht.equal(ht.invert(bool_tensor), bool_result))
+ self.assertTrue(ht.equal(~bool_tensor, bool_result))
with self.assertRaises(TypeError):
ht.invert(float_tensor)
@@ -537,16 +665,25 @@ def test_lcm(self):
def test_left_shift(self):
int_tensor = ht.array([[0, 1], [2, 3]])
- int_result = ht.array([[0, 2], [4, 6]])
- self.assertTrue(ht.equal(ht.left_shift(int_tensor, 1), int_result))
- self.assertTrue(ht.equal(ht.left_shift(int_tensor.copy().resplit_(0), 1), int_result))
+ self.assertTrue(ht.equal(ht.left_shift(int_tensor, 1), int_tensor * 2))
+ self.assertTrue(ht.equal(ht.left_shift(int_tensor.copy().resplit_(0), 1), int_tensor * 2))
+ self.assertTrue(ht.equal(int_tensor << 2, int_tensor * 4))
+ self.assertTrue(
+ ht.equal(1 << ht.ones(3, dtype=ht.int32), ht.array([2, 2, 2], dtype=ht.int32))
+ )
+
+ ht.left_shift(int_tensor, 1, out=int_tensor, where=int_tensor > 1)
+ self.assertTrue(ht.equal(int_tensor, ht.array([[0, 1], [4, 6]])))
with self.assertRaises(TypeError):
ht.left_shift(int_tensor, 2.4)
res = ht.left_shift(ht.array([True]), 2)
self.assertTrue(res == 4)
+ with self.assertRaises(TypeError):
+ int_tensor << "s"
+
def test_mod(self):
a_tensor = ht.array([[1, 4], [2, 2]])
another_tensor = ht.array([[1, 2], [3, 4]])
@@ -554,8 +691,15 @@ def test_mod(self):
another_result = ht.array([[1, 0], [0, 0]])
self.assertTrue(ht.equal(ht.mod(a_tensor, another_tensor), a_result))
- self.assertTrue(ht.equal(ht.mod(a_tensor, self.an_int_scalar), another_result))
- self.assertTrue(ht.equal(ht.mod(self.an_int_scalar, another_tensor), a_result))
+ self.assertTrue(ht.equal(a_tensor % self.an_int_scalar, another_result))
+ self.assertTrue(ht.equal(self.an_int_scalar % another_tensor, a_result))
+
+ with self.assertRaises(TypeError):
+ ht.mod(a_tensor, "T")
+ with self.assertRaises(TypeError):
+ ht.mod("T", another_tensor)
+ with self.assertRaises(TypeError):
+ a_tensor % "s"
def test_mul(self):
result = ht.array([[2.0, 4.0], [6.0, 8.0]])
@@ -568,12 +712,17 @@ def test_mul(self):
self.assertTrue(ht.equal(ht.mul(self.a_tensor, self.an_int_scalar), result))
self.assertTrue(ht.equal(ht.mul(self.a_split_tensor, self.a_tensor), result))
+ self.assertTrue(ht.equal(self.a_tensor * self.a_scalar, result))
+ self.assertTrue(ht.equal(self.a_scalar * self.a_tensor, result))
+
with self.assertRaises(ValueError):
ht.mul(self.a_tensor, self.another_vector)
with self.assertRaises(TypeError):
ht.mul(self.a_tensor, self.erroneous_type)
with self.assertRaises(TypeError):
ht.mul("T", "s")
+ with self.assertRaises(TypeError):
+ self.a_tensor * "T"
def test_nan_to_num(self):
arr = ht.array([1, 2, 3, ht.nan, ht.inf, -ht.inf])
@@ -697,6 +846,9 @@ def test_pow(self):
self.assertTrue(ht.equal(ht.pow(self.a_tensor, self.an_int_scalar), result))
self.assertTrue(ht.equal(ht.pow(self.a_split_tensor, self.a_tensor), commutated_result))
+ self.assertTrue(ht.equal(self.a_tensor**self.a_scalar, result))
+ self.assertTrue(ht.equal(self.a_scalar**self.a_tensor, commutated_result))
+
# test scalar base and exponent
self.assertTrue(ht.equal(ht.pow(2, 3), ht.array(8)))
self.assertTrue(ht.equal(ht.pow(2, 3.5), ht.array(11.313708498984761)))
@@ -708,6 +860,10 @@ def test_pow(self):
ht.pow(self.a_tensor, self.erroneous_type)
with self.assertRaises(TypeError):
ht.pow("T", "s")
+ with self.assertRaises(TypeError):
+ pow(self.a_tensor, 2, 3)
+ with self.assertRaises(TypeError):
+ self.a_tensor ** "T"
def test_prod(self):
array_len = 11
@@ -820,10 +976,13 @@ def test_prod(self):
def test_right_shift(self):
int_tensor = ht.array([[0, 1], [2, 3]])
- int_result = ht.array([[0, 0], [1, 1]])
- self.assertTrue(ht.equal(ht.right_shift(int_tensor, 1), int_result))
- self.assertTrue(ht.equal(ht.right_shift(int_tensor.copy().resplit_(0), 1), int_result))
+ self.assertTrue(ht.equal(ht.right_shift(int_tensor, 1), int_tensor // 2))
+ self.assertTrue(ht.equal(ht.right_shift(int_tensor.copy().resplit_(0), 1), int_tensor // 2))
+ self.assertTrue(ht.equal(int_tensor >> 2, int_tensor // 4))
+ self.assertTrue(
+ ht.equal(1 >> ht.ones(3, dtype=ht.int32), ht.array([0, 0, 0], dtype=ht.int32))
+ )
with self.assertRaises(TypeError):
ht.right_shift(int_tensor, 2.4)
@@ -831,6 +990,9 @@ def test_right_shift(self):
res = ht.right_shift(ht.array([True]), 2)
self.assertTrue(res == 0)
+ with self.assertRaises(TypeError):
+ int_tensor >> "s"
+
def test_sub(self):
result = ht.array([[-1.0, 0.0], [1.0, 2.0]])
minus_result = ht.array([[1.0, 0.0], [-1.0, -2.0]])
@@ -843,12 +1005,17 @@ def test_sub(self):
self.assertTrue(ht.equal(ht.sub(self.a_tensor, self.an_int_scalar), result))
self.assertTrue(ht.equal(ht.sub(self.a_split_tensor, self.a_tensor), minus_result))
+ self.assertTrue(ht.equal(self.a_tensor - self.a_scalar, result))
+ self.assertTrue(ht.equal(self.a_scalar - self.a_tensor, minus_result))
+
with self.assertRaises(ValueError):
ht.sub(self.a_tensor, self.another_vector)
with self.assertRaises(TypeError):
ht.sub(self.a_tensor, self.erroneous_type)
with self.assertRaises(TypeError):
ht.sub("T", "s")
+ with self.assertRaises(TypeError):
+ self.a_tensor - "T"
def test_sum(self):
array_len = 11
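The new assertions above exercise the arithmetic dunder methods of `DNDarray`. A minimal sketch of the behaviour they check (assumes Heat is installed and imported as `ht`):

```python
import heat as ht

a = ht.array([[1.0, 2.0], [3.0, 4.0]], split=0)

print(a + 2)            # __add__/__radd__: [[3., 4.], [5., 6.]]
print(a // 2)           # __floordiv__:     [[0., 1.], [1., 2.]]
q, r = ht.divmod(a, 2)  # (quotient, remainder) tuple
print(q)                # [[0., 1.], [1., 2.]]
print(r)                # [[1., 0.], [1., 0.]]
```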
diff --git a/heat/core/tests/test_dndarray.py b/heat/core/tests/test_dndarray.py
index cdb0810ed5..1ba1c45608 100644
--- a/heat/core/tests/test_dndarray.py
+++ b/heat/core/tests/test_dndarray.py
@@ -4,6 +4,8 @@
import heat as ht
from .test_suites.basic_test import TestCase
+pytorch_major_version = int(torch.__version__.split(".")[0])
+
class TestDNDarray(TestCase):
@classmethod
@@ -738,17 +740,6 @@ def test_lnbytes(self):
self.assertEqual(x_bool_d.lnbytes, x_bool_d.lnumel * 1)
- def test_lshift(self):
- int_tensor = ht.array([[0, 1], [2, 3]])
- int_result = ht.array([[0, 4], [8, 12]])
-
- self.assertTrue(ht.equal(int_tensor << 2, int_result))
-
- with self.assertRaises(TypeError):
- int_tensor << 2.4
- res = ht.left_shift(ht.array([True]), 2)
- self.assertTrue(res == 4)
-
def test_nbytes(self):
# undistributed case
@@ -1115,17 +1106,6 @@ def test_resplit(self):
self.assertTrue(ht.all(t1_sub == res))
self.assertEqual(t1_sub.split, None)
- def test_rshift(self):
- int_tensor = ht.array([[0, 2], [4, 8]])
- int_result = ht.array([[0, 0], [1, 2]])
-
- self.assertTrue(ht.equal(int_tensor >> 2, int_result))
-
- with self.assertRaises(TypeError):
- int_tensor >> 2.4
- res = ht.right_shift(ht.array([True]), 2)
- self.assertTrue(res == 0)
-
def test_setitem_getitem(self):
# tests for bug #825
a = ht.ones((102, 102), split=0)
@@ -1593,7 +1573,12 @@ def test_stride_and_strides(self):
heat_int16 = ht.array(torch_int16)
numpy_int16 = torch_int16.cpu().numpy()
self.assertEqual(heat_int16.stride(), torch_int16.stride())
- self.assertEqual(heat_int16.strides, numpy_int16.strides)
+ if pytorch_major_version >= 2:
+ self.assertTrue(
+ (np.asarray(heat_int16.strides) * 2 == np.asarray(numpy_int16.strides)).all()
+ )
+ else:
+ self.assertEqual(heat_int16.strides, numpy_int16.strides)
# Local, float32, row-major memory layout
torch_float32 = torch.arange(
@@ -1602,7 +1587,12 @@ def test_stride_and_strides(self):
heat_float32 = ht.array(torch_float32)
numpy_float32 = torch_float32.cpu().numpy()
self.assertEqual(heat_float32.stride(), torch_float32.stride())
- self.assertEqual(heat_float32.strides, numpy_float32.strides)
+ if pytorch_major_version >= 2:
+ self.assertTrue(
+ (np.asarray(heat_float32.strides) * 4 == np.asarray(numpy_float32.strides)).all()
+ )
+ else:
+ self.assertEqual(heat_float32.strides, numpy_float32.strides)
# Local, float64, column-major memory layout
torch_float64 = torch.arange(
@@ -1611,7 +1601,14 @@ def test_stride_and_strides(self):
heat_float64_F = ht.array(torch_float64, order="F")
numpy_float64_F = np.array(torch_float64.cpu().numpy(), order="F")
self.assertNotEqual(heat_float64_F.stride(), torch_float64.stride())
- self.assertEqual(heat_float64_F.strides, numpy_float64_F.strides)
+ if pytorch_major_version >= 2:
+ self.assertTrue(
+ (
+ np.asarray(heat_float64_F.strides) * 8 == np.asarray(numpy_float64_F.strides)
+ ).all()
+ )
+ else:
+ self.assertEqual(heat_float64_F.strides, numpy_float64_F.strides)
# Distributed, int16, row-major memory layout
size = ht.communication.MPI_WORLD.size
@@ -1626,7 +1623,15 @@ def test_stride_and_strides(self):
numpy_int16_split_strides = (
tuple(np.array(numpy_int16.strides[:split]) / size) + numpy_int16.strides[split:]
)
- self.assertEqual(heat_int16_split.strides, numpy_int16_split_strides)
+ if pytorch_major_version >= 2:
+ self.assertTrue(
+ (
+ np.asarray(heat_int16_split.strides) * 2
+ == np.asarray(numpy_int16_split_strides)
+ ).all()
+ )
+ else:
+ self.assertEqual(heat_int16_split.strides, numpy_int16_split_strides)
# Distributed, float32, row-major memory layout
split = -1
@@ -1638,7 +1643,15 @@ def test_stride_and_strides(self):
numpy_float32_split_strides = (
tuple(np.array(numpy_float32.strides[:split]) / size) + numpy_float32.strides[split:]
)
- self.assertEqual(heat_float32_split.strides, numpy_float32_split_strides)
+ if pytorch_major_version >= 2:
+ self.assertTrue(
+ (
+ np.asarray(heat_float32_split.strides) * 4
+ == np.asarray(numpy_float32_split_strides)
+ ).all()
+ )
+ else:
+ self.assertEqual(heat_float32_split.strides, numpy_float32_split_strides)
# Distributed, float64, column-major memory layout
split = -2
@@ -1650,7 +1663,15 @@ def test_stride_and_strides(self):
numpy_float64_F_split_strides = numpy_float64_F.strides[: split + 1] + tuple(
np.array(numpy_float64_F.strides[split + 1 :]) / size
)
- self.assertEqual(heat_float64_F_split.strides, numpy_float64_F_split_strides)
+ if pytorch_major_version >= 2:
+ self.assertTrue(
+ (
+ np.asarray(heat_float64_F_split.strides) * 8
+ == np.asarray(numpy_float64_F_split_strides)
+ ).all()
+ )
+ else:
+ self.assertEqual(heat_float64_F_split.strides, numpy_float64_F_split_strides)
def test_tolist(self):
a = ht.zeros([ht.MPI_WORLD.size, ht.MPI_WORLD.size, ht.MPI_WORLD.size], dtype=ht.int32)
@@ -1691,16 +1712,30 @@ def test_torch_proxy(self):
scalar_array = ht.array(1)
scalar_proxy = scalar_array.__torch_proxy__()
self.assertTrue(scalar_proxy.ndim == 0)
- scalar_proxy_nbytes = scalar_proxy.storage().size() * scalar_proxy.storage().element_size()
+ if pytorch_major_version >= 2:
+ scalar_proxy_nbytes = (
+ scalar_proxy.untyped_storage().size()
+ * scalar_proxy.untyped_storage().element_size()
+ )
+ else:
+ scalar_proxy_nbytes = (
+ scalar_proxy.storage().size() * scalar_proxy.storage().element_size()
+ )
self.assertTrue(scalar_proxy_nbytes == 1)
dndarray = ht.zeros((4, 7, 6), split=1)
dndarray_proxy = dndarray.__torch_proxy__()
self.assertTrue(dndarray_proxy.ndim == dndarray.ndim)
self.assertTrue(tuple(dndarray_proxy.shape) == dndarray.gshape)
- dndarray_proxy_nbytes = (
- dndarray_proxy.storage().size() * dndarray_proxy.storage().element_size()
- )
+ if pytorch_major_version >= 2:
+ dndarray_proxy_nbytes = (
+ dndarray_proxy.untyped_storage().size()
+ * dndarray_proxy.untyped_storage().element_size()
+ )
+ else:
+ dndarray_proxy_nbytes = (
+ dndarray_proxy.storage().size() * dndarray_proxy.storage().element_size()
+ )
self.assertTrue(dndarray_proxy_nbytes == 1)
def test_xor(self):
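The stride assertions above scale Heat's strides by the element size under PyTorch >= 2; the underlying difference is that torch counts strides in elements while NumPy counts them in bytes. A quick illustration (standalone sketch, torch only):

```python
import torch

t = torch.arange(6, dtype=torch.int16).reshape(2, 3)
print(t.stride())         # (3, 1) -- strides counted in elements
print(t.numpy().strides)  # (6, 2) -- strides counted in bytes (itemsize == 2)
```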
diff --git a/heat/optim/dp_optimizer.py b/heat/optim/dp_optimizer.py
index 4b344ba5cf..5e45545349 100644
--- a/heat/optim/dp_optimizer.py
+++ b/heat/optim/dp_optimizer.py
@@ -20,8 +20,17 @@
def __sum_f16_cb(buffer_a, buffer_b, _):
# MPI custom sum function to use torch.half
- tens_a = torch.HalfTensor().set_(torch.HalfStorage.from_buffer(buffer_a, "native"))
- tens_b = torch.HalfTensor().set_(torch.HalfStorage.from_buffer(buffer_b, "native"))
+ # try/except prefers UntypedStorage (PyTorch >= 2.0.0) while keeping backward compatibility with older versions
+ try:
+ tens_a = torch.HalfTensor().set_(
+ torch.UntypedStorage.from_buffer(buffer_a, "native", dtype=torch.half)
+ )
+ tens_b = torch.HalfTensor().set_(
+ torch.UntypedStorage.from_buffer(buffer_b, "native", dtype=torch.half)
+ )
+ except AttributeError:
+ tens_a = torch.HalfTensor().set_(torch.HalfStorage.from_buffer(buffer_a, "native"))
+ tens_b = torch.HalfTensor().set_(torch.HalfStorage.from_buffer(buffer_b, "native"))
tens_b += tens_a
nelem = torch.prod(torch.tensor(tens_b.shape)).item()
new_buff = MPI.memory.fromaddress(tens_b.data_ptr(), nbytes=tens_b.element_size() * nelem)
@@ -30,8 +39,17 @@ def __sum_f16_cb(buffer_a, buffer_b, _):
def __sum_bfloat_cb(buffer_a, buffer_b, _):
# MPI custom sum function to use torch.bfloat16
- tens_a = torch.BFloat16Tensor().set_(torch.BFloat16Storage.from_buffer(buffer_a, "native"))
- tens_b = torch.BFloat16Tensor().set_(torch.BFloat16Storage.from_buffer(buffer_b, "native"))
+ # try/except prefers UntypedStorage (PyTorch >= 2.0.0) while keeping backward compatibility with older versions
+ try:
+ tens_a = torch.BFloat16Tensor().set_(
+ torch.UntypedStorage.from_buffer(buffer_a, "native", dtype=torch.bfloat16)
+ )
+ tens_b = torch.BFloat16Tensor().set_(
+ torch.UntypedStorage.from_buffer(buffer_b, "native", dtype=torch.bfloat16)
+ )
+ except AttributeError:
+ tens_a = torch.BFloat16Tensor().set_(torch.BFloat16Storage.from_buffer(buffer_a, "native"))
+ tens_b = torch.BFloat16Tensor().set_(torch.BFloat16Storage.from_buffer(buffer_b, "native"))
tens_b += tens_a
nelem = int(tens_b.numel())
new_buff = MPI.memory.fromaddress(tens_b.data_ptr(), nbytes=nelem * tens_b.element_size())
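As a standalone illustration of the fallback pattern introduced above (a sketch only; it mirrors the exact calls used in the callbacks, with a hypothetical helper name):

```python
import torch

def half_tensor_from_buffer(buf: bytes) -> torch.Tensor:
    # Prefer the untyped storage API (PyTorch >= 2.0.0), fall back to the
    # typed HalfStorage on older releases.
    try:
        storage = torch.UntypedStorage.from_buffer(buf, "native", dtype=torch.half)
    except AttributeError:
        storage = torch.HalfStorage.from_buffer(buf, "native")
    return torch.HalfTensor().set_(storage)

raw = torch.arange(4, dtype=torch.half).numpy().tobytes()
print(half_tensor_from_buffer(raw))  # tensor([0., 1., 2., 3.], dtype=torch.float16)
```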
diff --git a/heat/optim/tests/test_dp_optimizer.py b/heat/optim/tests/test_dp_optimizer.py
index 1d18354a34..1ee7ab5253 100644
--- a/heat/optim/tests/test_dp_optimizer.py
+++ b/heat/optim/tests/test_dp_optimizer.py
@@ -2,10 +2,15 @@
import os
import torch
+import unittest
from heat.core.tests.test_suites.basic_test import TestCase
+@unittest.skipIf(
+ int(os.getenv("SLURM_NNODES", "1")) < 2 or torch.cuda.device_count() == 0,
+ "only supported for GPUs and at least two nodes",
+)
class TestDASO(TestCase):
def test_daso(self):
import heat.nn.functional as F
@@ -79,59 +84,53 @@ def train(model, device, optimizer, target, batches=20, scaler=None):
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1)
- envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu")
- if ht.MPI_WORLD.size == 1 and envar == "cpu":
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer="asdf", total_epochs=1)
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs="aa")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, warmup_epochs="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, cooldown_epochs="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, scheduler="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, stability_level="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, max_global_skips="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, sending_chunk_size="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, verbose="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, use_mpi_groups="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, downcast_type="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, comm="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, local_skip_factor="asdf")
- with self.assertRaises(TypeError):
- ht.optim.DASO(
- local_optimizer=optimizer, total_epochs=1, skip_reduction_factor="asdf"
- )
- # local_skip_factor
- # skip_reduction_factor
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, downcast_type=torch.bool)
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, warmup_epochs=-1)
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, cooldown_epochs=-1)
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, max_global_skips=-1)
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, sending_chunk_size=-1)
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=-1)
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, local_skip_factor=-1)
- with self.assertRaises(ValueError):
- ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, skip_reduction_factor=-1)
- if ht.MPI_WORLD.size != 8 or torch.cuda.device_count() == 0:
- # only run these tests for 2 nodes, each of which has 4 GPUs
- return
+
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer="asdf", total_epochs=1)
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs="aa")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, warmup_epochs="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, cooldown_epochs="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, scheduler="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, stability_level="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, max_global_skips="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, sending_chunk_size="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, verbose="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, use_mpi_groups="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, downcast_type="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, comm="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, local_skip_factor="asdf")
+ with self.assertRaises(TypeError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, skip_reduction_factor="asdf")
+ # local_skip_factor
+ # skip_reduction_factor
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, downcast_type=torch.bool)
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, warmup_epochs=-1)
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, cooldown_epochs=-1)
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, max_global_skips=-1)
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, sending_chunk_size=-1)
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=-1)
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, local_skip_factor=-1)
+ with self.assertRaises(ValueError):
+ ht.optim.DASO(local_optimizer=optimizer, total_epochs=1, skip_reduction_factor=-1)
# Training settings
torch.manual_seed(1)
diff --git a/heat/preprocessing/tests/test_preprocessing.py b/heat/preprocessing/tests/test_preprocessing.py
index 2e67e66ec5..de7694a5b3 100644
--- a/heat/preprocessing/tests/test_preprocessing.py
+++ b/heat/preprocessing/tests/test_preprocessing.py
@@ -7,7 +7,7 @@
from ...core.tests.test_suites.basic_test import TestCase
atol_fit = 1e-5
-atol_inv = 1e-5
+atol_inv = 1e-4
# generates a test data set with varying mean and variation per feature; variances of the two last features are zero, mean of the last feature is also zero, whereas mean of second last feature is nonzero.
diff --git a/heat/utils/data/matrixgallery.py b/heat/utils/data/matrixgallery.py
index 3afcd56fee..5937da869a 100644
--- a/heat/utils/data/matrixgallery.py
+++ b/heat/utils/data/matrixgallery.py
@@ -114,7 +114,7 @@ def parter(
II = core.arange(n, dtype=dtype, split=0, device=device, comm=comm).expand_dims(0)
JJ = core.arange(n, dtype=dtype, device=device, comm=comm).expand_dims(1)
else:
- raise ValueError("expected split value to be either {{None,0,1}}, but was {}".format(split))
+ raise ValueError(f"expected split value to be either {{None,0,1}}, but was {split}")
return 1.0 / (II - JJ + 0.5)
@@ -156,15 +156,11 @@ def random_known_singularvalues(
"""
if not isinstance(singular_values, DNDarray):
raise RuntimeError(
- "Argument singular_values needs to be a DNDarray but is {}.".format(
- type(singular_values)
- )
+ f"Argument singular_values needs to be a DNDarray but is {type(singular_values)}."
)
- if not singular_values.ndim == 1:
+ if singular_values.ndim != 1:
raise RuntimeError(
- "Argument singular_values needs to be a 1D array, but dimension is {}.".format(
- singular_values.ndim
- )
+ f"Argument singular_values needs to be a 1D array, but dimension is {singular_values.ndim}."
)
if singular_values.shape[0] > min(m, n):
raise RuntimeError(
diff --git a/quick_start.md b/quick_start.md
index f021393d96..713075e5ee 100644
--- a/quick_start.md
+++ b/quick_start.md
@@ -6,16 +6,12 @@ No-frills instructions for [new users](#new-users-condaconda-pippip-hpchpc-docke
### `conda`
-A Heat conda build is [in progress](https://github.com/helmholtz-analytics/heat/issues/1050).
-The script [heat_env.yml](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_env.yml):
+The Heat conda package includes all dependencies, including OpenMPI.
-- creates a virtual environment `heat_env`
-- installs all dependencies including OpenMPI using [conda](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html)
-- installs Heat via `pip`
-
-```
-conda env create -f heat_env.yml
+```shell
+conda create --name heat_env
conda activate heat_env
+conda install -c conda-forge heat
```
[Test](#test) your installation.
@@ -34,12 +30,15 @@ pip install heat[hdf5,netcdf]
[Test](#test) your installation.
+### HPC
+Work in progress.
+
### Docker
Get the docker image from our package repository
```
-docker pull ghcr.io/helmholtz-analytics/heat:1.2.0-dev_torch1.12_cuda11.7_py3.8
+docker pull ghcr.io/helmholtz-analytics/heat:
```
or build it from our Dockerfile
@@ -47,9 +46,11 @@ or build it from our Dockerfile
```
git clone https://github.com/helmholtz-analytics/heat.git
cd heat/docker
-docker build -t heat:latest .
+docker build --build-arg HEAT_VERSION=X.Y.Z --build-arg PYTORCH_IMG= -t heat:X.Y.Z .
```
+The `PYTORCH_IMG` build argument should be set to an existing version of the official NVIDIA PyTorch container image. Information and existing tags can be found [here](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch).
+
See [our docker README](https://github.com/helmholtz-analytics/heat/tree/main/docker/README.md) for other details.
### Test
@@ -77,7 +78,7 @@ Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32)
3. [Fork](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) or, if you have write access, clone the [Heat repository](https://github.com/helmholtz-analytics/heat).
-4. Create a virtual environment `heat_dev` with all dependencies via [heat_dev.yml](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_dev.yml). Note that `heat_dev.yml` does not install Heat via `pip` (as opposed to [`heat_env.yml`](#conda) for users).
+4. Create a virtual environment `heat_dev` with all dependencies via [heat_dev.yml](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_dev.yml). Note that `heat_dev.yml` does not install Heat.
```
conda env create -f heat_dev.yml
diff --git a/scripts/heat_dev.yml b/scripts/heat_dev.yml
index 3de812e489..1ca994771f 100644
--- a/scripts/heat_dev.yml
+++ b/scripts/heat_dev.yml
@@ -3,12 +3,12 @@ channels:
- conda-forge
- defaults
dependencies:
- - python=3.9
+ - python=3.10
- openmpi
- mpi4py
- h5py[version='>=2.9',build=mpi*]
- netcdf4
- - pytorch=1.13.0
+ - pytorch
- torchvision
- scipy
- pre-commit
diff --git a/scripts/heat_env.yml b/scripts/heat_env.yml
index 9d9130c22f..1d5e1b6dcd 100644
--- a/scripts/heat_env.yml
+++ b/scripts/heat_env.yml
@@ -3,14 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- - python=3.9
- - openmpi
- - mpi4py
- - h5py[version='>=2.9',build=mpi*]
- - netcdf4
- - pytorch=1.13.0
- - torchvision
- - scipy
- - pip
- - pip:
- - heat
+ - python=3.10
+ - heat
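With the slimmed-down environment file, creating the user environment reduces to the following (a sketch, assuming the file is still consumed via `conda env create` as in the contributor instructions):

```shell
conda env create -f heat_env.yml
conda activate heat_env
python -c "import heat as ht; print(ht.__version__)"
```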
diff --git a/scripts/numpy_coverage_tables.py b/scripts/numpy_coverage_tables.py
new file mode 100644
index 0000000000..1d4c8cff6a
--- /dev/null
+++ b/scripts/numpy_coverage_tables.py
@@ -0,0 +1,583 @@
+import heat
+
+numpy_functions = []
+
+# List of numpy functions
+headers = {"0": "NumPy Mathematical Functions"}
+numpy_mathematical_functions = [
+ "sin",
+ "cos",
+ "tan",
+ "arcsin",
+ "arccos",
+ "arctan",
+ "hypot",
+ "arctan2",
+ "degrees",
+ "radians",
+ "unwrap",
+ "deg2rad",
+ "rad2deg",
+ "sinh",
+ "cosh",
+ "tanh",
+ "arcsinh",
+ "arccosh",
+ "arctanh",
+ "round",
+ "around",
+ "rint",
+ "fix",
+ "floor",
+ "ceil",
+ "trunc",
+ "prod",
+ "sum",
+ "nanprod",
+ "nansum",
+ "cumprod",
+ "cumsum",
+ "nancumprod",
+ "nancumsum",
+ "diff",
+ "ediff1d",
+ "gradient",
+ "cross",
+ "trapz",
+ "exp",
+ "expm1",
+ "exp2",
+ "log",
+ "log10",
+ "log2",
+ "log1p",
+ "logaddexp",
+ "logaddexp2",
+ "i0",
+ "sinc",
+ "signbit",
+ "copysign",
+ "frexp",
+ "ldexp",
+ "nextafter",
+ "spacing",
+ "lcm",
+ "gcd",
+ "add",
+ "reciprocal",
+ "positive",
+ "negative",
+ "multiply",
+ "divide",
+ "power",
+ "subtract",
+ "true_divide",
+ "floor_divide",
+ "float_power",
+ "fmod",
+ "mod",
+ "modf",
+ "remainder",
+ "divmod",
+ "angle",
+ "real",
+ "imag",
+ "conj",
+ "conjugate",
+ "maximum",
+ "max",
+ "amax",
+ "fmax",
+ "nanmax",
+ "minimum",
+ "min",
+ "amin",
+ "fmin",
+ "nanmin",
+ "convolve",
+ "clip",
+ "sqrt",
+ "cbrt",
+ "square",
+ "absolute",
+ "fabs",
+ "sign",
+ "heaviside",
+ "nan_to_num",
+ "real_if_close",
+ "interp",
+]
+numpy_functions.append(numpy_mathematical_functions)
+
+numpy_array_creation = [
+ "empty",
+ "empty_like",
+ "eye",
+ "identity",
+ "ones",
+ "ones_like",
+ "zeros",
+ "zeros_like",
+ "full",
+ "full_like",
+ "array",
+ "asarray",
+ "asanyarray",
+ "ascontiguousarray",
+ "asmatrix",
+ "copy",
+ "frombuffer",
+ "from_dlpack",
+ "fromfile",
+ "fromfunction",
+ "fromiter",
+ "fromstring",
+ "loadtxt",
+ "arange",
+ "linspace",
+ "logspace",
+ "geomspace",
+ "meshgrid",
+ "mgrid",
+ "ogrid",
+ "diag",
+ "diagflat",
+ "tri",
+ "tril",
+ "triu",
+ "vander",
+ "mat",
+ "bmat",
+]
+numpy_functions.append(numpy_array_creation)
+headers[str(len(headers))] = "NumPy Array Creation"
+
+numpy_array_manipulation = [
+ "copyto",
+ "shape",
+ "reshape",
+ "ravel",
+ "flat",
+ "flatten",
+ "moveaxis",
+ "rollaxis",
+ "swapaxes",
+ "T",
+ "transpose",
+ "atleast_1d",
+ "atleast_2d",
+ "atleast_3d",
+ "broadcast",
+ "broadcast_to",
+ "broadcast_arrays",
+ "expand_dims",
+ "squeeze",
+ "asarray",
+ "asanyarray",
+ "asmatrix",
+ "asfarray",
+ "asfortranarray",
+ "ascontiguousarray",
+ "asarray_chkfinite",
+ "require",
+ "concatenate",
+ "stack",
+ "block",
+ "vstack",
+ "hstack",
+ "dstack",
+ "column_stack",
+ "row_stack",
+ "split",
+ "array_split",
+ "dsplit",
+ "hsplit",
+ "vsplit",
+ "tile",
+ "repeat",
+ "delete",
+ "insert",
+ "append",
+ "resize",
+ "trim_zeros",
+ "unique",
+ "flip",
+ "fliplr",
+ "flipud",
+ "reshape",
+ "roll",
+ "rot90",
+]
+numpy_functions.append(numpy_array_manipulation)
+headers[str(len(headers))] = "NumPy Array Manipulation"
+
+numpy_binary_operations = [
+ "bitwise_and",
+ "bitwise_or",
+ "bitwise_xor",
+ "invert",
+ "left_shift",
+ "right_shift",
+ "packbits",
+ "unpackbits",
+ "binary_repr",
+]
+numpy_functions.append(numpy_binary_operations)
+headers[str(len(headers))] = "NumPy Binary Operations"
+
+numpy_io_operations = [
+ # numpy.load
+ # numpy.save
+ # numpy.savez_compressed
+ # numpy.loadtxt
+ # numpy.savez
+ # numpy.savetxt
+ # numpy.genfromtxt
+ # numpy.fromregex
+ # numpy.fromstring
+ # numpy.ndarray.tofile
+ # numpy.ndarray.tolist
+ # numpy.array2string
+ # numpy.array_repr
+ # numpy.array_str
+ # numpy.format_float_positional
+ # numpy.format_float_scientific
+ # numpy.memmap
+ # numpy.lib.format.open_memmap
+ # numpy.set_printoptions
+ # numpy.get_printoptions
+ # numpy.set_string_function
+ # numpy.printoptions
+ # numpy.binary_repr
+ # numpy.base_repr
+ # numpy.DataSource
+ # numpy.lib.format
+ "load",
+ "save",
+ "savez",
+ "savez_compressed",
+ "loadtxt",
+ "savetxt",
+ "genfromtxt",
+ "fromregex",
+ "fromstring",
+ "tofile",
+ "tolist",
+ "array2string",
+ "array_repr",
+ "array_str",
+ "format_float_positional",
+ "format_float_scientific",
+ "memmap",
+ "open_memmap",
+ "set_printoptions",
+ "get_printoptions",
+ "set_string_function",
+ "printoptions",
+ "binary_repr",
+ "base_repr",
+ "DataSource",
+ "format",
+]
+numpy_functions.append(numpy_io_operations)
+headers[str(len(headers))] = "NumPy IO Operations"
+
+numpy_linalg_operations = [
+ # numpy.dot
+ # numpy.linalg.multi_dot
+ # numpy.vdot
+ # numpy.inner
+ # numpy.outer
+ # numpy.matmul
+ # numpy.tensordot
+ # numpy.einsum
+ # numpy.einsum_path
+ # numpy.linalg.matrix_power
+ # numpy.kron
+ # numpy.linalg.cholesky
+ # numpy.linalg.qr
+ # numpy.linalg.svd
+ # numpy.linalg.eig
+ # numpy.linalg.eigh
+ # numpy.linalg.eigvals
+ # numpy.linalg.eigvalsh
+ # numpy.linalg.norm
+ # numpy.linalg.cond
+ # numpy.linalg.det
+ # numpy.linalg.matrix_rank
+ # numpy.linalg.slogdet
+ # numpy.trace
+ # numpy.linalg.solve
+ # numpy.linalg.tensorsolve
+ # numpy.linalg.lstsq
+ # numpy.linalg.inv
+ # numpy.linalg.pinv
+ # numpy.linalg.tensorinv
+ "dot",
+ "linalg.multi_dot",
+ "vdot",
+ "inner",
+ "outer",
+ "matmul",
+ "tensordot",
+ "einsum",
+ "einsum_path",
+ "linalg.matrix_power",
+ "kron",
+ "linalg.cholesky",
+ "linalg.qr",
+ "linalg.svd",
+ "linalg.eig",
+ "linalg.eigh",
+ "linalg.eigvals",
+ "linalg.eigvalsh",
+ "linalg.norm",
+ "linalg.cond",
+ "linalg.det",
+ "linalg.matrix_rank",
+ "linalg.slogdet",
+ "trace",
+ "linalg.solve",
+ "linalg.tensorsolve",
+ "linalg.lstsq",
+ "linalg.inv",
+ "linalg.pinv",
+ "linalg.tensorinv",
+]
+numpy_functions.append(numpy_linalg_operations)
+headers[str(len(headers))] = "NumPy LinAlg Operations"
+
+numpy_logic_operations = [
+ # numpy.all
+ # numpy.any
+ # numpy.isinf
+ # numpy.isfinite
+ # numpy.isnan
+ # numpy.isnat
+ # numpy.isneginf
+ # numpy.isposinf
+ # numpy.iscomplex
+ # numpy.iscomplexobj
+ # numpy.isfortran
+ # numpy.isreal
+ # numpy.isrealobj
+ # numpy.isscalar
+ # numpy.logical_and
+ # numpy.logical_or
+ # numpy.logical_not
+ # numpy.logical_xor
+ # numpy.allclose
+ # numpy.isclose
+ # numpy.array_equal
+ # numpy.array_equiv
+ # numpy.greater
+ # numpy.greater_equal
+ # numpy.less
+ # numpy.less_equal
+ # numpy.equal
+ # numpy.not_equal
+ "all",
+ "any",
+ "isfinite",
+ "isinf",
+ "isnan",
+ "isnat",
+ "isneginf",
+ "isposinf",
+ "iscomplex",
+ "iscomplexobj",
+ "isfortran",
+ "isreal",
+ "isrealobj",
+ "isscalar",
+ "logical_and",
+ "logical_or",
+ "logical_not",
+ "logical_xor",
+ "allclose",
+ "isclose",
+ "array_equal",
+ "array_equiv",
+ "greater",
+ "greater_equal",
+ "less",
+ "less_equal",
+ "equal",
+ "not_equal",
+]
+numpy_functions.append(numpy_logic_operations)
+headers[str(len(headers))] = "NumPy Logic Functions"
+
+numpy_sorting_operations = [
+ # numpy.sort
+ # numpy.lexsort
+ # numpy.argsort
+ # numpy.ndarray.sort
+ # numpy.sort_complex
+ # numpy.partition
+ # numpy.argpartition
+ # numpy.argmax
+ # numpy.nanargmax
+ # numpy.argmin
+ # numpy.nanargmin
+ # numpy.argwhere
+ # numpy.nonzero
+ # numpy.flatnonzero
+ # numpy.where
+ # numpy.searchsorted
+ # numpy.extract
+ # numpy.count_nonzero
+ "sort",
+ "lexsort",
+ "argsort",
+ "sort",
+ "sort_complex",
+ "partition",
+ "argpartition",
+ "argmax",
+ "nanargmax",
+ "argmin",
+ "nanargmin",
+ "argwhere",
+ "nonzero",
+ "flatnonzero",
+ "where",
+ "searchsorted",
+ "extract",
+ "count_nonzero",
+]
+numpy_functions.append(numpy_sorting_operations)
+headers[str(len(headers))] = "NumPy Sorting Operations"
+
+numpy_statistics_operations = [
+ # numpy.ptp
+ # numpy.percentile
+ # numpy.nanpercentile
+ # numpy.quantile
+ # numpy.nanquantile
+ # numpy.median
+ # numpy.average
+ # numpy.mean
+ # numpy.std
+ # numpy.var
+ # numpy.nanmedian
+ # numpy.nanmean
+ # numpy.nanstd
+ # numpy.nanvar
+ # numpy.corrcoef
+ # numpy.correlate
+ # numpy.cov
+ # numpy.histogram
+ # numpy.histogram2d
+ # numpy.histogramdd
+ # numpy.bincount
+ # numpy.histogram_bin_edges
+ # numpy.digitize
+ "ptp",
+ "percentile",
+ "nanpercentile",
+ "quantile",
+ "nanquantile",
+ "median",
+ "average",
+ "mean",
+ "std",
+ "var",
+ "nanmedian",
+ "nanmean",
+ "nanstd",
+ "nanvar",
+ "corrcoef",
+ "correlate",
+ "cov",
+ "histogram",
+ "histogram2d",
+ "histogramdd",
+ "bincount",
+ "histogram_bin_edges",
+ "digitize",
+]
+numpy_functions.append(numpy_statistics_operations)
+headers[str(len(headers))] = "NumPy Statistical Operations"
+
+# numpy random operations
+numpy_random_operations = [
+ # numpy.random.rand
+ # numpy.random.randn
+ # numpy.random.randint
+ # numpy.random.random_integers
+ # numpy.random.random_sample
+ # numpy.random.ranf
+ # numpy.random.sample
+ # numpy.random.choice
+ # numpy.random.bytes
+ # numpy.random.shuffle
+ # numpy.random.permutation
+ # numpy.random.seed
+ # numpy.random.get_state
+ # numpy.random.set_state
+ "random.rand",
+ "random.randn",
+ "random.randint",
+ "random.random_integers",
+ "random.random_sample",
+ "random.ranf",
+ "random.sample",
+ "random.choice",
+ "random.bytes",
+ "random.shuffle",
+ "random.permutation",
+ "random.seed",
+ "random.get_state",
+ "random.set_state",
+]
+numpy_functions.append(numpy_random_operations)
+headers[str(len(headers))] = "NumPy Random Operations"
+
+# initialize markdown file
+# open the file in write mode
+f = open("coverage_tables.md", "w")
+# write in file
+f.write("# NumPy Coverage Tables\n")
+f.write("This file is automatically generated by `./scripts/numpy_coverage_tables.py`.\n")
+f.write(
+ "Please do not edit this file directly, but instead edit `./scripts/numpy_coverage_tables.py` and run it to generate this file.\n"
+)
+f.write("The following tables show the NumPy functions supported by Heat.\n")
+
+# create Table of Contents
+f.write("## Table of Contents\n")
+for i, header in enumerate(headers):
+ f.write(f"{i+1}. [{headers[header]}](#{headers[header].lower().replace(' ', '-')})\n")
+f.write("\n")
+
+for i, function_list in enumerate(numpy_functions):
+ f.write(f"## {headers[str(i)]}\n")
+ # Initialize a list to store the rows of the Markdown table
+ table_rows = []
+
+ # Check if functions exist in the heat library and create table rows
+ for func_name in function_list:
+ if (
+ hasattr(heat, func_name)
+ or hasattr(heat.linalg, func_name.replace("linalg.", ""))
+ or hasattr(heat.random, func_name.replace("random.", ""))
+ ):
+ support_status = "✅" # Green checkmark for supported functions
+ else:
+ support_status = "❌" # Red cross for unsupported functions
+
+ table_row = f"| {func_name} | {support_status} |"
+ table_rows.append(table_row)
+
+ # Create the Markdown table header
+ table_header = f"| {headers[str(i)]} | Heat |\n|---|---|\n"
+
+ # Combine the header and table rows
+ markdown_table = table_header + "\n".join(table_rows)
+
+ # write link to table of contents
+ f.write("[Back to Table of Contents](#table-of-contents)\n\n")
+ # Print the Markdown table
+ f.write(markdown_table)
+ f.write("\n")
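The script writes `coverage_tables.md` into the current working directory; regenerating the tables is a single call (assumed invocation, matching the path the script itself names in its generated header):

```shell
python ./scripts/numpy_coverage_tables.py
```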