0.5 Changelog and documentation updates (#544)

Co-authored-by: erick-xanadu <[email protected]> Co-authored-by: Ali Asadi <[email protected]> Co-authored-by: David Ittah <[email protected]>
PennyLaneAI · Mar 1, 2024 · cb7641b · cb7641b
1 parent 0f39f20
commit cb7641b
Show file tree

Hide file tree

Showing 11 changed files with 552 additions and 300 deletions.
diff --git a/doc/changelog.md b/doc/changelog.md
diff --git a/doc/code/__init__.rst b/doc/code/__init__.rst
@@ -19,3 +19,11 @@ Module: catalyst.debug
     :no-heading:
     :no-inheritance-diagram:
     :fullname:
+
+Module: catalyst.cuda
+---------------------
+
+.. automodapi:: catalyst.cuda
+    :no-heading:
+    :no-inheritance-diagram:
+    :fullname:
diff --git a/doc/conf.py b/doc/conf.py
@@ -94,6 +94,7 @@ def __getattr__(cls, name):
     "mlir_quantum.dialects.mitigation",
     "mlir_quantum.compiler_driver",
     "pybind11",
+    "cudaq"
 ]
 
 mock = Mock()

diff --git a/doc/dev/installation.rst b/doc/dev/installation.rst
@@ -107,16 +107,6 @@ On **macOS**, it is strongly recommended to install the official XCode Command L
   pip install cmake ninja
   brew install libomp
 
-If you install Catalyst on a macOS system with ``ARM`` architecture (e.g. Apple M1/M2), you
-additionally need to install `Rust <https://www.rust-lang.org/tools/install>`_ and the
-``llvm-tools-preview`` rustup component:
-
-.. code-block:: console
-
-  curl https://sh.rustup.rs -sSf | sh
-  source "$HOME/.cargo/env"
-  rustup component add llvm-tools-preview
-
 All additional build and developer dependencies are managed via the repository's
 ``requirements.txt`` and can be installed as follows:
 

diff --git a/doc/dev/quick_start.rst b/doc/dev/quick_start.rst
@@ -582,7 +582,7 @@ to the ``grad`` function:
   the classical computation is differentiated using traditional autodiff.
 
   With this strategy, Catalyst only currently supports QNodes with
-  ``diff_method="param-shift"`` and ``diff_method="adjoint"``.
+  ``diff_method="parameter-shift"`` and ``diff_method="adjoint"``.
 
 - ``method="fd"``: First-order finite-differences for the entire hybrid
   function. The ``diff_method`` argument for each QNode is ignored.

diff --git a/doc/dev/sharp_bits.rst b/doc/dev/sharp_bits.rst
@@ -314,6 +314,59 @@ UserWarning: Provided arguments did not match declared signature, recompiling...
 Tracing occurring
 array(0.16996714)
 
+Specifying compile-time constants
+---------------------------------
+
+The ``@qjit`` decorator argument ``static_argnums`` allows positional arguments
+to be specified which should be treated as compile-time static arguments.
+
+This allows any hashable Python object to be passed to the function during compilation;
+the function will only be re-compiled if the hash value of the static arguments changes.
+Otherwise, re-using previous static argument values will result in no re-compilation:
+
+>>> @qjit(static_argnums=(1,))
+... def f(x, y):
+...   print(f"Compiling with y={y}")
+...   return x + y
+>>> f(0.5, 0.3)
+Compiling with y=0.3
+array(0.8)
+>>> f(0.1, 0.3)  # no re-compilation occurs
+array(0.4)
+>>> f(0.1, 0.4)  # y changes, re-compilation
+Compiling with y=0.4
+array(0.5)
+
+This functionality can be used to support passing arbitrary Python objects to QJIT-compiled
+functions, as long as they are hashable:
+
+.. code-block:: python
+
+    from dataclasses import dataclass
+
+    @dataclass
+    class MyClass:
+      val: int
+
+      def __hash__(self):
+          return hash(str(self))
+
+    @qjit(static_argnums=(1,))
+    def f(x: int, y: MyClass):
+      return x + y.val
+
+>>> f(1, MyClass(5))
+array(6)
+>>> f(1, MyClass(6))  # re-compilation
+array(7)
+>>> f(2, MyClass(5))  # no re-compilation
+array(7)
+
+Note that when ``static_argnums`` is used in conjunction with type hinting,
+ahead-of-time compilation will not be possible since the static argument values
+are not yet available. Instead, compilation will be just-in-time.
+
+
 Try and compile the full workflow
 ---------------------------------
 

diff --git a/frontend/catalyst/cuda/__init__.py b/frontend/catalyst/cuda/__init__.py
@@ -24,8 +24,56 @@
 from catalyst.cuda.catalyst_to_cuda_interpreter import interpret
 
 
-def qjit(fn=None, **kwargs):
-    """Wrapper around QJIT for CUDA-quantum."""
+def cudaqjit(fn=None, **kwargs):
+    """A decorator for compiling PennyLane and JAX programs using CUDA Quantum.
+
+    .. important::
+
+        This feature currently only supports CUDA Quantum version 0.6.
+
+    .. note::
+
+        Currently, only the following devices are supported:
+
+        * :class:`softwareq.qpp <SoftwareQQPP>`: a modern C++ statevector simulator
+        * :class:`nvidia.custatevec <NvidiaCuStateVec>`: The NVIDIA CuStateVec GPU simulator (with support for multi-gpu)
+        * :class:`nvidia.cutensornet <NvidiaCuTensorNet>`: The NVIDIA CuTensorNet GPU simulator (with support for matrix product
+          state)
+
+    Args:
+        fn (Callable): the quantum or classical function to compile
+
+    Returns:
+        QJIT object.
+
+    **Example**
+
+    The compilation is triggered at the call site
+    when the quantum function is executed:
+
+    .. code-block:: python
+
+        dev = qml.device("softwareq.qpp", wires=2)
+
+        @cudaqjit
+        @qml.qnode(dev)
+        def circuit(x):
+            qml.RX(x[0], wires=0)
+            qml.RY(x[1], wires=1)
+            qml.CNOT(wires=[0, 1])
+            return qml.expval(qml.PauliY(0))
+
+    >>> circuit(jnp.array([0.5, 1.4]))
+    -0.47244976756708373
+
+    From PennyLane, this functionality can also be accessed via
+
+    >>> @qml.qjit(compiler="cuda_quantum")
+
+    Note that CUDA Quantum compilation currently does not have feature parity with Catalyst
+    compilation; in particular, AutoGraph, control flow, differentiation, and various measurement
+    statistics (such as probabilities and variance) are not yet supported.
+    """
 
     if fn is not None:
         return interpret(fn, **kwargs)
@@ -83,7 +131,36 @@ def apply(self, operations, **kwargs):
 
 
 class SoftwareQQPP(BaseCudaInstructionSet):
-    """Concrete device class for qpp-cpu"""
+    """The SoftwareQ Q++ statevector simulator.
+
+    .. note::
+
+        This device currently only supports QNodes compiled with CUDA Quantum. For a
+        high-performance CPU device with support for other compilers, please use
+        ``lightning.qubit`` or ``lightning.kokkos``.
+
+    Args:
+        shots (None, int): Number of shots to use for measurements and statistics.
+            ``None`` corresponds to exact statistics.
+        wires (int): Number of wires present on the device.
+
+    **Example**
+
+    .. code-block:: python
+
+        dev = qml.device("softwareq.qpp", wires=2)
+
+        @catalyst.cuda.cudaqjit
+        @qml.qnode(dev)
+        def circuit(x):
+          qml.RX(x[0], wires=0)
+          qml.RY(x[1], wires=1)
+          qml.CNOT(wires=[0, 1])
+          return qml.expval(qml.PauliY(0))
+
+    >>> circuit(jnp.array([0.5, 1.4]))
+    -0.47244976756708373
+    """
 
     short_name = "softwareq.qpp"
 
@@ -94,7 +171,36 @@ def name(self):
 
 
 class NvidiaCuStateVec(BaseCudaInstructionSet):
-    """Concrete device class for CuStateVec"""
+    """The NVIDIA CuStateVec GPU simulator (with support for multi-gpu).
+
+    .. note::
+
+        This device currently only supports QNodes compiled with CUDA Quantum. For a multi-GPU
+        device with support for other compilers, please use ``lightning.gpu``.
+
+    Args:
+        shots (None, int): Number of shots to use for measurements and statistics.
+            ``None`` corresponds to exact statistics.
+        wires (int): Number of wires present on the device.
+        multi_gpu (bool): Whether to utilize multiple GPUs.
+
+    **Example**
+
+    .. code-block:: python
+
+        dev = qml.device("nvidia.custatevec", wires=2)
+
+        @catalyst.cuda.cudaqjit
+        @qml.qnode(dev)
+        def circuit(x):
+          qml.RX(x[0], wires=0)
+          qml.RY(x[1], wires=1)
+          qml.CNOT(wires=[0, 1])
+          return qml.expval(qml.PauliY(0))
+
+    >>> circuit(jnp.array([0.5, 1.4]))
+    -0.47244976756708373
+    """
 
     short_name = "nvidia.custatevec"
 
@@ -110,7 +216,35 @@ def name(self):  # pragma: no cover
 
 
 class NvidiaCuTensorNet(BaseCudaInstructionSet):
-    """Concrete device class for CuTensorNet"""
+    """The NVIDIA CuTensorNet GPU simulator (with support for matrix product state).
+
+    .. note::
+
+        This device currently only supports QNodes compiled with CUDA Quantum.
+
+    Args:
+        shots (None, int): Number of shots to use for measurements and statistics.
+            ``None`` corresponds to exact statistics.
+        wires (int): Number of wires present on the device.
+        mps (bool): Whether to use matrix product state approximations.
+
+    **Example**
+
+    .. code-block:: python
+
+        dev = qml.device("nvidia.cutensornet", wires=2)
+
+        @catalyst.cuda.cudaqjit
+        @qml.qnode(dev)
+        def circuit(x):
+          qml.RX(x[0], wires=0)
+          qml.RY(x[1], wires=1)
+          qml.CNOT(wires=[0, 1])
+          return qml.expval(qml.PauliY(0))
+
+    >>> circuit(jnp.array([0.5, 1.4]))
+    -0.47244976756708373
+    """
 
     short_name = "nvidia.cutensornet"
 
@@ -123,3 +257,12 @@ def name(self):  # pragma: no cover
         """Target name"""
         option = "-mps" if self.mps else ""
         return f"tensornet{option}"
+
+
+__all__ = [
+    "cudaqjit",
+    "BaseCudaInstructionSet",
+    "SoftwareQQPP",
+    "NvidiaCuStateVec",
+    "NvidiaCuTensorNet",
+]
diff --git a/frontend/catalyst/jit.py b/frontend/catalyst/jit.py
@@ -639,6 +639,10 @@ def f(
         function ``f`` be re-compiled because ``my_obj_1`` is changed. This requires that
         the mutation is properly reflected in the hash value.
 
+        Note that even when ``static_argnums`` is used in conjunction with type hinting,
+        ahead-of-time compilation will not be possible since the static argument values
+        are not yet available. Instead, compilation will be just-in-time.
+
 
     .. details::
         :title: Dynamically-shaped arrays

diff --git a/frontend/catalyst/pennylane_extensions.py b/frontend/catalyst/pennylane_extensions.py
@@ -2173,37 +2173,42 @@ def vmap(
     Raises:
         ValueError: Invalid ``in_axes``, ``out_axes``, and ``axis_size`` values.
 
-
     **Example**
 
+    For example, consider the following QNode:
+
     .. code-block:: python
 
-        @qjit
-        def workflow(x, y, z):
-            @qml.qnode(qml.device(backend, wires=1))
-            def circuit(x, y):
-                qml.RX(jnp.pi * x[0] + y, wires=0)
-                qml.RY(x[1] ** 2, wires=0)
-                qml.RX(x[1] * x[2], wires=0)
-                return qml.expval(qml.PauliZ(0))
-
-            def postcircuit(y, x, z):
-                return circuit(x, y) * z
-
-            res = vmap(postcircuit, in_axes=(0, 0, None))(y, x, z)
-            return res
-
-        y = jnp.array([jnp.pi, jnp.pi / 2, jnp.pi / 4])
-        x = jnp.array(
-            [
-                [0.1, 0.2, 0.3],
-                [0.4, 0.5, 0.6],
-                [0.7, 0.8, 0.9],
-            ]
-        )
+        dev = qml.device("lightning.qubit", wires=1)
 
-    >>> workflow(x, y, 1)
-    [-0.93005586, -0.97165424, -0.6987465]
+        @qml.qnode(dev)
+        def circuit(x, y):
+          qml.RX(jnp.pi * x[0] + y, wires=0)
+          qml.RY(x[1] ** 2, wires=0)
+          qml.RX(x[1] * x[2], wires=0)
+          return qml.expval(qml.PauliZ(0))
+
+    >>> circuit(jnp.array([0.1, 0.2, 0.3]), jnp.pi)
+    Array(-0.93005586, dtype=float64)
+
+    We can use ``catalyst.vmap`` to introduce additional batch dimensions
+    to our input arguments,
+    without needing to use a Python for loop:
+
+    >>> x = jnp.array([[0.1, 0.2, 0.3],
+    ...                [0.4, 0.5, 0.6],
+    ...                [0.7, 0.8, 0.9]])
+    >>> y = jnp.array([jnp.pi, jnp.pi / 2, jnp.pi / 4])
+    >>> qjit(vmap(circuit))(x, y)
+    array([-0.93005586, -0.97165424, -0.6987465 ])
+
+    ``catalyst.vmap()`` has been implemented to match the behaviour of
+    ``jax.vmap``, so it should be a drop-in replacement in most cases.
+    Under the hood, it automatically inserts Catalyst-compatible for loops,
+    which will be compiled and executed outside of Python for increased performance.
+
+    Outside of a Catalyst qjit-compiled function, ``vmap`` will simply dispatch to
+    ``jax.vmap``.
 
     .. details::
         :title: Selecting batching axes for arguments