From 38f4d894d5943911b54053d82654af23b6a20fa3 Mon Sep 17 00:00:00 2001
From: Christoph Lehner <christoph@lhnr.de>
Date: Sun, 5 Nov 2023 20:27:20 +0100
Subject: [PATCH] add forward autodiff

---
 lib/gpt/ad/__init__.py              |   3 +-
 lib/gpt/ad/forward/__init__.py      |  22 ++++
 lib/gpt/ad/forward/infinitesimal.py |  73 ++++++++++++
 lib/gpt/ad/forward/landau.py        |  55 +++++++++
 lib/gpt/ad/forward/series.py        | 168 ++++++++++++++++++++++++++++
 lib/gpt/ad/forward/transform.py     |  31 +++++
 lib/gpt/ad/reverse/node.py          |  15 ++-
 tests/ad/ad.py                      | 141 ++++++++++++++++++++++-
 8 files changed, 503 insertions(+), 5 deletions(-)
 create mode 100644 lib/gpt/ad/forward/__init__.py
 create mode 100644 lib/gpt/ad/forward/infinitesimal.py
 create mode 100644 lib/gpt/ad/forward/landau.py
 create mode 100644 lib/gpt/ad/forward/series.py
 create mode 100644 lib/gpt/ad/forward/transform.py

diff --git a/lib/gpt/ad/__init__.py b/lib/gpt/ad/__init__.py
index 44b32da2..5a53f84a 100644
--- a/lib/gpt/ad/__init__.py
+++ b/lib/gpt/ad/__init__.py
@@ -17,5 +17,4 @@
 #    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 import gpt.ad.reverse
-
-# import gpt.ad.forward
+import gpt.ad.forward
diff --git a/lib/gpt/ad/forward/__init__.py b/lib/gpt/ad/forward/__init__.py
new file mode 100644
index 00000000..75455b14
--- /dev/null
+++ b/lib/gpt/ad/forward/__init__.py
@@ -0,0 +1,22 @@
+#
+#    GPT - Grid Python Toolkit
+#    Copyright (C) 2023  Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt)
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License along
+#    with this program; if not, write to the Free Software Foundation, Inc.,
+#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+from gpt.ad.forward.infinitesimal import infinitesimal
+from gpt.ad.forward.landau import landau
+from gpt.ad.forward.series import series
+from gpt.ad.forward.transform import norm2, inner_product, cshift
diff --git a/lib/gpt/ad/forward/infinitesimal.py b/lib/gpt/ad/forward/infinitesimal.py
new file mode 100644
index 00000000..7ecead9e
--- /dev/null
+++ b/lib/gpt/ad/forward/infinitesimal.py
@@ -0,0 +1,73 @@
+#
+#    GPT - Grid Python Toolkit
+#    Copyright (C) 2023  Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt)
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License along
+#    with this program; if not, write to the Free Software Foundation, Inc.,
+#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+
+class infinitesimal:
+    # Monomial in symbolic infinitesimals, kept as {symbol: power} in self.value.
+    # A plain string is shorthand for that symbol to the first power and
+    # infinitesimal({}) is the O(1) monomial.
+    def __init__(self, value):
+        if isinstance(value, str):
+            value = {value: 1}
+        # copy the dict and drop zero powers, so that e.g. dm**0 == infinitesimal({})
+        self.value = {s: n for s, n in value.items() if n != 0}
+
+    def __pow__(self, n):
+        # n-th power of the monomial: every exponent is multiplied by n,
+        # e.g. (dm**2 * alpha)**3 is dm**6 * alpha**3
+        return infinitesimal({s: p * n for s, p in self.value.items()})
+
+    def __mul__(self, other):
+        # product of two monomials: exponents of shared symbols add up
+        value = dict(self.value)
+        for s, p in other.value.items():
+            value[s] = value.get(s, 0) + p
+        return infinitesimal(value)
+
+    def __str__(self):
+        # canonical text with sorted symbols, e.g. "alpha*dm**2"; "" for the O(1) monomial
+        return "*".join(
+            s if self.value[s] == 1 else f"{s}**{self.value[s]}" for s in sorted(self.value)
+        )
+
+    def __hash__(self):
+        # hash and equality both use the canonical text, so equal monomials
+        # can serve as interchangeable dictionary keys
+        return hash(self.__str__())
+
+    def __eq__(self, other):
+        # compares against the string form of other, whatever its type
+        return self.__str__() == other.__str__()
+
+    def __lt__(self, other):
+        # Python 3 never calls __cmp__ and str has no __cmp__ method, so the
+        # ordering is provided by comparing the canonical texts directly
+        return self.__str__() < other.__str__()
+
+    def symbols(self):
+        # sorted tuple of the symbol names that occur in this monomial
+        return tuple(sorted(self.value))
+
+    def behaves_as(self, other):
+        # True if self contains every symbol of other to at least the same power,
+        # e.g. (dm**4 * alpha).behaves_as(dm**3 * alpha)
+        for s, n1 in other.value.items():
+            if self.value.get(s, 0) < n1:
+                return False
+        return True
diff --git a/lib/gpt/ad/forward/landau.py b/lib/gpt/ad/forward/landau.py
new file mode 100644
index 00000000..51139788
--- /dev/null
+++ b/lib/gpt/ad/forward/landau.py
@@ -0,0 +1,55 @@
+#
+#    GPT - Grid Python Toolkit
+#    Copyright (C) 2023  Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt)
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License along
+#    with this program; if not, write to the Free Software Foundation, Inc.,
+#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+
+class landau:
+    # Landau O(...) bound: the monomials given here, and anything that behaves
+    # as one of them, are the terms a truncated series neglects.
+    def __init__(self, *infinitesimals):
+        self.infinitesimals = infinitesimals
+
+    def accept(self, i):
+        # keep monomial i only if it does not behave as any of the neglected ones
+        return not any(i.behaves_as(bound) for bound in self.infinitesimals)
+
+    def __add__(self, other):
+        # combined bound of two O(...) sets, keeping only the dominant monomials
+        if self is other:
+            return self
+        merged = []
+        for cand in self.infinitesimals + other.infinitesimals:
+            redundant = False
+            for pos, kept in enumerate(merged):
+                if cand.behaves_as(kept):
+                    redundant = True
+                elif kept.behaves_as(cand):
+                    # cand dominates kept: overwrite it in place
+                    merged[pos] = cand
+            if not redundant:
+                merged.append(cand)
+        # overwriting plus appending can leave duplicates; set() removes them
+        return landau(*set(merged))
+
+    def __str__(self):
+        # sorted so that the text (and hence __eq__) ignores the order of the bounds
+        inner = ",".join(sorted(str(i) for i in self.infinitesimals))
+        return f"O({inner})"
+
+    def __eq__(self, other):
+        return str(self) == str(other)
diff --git a/lib/gpt/ad/forward/series.py b/lib/gpt/ad/forward/series.py
new file mode 100644
index 00000000..a6410c64
--- /dev/null
+++ b/lib/gpt/ad/forward/series.py
@@ -0,0 +1,168 @@
+#
+#    GPT - Grid Python Toolkit
+#    Copyright (C) 2023  Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt)
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License along
+#    with this program; if not, write to the Free Software Foundation, Inc.,
+#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+import gpt as g
+from gpt.ad.forward import infinitesimal
+
+
+def promote(other, landau_O):  # wrap a monomial or number as a series; pass the rest through
+    if isinstance(other, infinitesimal):
+        return series({other: 1}, landau_O)
+    if g.util.is_num(other):
+        return series({infinitesimal({}): other}, landau_O)
+    return other
+
+
+class series:  # truncated series {monomial: coefficient}, cut off by landau_O
+    def __init__(self, terms, landau_O):
+        # terms not given as a dict are taken as the coefficient of the O(1) monomial
+        self.landau_O = landau_O
+        if not isinstance(terms, dict):
+            terms = {infinitesimal({}): terms}
+        self.terms = terms
+
+    def __str__(self):
+        # "(coefficient)*monomial" per term joined by " + "; the O(1) monomial prints as ""
+        parts = []
+        for t in self.terms:
+            part = "(" + str(self.terms[t]) + ")"
+            si = str(t)
+            if si != "":
+                part = part + "*" + si
+            parts.append(part)
+        return " + ".join(parts)
+
+    def distribute2(self, other, functional):
+        # pairwise combination: functional(a, b) for every pair of coefficients,
+        # accumulated on the product monomial unless the merged landau_O neglects it
+        other = promote(other, self.landau_O)
+        landau_O = self.landau_O + other.landau_O
+        terms = {}
+        for t1 in self.terms:
+            for t2 in other.terms:
+                i = t1 * t2
+                if not landau_O.accept(i):
+                    continue
+                if i not in terms:
+                    terms[i] = g(functional(self.terms[t1], other.terms[t2]))
+                else:
+                    terms[i] += functional(self.terms[t1], other.terms[t2])
+        return series(terms, landau_O)
+
+    def distribute1(self, functional):
+        # apply functional to every coefficient, keeping monomials and landau_O;
+        # the keys of self.terms are unique, so each monomial is written exactly
+        # once and nothing has to be accumulated
+        terms = {}
+        for t1 in self.terms:
+            # same g(...) wrapping as for a new term in distribute2
+            terms[t1] = g(functional(self.terms[t1]))
+        return series(terms, self.landau_O)
+
+    def function(self, functional):
+        # Taylor series of f(self) around the O(1) coefficient:
+        #   f(root + d) = sum_n functional(root, n) * d**n / n!
+        # where functional(root, n) is the n-th derivative of f at root.
+        i0 = infinitesimal({})
+        root = self[1]
+        # nilpotent part d: all terms kept by landau_O except the O(1) one
+        nilpotent = series(
+            {t: c for t, c in self.terms.items() if t != i0 and self.landau_O.accept(t)},
+            self.landau_O,
+        )
+        res = series({i0: functional(root, 0)}, self.landau_O)
+        delta = nilpotent
+        nfac = 1.0
+        n = 0
+        # delta = d**(n+1) runs out of terms once landau_O neglects every product.
+        # Looping until then also picks up mixed monomials (e.g. dm*alpha under
+        # O(dm**2, alpha**2)) that counting powers of single terms would cut off.
+        while delta.terms:
+            n += 1
+            nfac *= n
+            res += delta * functional(root, n) / nfac
+            delta = delta * nilpotent
+        return res
+
+    def __iadd__(self, other):  # in place: adopt bound and terms of self + other
+        res = self + other
+        self.landau_O = res.landau_O
+        self.terms = res.terms
+        return self
+
+    def __mul__(self, other):
+        return self.distribute2(other, lambda a, b: a * b)
+
+    def __rmul__(self, other):  # only number * series is supported
+        if g.util.is_num(other):
+            return self.__mul__(other)
+        raise Exception("Not implemented")
+
+    def __add__(self, other):
+        # self + other on the merged landau bound; terms that the merged bound
+        # neglects are dropped from the result
+        other = promote(other, self.landau_O)
+        landau_O = self.landau_O + other.landau_O
+        terms = {}
+        for t1 in self.terms:
+            if not landau_O.accept(t1):
+                continue
+            terms[t1] = self.terms[t1]
+        for t2 in other.terms:
+            if not landau_O.accept(t2):
+                continue
+            if t2 not in terms:
+                terms[t2] = other.terms[t2]
+            else:
+                terms[t2] = g(terms[t2] + other.terms[t2])
+        return series(terms, landau_O)
+
+    def __sub__(self, other):
+        # self - other; same merging as __add__, other's terms enter with a minus sign
+        other = promote(other, self.landau_O)
+        landau_O = self.landau_O + other.landau_O
+        terms = {}
+        for t1 in self.terms:
+            if not landau_O.accept(t1):
+                continue
+            terms[t1] = self.terms[t1]
+        for t2 in other.terms:
+            if not landau_O.accept(t2):
+                continue
+            if t2 not in terms:
+                # monomial present only in other: result is 0 - coefficient
+                terms[t2] = g(-1.0 * other.terms[t2])
+            else:
+                terms[t2] = g(terms[t2] - other.terms[t2])
+        return series(terms, landau_O)
+
+    def __truediv__(self, other):  # division by a number, via __rmul__
+        return (1.0 / other) * self
+
+    def __radd__(self, other):  # other + self; __add__ takes exactly one operand
+        return self.__add__(other)
+
+    def __getitem__(self, tag):
+        if tag == 1:  # the number 1 addresses the O(1) coefficient
+            tag = infinitesimal({})
+        return self.terms[tag]
+
+    def __setitem__(self, tag, value):
+        if tag == 1:  # the number 1 addresses the O(1) coefficient
+            tag = infinitesimal({})
+        self.terms[tag] = value
diff --git a/lib/gpt/ad/forward/transform.py b/lib/gpt/ad/forward/transform.py
new file mode 100644
index 00000000..ca8bcf7b
--- /dev/null
+++ b/lib/gpt/ad/forward/transform.py
@@ -0,0 +1,31 @@
+#
+#    GPT - Grid Python Toolkit
+#    Copyright (C) 2023  Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt)
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License along
+#    with this program; if not, write to the Free Software Foundation, Inc.,
+#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+import gpt as g
+
+
+def inner_product(sx, sy):  # g.inner_product applied pairwise to the coefficients of sx and sy
+    return sx.distribute2(sy, lambda cx, cy: g.inner_product(cx, cy))
+
+
+def norm2(sx):  # series-valued inner product of sx with itself
+    return sx.distribute2(sx, lambda a, b: g.inner_product(a, b))
+
+
+def cshift(sx, mu, disp):  # g.cshift(., mu, disp) applied to every coefficient of sx
+    return sx.distribute1(lambda field: g.cshift(field, mu, disp))
diff --git a/lib/gpt/ad/reverse/node.py b/lib/gpt/ad/reverse/node.py
index ac9c714d..4c199082 100644
--- a/lib/gpt/ad/reverse/node.py
+++ b/lib/gpt/ad/reverse/node.py
@@ -50,7 +50,6 @@ def traverse(nodes, n, visited=None):
         return forward_free
 
 
-
 class node_differentiable_functional(g.group.differentiable_functional):
     def __init__(self, node, arguments):
         self.node = node
@@ -72,6 +71,7 @@ def gradient(self, fields, dfields):
         self.node()
         return [self.arguments[i].gradient for i in indices]
 
+
 # gctr = 0
 
 
@@ -121,6 +121,12 @@ def __rmul__(x, y):
         return node_base.__mul__(y, x)
 
     def __add__(x, y):
+        if not isinstance(x, node_base):
+            x = node_base(x, with_gradient=False)
+
+        if not isinstance(y, node_base):
+            y = node_base(y, with_gradient=False)
+
         def _forward():
             return x.value + y.value
 
@@ -134,6 +140,12 @@ def _backward(z):
         return node_base(_forward, _backward, (x, y))
 
     def __sub__(x, y):
+        if not isinstance(x, node_base):
+            x = node_base(x, with_gradient=False)
+
+        if not isinstance(y, node_base):
+            y = node_base(y, with_gradient=False)
+
         def _forward():
             return x.value - y.value
 
@@ -206,6 +218,5 @@ def functional(self, *arguments):
         return node_differentiable_functional(self, arguments)
 
 
-
 def node(x, with_gradient=True):
     return node_base(x, with_gradient=with_gradient)
diff --git a/tests/ad/ad.py b/tests/ad/ad.py
index 1bca9f15..f20ec29f 100755
--- a/tests/ad/ad.py
+++ b/tests/ad/ad.py
@@ -3,9 +3,13 @@
 # Authors: Christoph Lehner 2023
 #
 import gpt as g
+import numpy as np
 
 rng = g.random("test")
 
+#####################################
+# reverse AD tests
+#####################################
 for prec in [g.double]:
     grid = g.grid([4, 4, 4, 4], prec)
     g.message(f"Testing in precision {prec.__name__}")
@@ -23,7 +27,7 @@
 
     # test a few simple models
     for c, learn_rate in [
-        (rad.norm2(a1) + 3.0*rad.norm2(a2*b1 + b2 + t1*x), 1e-1),
+        (rad.norm2(a1) + 3.0 * rad.norm2(a2 * b1 + b2 + t1 * x), 1e-1),
         (rad.norm2(rad.relu(a2 * rad.relu(a1 * x + b1) + t1 * x + b2) - x), 1e-1),
         (
             rad.norm2(
@@ -65,3 +69,138 @@
         v1 = f(ff)
         g.message(f"Reduced value from {v0} to {v1} with Adam")
         assert v1 < v0
+
+
+#####################################
+# forward AD tests
+#####################################
+fad = g.ad.forward
+dm = fad.infinitesimal("dm")
+alpha = fad.infinitesimal("alpha")
+
+assert (dm**4 * alpha).behaves_as(dm**3 * alpha)
+assert fad.landau(dm**4, dm * alpha) + fad.landau(
+    dm**2, alpha**2, dm**3 * alpha
+) == fad.landau(dm**2, alpha**2, dm * alpha)
+
+# On lists the neglected orders; a series with this bound keeps its O(1), O(dm), O(alpha),
+# O(dm**2), O(alpha**2) and O(alpha*dm) terms
+On = fad.landau(dm**3, alpha**3, dm**2 * alpha, dm * alpha**2)
+x = fad.series(3, On)
+x[dm] = 2.2
+x[alpha * dm] = 3.1612
+x[alpha] = 4.88
+
+y = x * x
+assert abs(y[1] - 9) < 1e-8
+assert abs(y[dm] - 13.2) < 1e-8
+assert abs(y[alpha * dm] - 2 * 10.736 - 2 * 3 * 3.1612) < 1e-8
+assert abs(y[alpha] - 29.28) < 1e-8
+
+
+# define function
+def fcos(x, nderiv):
+    # nderiv-th derivative of cos at x: cos, -sin, -cos, sin, ... (period 4)
+    sign = (-1) ** ((nderiv + 1) // 2)
+    trig = np.cos if nderiv % 2 == 0 else np.sin
+    return sign * trig(x)
+
+
+fy = y.function(fcos)
+
+eps = 1e-5
+
+err = abs(np.cos(y[1]) - fy[1])
+g.message(f"Error O(1): {err}")
+assert err < 1e-8
+
+err = abs((np.cos(y[1] + y[dm] * eps) - np.cos(y[1] - y[dm] * eps)) / eps / 2 - fy[dm])
+g.message(f"Error O(dm): {err}")
+assert err < 1e-5
+
+err = abs((np.cos(y[1] + y[alpha] * eps) - np.cos(y[1] - y[alpha] * eps)) / eps / 2 - fy[alpha])
+g.message(f"Error O(alpha): {err}")
+assert err < 1e-5
+
+err = abs(
+    (
+        np.cos(y[1] + y[dm] * eps + y[dm**2] * eps**2)
+        + np.cos(y[1] - y[dm] * eps + y[dm**2] * eps**2)
+        - 2 * np.cos(y[1])
+    )
+    / eps**2
+    / 2
+    - fy[dm**2]
+)
+g.message(f"Error O(dm**2): {err}")
+assert err < 1e-5
+
+err = abs(
+    (
+        np.cos(y[1] + y[alpha] * eps + y[alpha**2] * eps**2)
+        + np.cos(y[1] - y[alpha] * eps + y[alpha**2] * eps**2)
+        - 2 * np.cos(y[1])
+    )
+    / eps**2
+    / 2
+    - fy[alpha**2]
+)
+g.message(f"Error O(alpha**2): {err}")
+assert err < 1e-5
+
+err = abs(
+    (
+        +np.cos(y[1] + y[dm] * eps + y[alpha] * eps + y[alpha * dm] * eps**2)
+        - np.cos(y[1] - y[dm] * eps + y[alpha] * eps - y[alpha * dm] * eps**2)
+        - np.cos(y[1] + y[dm] * eps - y[alpha] * eps - y[alpha * dm] * eps**2)
+        + np.cos(y[1] - y[dm] * eps - y[alpha] * eps + y[alpha * dm] * eps**2)
+    )
+    / eps**2
+    / 4
+    - fy[alpha * dm]
+)
+g.message(f"Error O(alpha*dm): {err}")
+assert err < 1e-5
+
+# now test with lattice
+lx = fad.series(rng.cnormal(g.mcolor(grid)), On)
+lx[dm] = rng.cnormal(g.mcolor(grid))
+lx[alpha] = rng.cnormal(g.mcolor(grid))
+ly = 2 * lx + 3 * lx * lx
+
+ly = fad.series(rng.cnormal(g.vcolor(grid)), On)
+ly[dm] = rng.cnormal(g.vcolor(grid))
+ly[alpha] = rng.cnormal(g.vcolor(grid))
+
+lz = lx * ly
+
+eps = 1e-4
+
+
+def scale(lam):  # <ly(lam), lx(lam) * ly(lam)> with X(lam) = X[1] + X[dm] * lam
+    left = g(ly[1] + ly[dm] * lam)
+    right = g((lx[1] + lx[dm] * lam) * (ly[1] + ly[dm] * lam))
+    return g.inner_product(left, right)
+
+
+est = (scale(eps) - scale(-eps)) / 2 / eps
+exa = fad.inner_product(ly, lx * ly)[dm]
+err2 = abs(est - exa) / abs(exa)
+assert err2 < 1e-7
+g.message(f"d <.,.> / dm : {err2}")
+
+est = (scale(eps) + scale(-eps) - 2 * scale(0)) / eps**2 / 2
+exa = fad.inner_product(ly, lx * ly)[dm**2]
+err2 = abs(est - exa) / abs(exa)
+assert err2 < 1e-7
+g.message(f"d <.,.> / dm**2 : {err2}")
+
+test = fad.norm2(fad.cshift(fad.cshift(lz, 0, 1), 0, -1) - lz)
+g.message(test)
+
+# TODO:
+# - fad.series, rad.node need to play nice with g.eval
+#   (inherit from g.evaluable)
+# - fad.series, rad.node play nice with regular g.inner_product etc.
+#   for use in regular algorithms; inherit from lattice_like which
+#   should add maps to rad.inner_product, etc.