From 38f4d894d5943911b54053d82654af23b6a20fa3 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Sun, 5 Nov 2023 20:27:20 +0100 Subject: [PATCH] add forward autodiff --- lib/gpt/ad/__init__.py | 3 +- lib/gpt/ad/forward/__init__.py | 22 ++++ lib/gpt/ad/forward/infinitesimal.py | 73 ++++++++++++ lib/gpt/ad/forward/landau.py | 55 +++++++++ lib/gpt/ad/forward/series.py | 168 ++++++++++++++++++++++++++++ lib/gpt/ad/forward/transform.py | 31 +++++ lib/gpt/ad/reverse/node.py | 15 ++- tests/ad/ad.py | 141 ++++++++++++++++++++++- 8 files changed, 503 insertions(+), 5 deletions(-) create mode 100644 lib/gpt/ad/forward/__init__.py create mode 100644 lib/gpt/ad/forward/infinitesimal.py create mode 100644 lib/gpt/ad/forward/landau.py create mode 100644 lib/gpt/ad/forward/series.py create mode 100644 lib/gpt/ad/forward/transform.py diff --git a/lib/gpt/ad/__init__.py b/lib/gpt/ad/__init__.py index 44b32da2..5a53f84a 100644 --- a/lib/gpt/ad/__init__.py +++ b/lib/gpt/ad/__init__.py @@ -17,5 +17,4 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # import gpt.ad.reverse - -# import gpt.ad.forward +import gpt.ad.forward diff --git a/lib/gpt/ad/forward/__init__.py b/lib/gpt/ad/forward/__init__.py new file mode 100644 index 00000000..75455b14 --- /dev/null +++ b/lib/gpt/ad/forward/__init__.py @@ -0,0 +1,22 @@ +# +# GPT - Grid Python Toolkit +# Copyright (C) 2023 Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +from gpt.ad.forward.infinitesimal import infinitesimal +from gpt.ad.forward.landau import landau +from gpt.ad.forward.series import series +from gpt.ad.forward.transform import norm2, inner_product, cshift diff --git a/lib/gpt/ad/forward/infinitesimal.py b/lib/gpt/ad/forward/infinitesimal.py new file mode 100644 index 00000000..7ecead9e --- /dev/null +++ b/lib/gpt/ad/forward/infinitesimal.py @@ -0,0 +1,73 @@ +# +# GPT - Grid Python Toolkit +# Copyright (C) 2023 Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
#
from functools import total_ordering


@total_ordering
class infinitesimal:
    """Monomial in named infinitesimal symbols, e.g. ``dm**2 * alpha``.

    The monomial is stored in ``self.value`` as a dict mapping each symbol
    name to its power.  The empty dict is the unit monomial; it prints as the
    empty string.

    Instances are hashed, compared and ordered through their canonical string
    form (see ``__str__``), so equal monomials built in different ways can be
    used interchangeably as dictionary keys.
    """

    def __init__(self, value):
        """Create a monomial.

        Parameters:
            value: either a symbol name (``"dm"`` means ``dm**1``) or a dict
                mapping symbol names to powers.
        """
        if isinstance(value, str):
            value = {value: 1}
        # Copy the mapping: the object is hashed by content, so it must not
        # change if the caller later mutates the dict that was passed in.
        # Zero powers are dropped so that ``dm**0`` is the unit monomial.
        self.value = {s: n for s, n in value.items() if n != 0}

    def __pow__(self, n):
        """Return this monomial with every power multiplied by ``n``."""
        return infinitesimal({s: p * n for s, p in self.value.items()})

    def __mul__(self, other):
        """Return the product with another ``infinitesimal`` (powers add)."""
        value = dict(self.value)
        for s, p in other.value.items():
            value[s] = value.get(s, 0) + p
        return infinitesimal(value)

    def __str__(self):
        """Canonical form: symbols sorted by name and joined with ``*``."""
        parts = []
        for s in sorted(self.value):
            p = self.value[s]
            parts.append(s if p == 1 else f"{s}**{p}")
        return "*".join(parts)

    def __repr__(self):
        return f"infinitesimal({self.value!r})"

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # String based on purpose: comparing against a foreign object (for
        # example the integer 1) yields False instead of raising.
        return str(self) == str(other)

    def __lt__(self, other):
        # Python 3 never calls __cmp__; sorting needs a rich comparison.
        # total_ordering derives <=, > and >= from __lt__ and __eq__.
        return str(self) < str(other)

    def __cmp__(self, other):
        """Three-way comparison of the string forms (-1, 0 or 1).

        Kept for callers that invoke it explicitly.  The previous body called
        ``str.__cmp__``, which does not exist in Python 3.
        """
        a, b = str(self), str(other)
        return (a > b) - (a < b)

    def symbols(self):
        """Return the symbol names of this monomial as a sorted tuple."""
        return tuple(sorted(self.value))

    def behaves_as(self, other):
        """Return True if every power in ``other`` is reached by ``self``.

        A symbol missing from ``self`` counts as power 0.  In other words
        ``self`` is of the same or higher order than ``other`` in each symbol
        that ``other`` contains.
        """
        return all(self.value.get(s, 0) >= n for s, n in other.value.items())


# ---------------------------------------------------------------------------
# The collapsed source line that holds this class also carries the start of
# the next file of the patch.  That text is kept below, one patch line per
# comment line.
# ---------------------------------------------------------------------------
# diff --git a/lib/gpt/ad/forward/landau.py b/lib/gpt/ad/forward/landau.py
# new file mode 100644
# index 00000000..51139788
# --- /dev/null
# +++ b/lib/gpt/ad/forward/landau.py
# @@ -0,0 +1,55 @@
# +#
# +# GPT - Grid Python Toolkit
# +# Copyright (C) 2023 Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt)
# +#
# +# This program is free software; you can redistribute it and/or modify
# +# it under the terms of the GNU General Public License as published by
# +# the Free Software Foundation; either version 2 of the License, or
# +# (at your option) any later version.
# +#
# +# This program is distributed in the hope that it will be useful,
# +# but WITHOUT ANY WARRANTY; without even the implied warranty of
# +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# +# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#


class landau:
    """Landau ``O(...)`` symbol: the infinitesimals treated as negligible.

    ``self.infinitesimals`` is a tuple of objects that provide
    ``behaves_as(other)`` and ``str()`` (in this package: ``infinitesimal``).
    A monomial is negligible when it ``behaves_as`` one of them.

    Two symbols are equal when their string forms agree, and they hash
    accordingly.
    """

    def __init__(self, *infinitesimals):
        self.infinitesimals = infinitesimals

    def accept(self, i):
        """Return True if ``i`` is kept, False if this symbol neglects it."""
        return not any(i.behaves_as(j) for j in self.infinitesimals)

    def __add__(self, other):
        """Combine two symbols, keeping only the dominant infinitesimals.

        An entry is dropped when it ``behaves_as`` another entry, i.e. when
        it is already implied by it.  The surviving entries keep the order in
        which they were first seen (``self`` first, then ``other``), so the
        result does not depend on set iteration order.
        """
        if self is other:
            return self
        kept = []
        for cand in self.infinitesimals + other.infinitesimals:
            # Already covered by something we keep (this also removes
            # duplicates, since an entry behaves as itself).
            if any(cand.behaves_as(k) for k in kept):
                continue
            # cand dominates some earlier entries: they become redundant.
            kept = [k for k in kept if not k.behaves_as(cand)]
            kept.append(cand)
        return landau(*kept)

    def __str__(self):
        """Return ``O(a,b,...)`` with the entries sorted by string form."""
        r = ",".join(sorted(str(i) for i in self.infinitesimals))
        return f"O({r})"

    def __eq__(self, other):
        return str(self) == str(other)

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the
        # same string that __eq__ compares.
        return hash(str(self))


# ---------------------------------------------------------------------------
# The collapsed source line that holds this class also carries the start of
# the next file of the patch.  That text is kept below, one patch line per
# comment line.
# ---------------------------------------------------------------------------
# diff --git a/lib/gpt/ad/forward/series.py b/lib/gpt/ad/forward/series.py
# new file mode 100644
# index 00000000..a6410c64
# --- /dev/null
# +++ b/lib/gpt/ad/forward/series.py
# @@ -0,0 +1,168 @@
# +#
# +# GPT - Grid Python Toolkit
# +# Copyright (C) 2023 Christoph Lehner (christoph.lehner@ur.de, https://github.com/lehner/gpt)
# +#
# +# This program is free software; you can redistribute it and/or modify
# +# it under the terms of the GNU General Public License as published by
# +# the Free Software Foundation; either version 2 of the License, or
# +# (at your option) any later version.
# +#
# +# This program is distributed in the hope that it will be useful,
# +# but WITHOUT ANY WARRANTY; without even the implied warranty of
# +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# +# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt as g
from gpt.ad.forward import infinitesimal


def promote(other, landau_O):
    """Wrap an ``infinitesimal`` or a plain number in a ``series``.

    An infinitesimal becomes the series ``1 * other``; a number (as judged by
    ``g.util.is_num``) becomes a pure O(1) series.  Anything else is returned
    unchanged, so the caller must already hold a ``series`` in that case.
    """
    if isinstance(other, infinitesimal):
        other = series({other: 1}, landau_O)
    elif g.util.is_num(other):
        other = series({infinitesimal({}): other}, landau_O)
    return other


class series:
    """Truncated power series in infinitesimals, used for forward AD.

    ``terms`` maps an ``infinitesimal`` (the monomial) to its coefficient;
    ``landau_O`` decides through ``landau_O.accept(monomial)`` which
    monomials are kept.  The tag ``1`` is accepted by ``[]`` as shorthand for
    the O(1) term.
    """

    def __init__(self, terms, landau_O):
        """Create a series.

        Parameters:
            terms: dict ``{infinitesimal: coefficient}``; any non-dict value
                is taken as the O(1) coefficient.
            landau_O: object with ``accept`` and ``+`` (see ``landau``).
        """
        self.landau_O = landau_O
        if not isinstance(terms, dict):
            i0 = infinitesimal({})
            terms = {i0: terms}
        self.terms = terms

    def __str__(self):
        parts = []
        for t in self.terms:
            s = "(" + str(self.terms[t]) + ")"
            si = str(t)
            if si != "":
                s = s + "*" + si
            parts.append(s)
        return " + ".join(parts)

    def distribute2(self, other, functional):
        """Apply the bilinear ``functional(a, b)`` to all pairs of terms.

        Products of monomials rejected by the merged Landau symbol are
        dropped; contributions to the same monomial are summed.
        """
        other = promote(other, self.landau_O)
        # first merge landau_Os
        landau_O = self.landau_O + other.landau_O
        # then merge terms
        terms = {}
        for t1 in self.terms:
            for t2 in other.terms:
                i = t1 * t2
                if not landau_O.accept(i):
                    continue
                if i not in terms:
                    # NOTE(review): g(...) presumably evaluates a gpt
                    # expression into a new object, so the later in-place
                    # += does not touch an input term -- confirm.
                    terms[i] = g(functional(self.terms[t1], other.terms[t2]))
                else:
                    terms[i] += functional(self.terms[t1], other.terms[t2])
        return series(terms, landau_O)

    def distribute1(self, functional):
        """Apply the linear ``functional(a)`` to every coefficient."""
        # Keys of self.terms are unique, so each monomial is visited once.
        terms = {t: g(functional(v)) for t, v in self.terms.items()}
        return series(terms, self.landau_O)

    def function(self, functional):
        """Apply a function through its Taylor expansion about the O(1) term.

        ``functional(root, k)`` must return the k-th derivative at ``root``
        (``k = 0`` is the function value).  With ``delta = self - root`` the
        result is ``sum_k delta**k * functional(root, k) / k!``, truncated at
        the first power that the Landau symbol rejects for every term.
        """
        root = self[1]
        # get nilpotent power
        nilpotent = self - root
        maxn = 0
        i0 = infinitesimal({})
        for t in nilpotent.terms:
            if t == i0:
                continue
            n = 1
            tn = t
            # NOTE(review): this loop only ends if landau_O eventually
            # rejects a power of t; a symbol that landau_O does not bound
            # would loop forever.
            while self.landau_O.accept(tn):
                tn = tn * t
                n += 1
            maxn = max(maxn, n)
        res = series({i0: functional(root, 0)}, self.landau_O)
        delta = nilpotent
        nfac = 1.0
        for i in range(1, maxn):
            nfac *= i
            res += delta * functional(root, i) / nfac
            if i != maxn - 1:
                delta = delta * nilpotent
        return res

    def __iadd__(self, other):
        res = self + other
        self.landau_O = res.landau_O
        self.terms = res.terms
        return self

    def __mul__(self, other):
        return self.distribute2(other, lambda a, b: a * b)

    def __rmul__(self, other):
        if g.util.is_num(other):
            return self.__mul__(other)
        raise Exception("Not implemented")

    def _merge(self, other, subtract):
        """Return ``self + other`` or, if ``subtract`` is true, ``self - other``.

        Shared implementation of ``__add__`` and ``__sub__``: the Landau
        symbols are merged first, then every accepted term of both operands
        is combined.
        """
        other = promote(other, self.landau_O)
        # first merge landau_Os
        landau_O = self.landau_O + other.landau_O
        # then merge terms
        terms = {t: v for t, v in self.terms.items() if landau_O.accept(t)}
        for t, v in other.terms.items():
            if not landau_O.accept(t):
                continue
            if t in terms:
                terms[t] = g(terms[t] - v) if subtract else g(terms[t] + v)
            elif subtract:
                # A term present only in the subtrahend enters with a minus
                # sign (it used to be copied unchanged, i.e. added).
                terms[t] = g(v * (-1))
            else:
                terms[t] = v
        return series(terms, landau_O)

    def __add__(self, other):
        return self._merge(other, False)

    def __sub__(self, other):
        return self._merge(other, True)

    def __truediv__(self, other):
        return (1.0 / other) * self

    def __radd__(self, other):
        # Addition is symmetric here; the previous body passed an extra
        # argument to __add__ and raised TypeError on every call.
        return self.__add__(other)

    def __getitem__(self, tag):
        if tag == 1:
            tag = infinitesimal({})
        return self.terms[tag]

    def __setitem__(self, tag, value):
        if tag == 1:
            tag = infinitesimal({})
        self.terms[tag] = value


# ---------------------------------------------------------------------------
# The collapsed source line that ends this class also carries the start of
# the next file of the patch.  That text is kept below, one patch line per
# comment line; the last line is cut exactly where the source line ends.
# ---------------------------------------------------------------------------
# diff --git a/lib/gpt/ad/forward/transform.py b/lib/gpt/ad/forward/transform.py
# new file mode 100644
# index 00000000..ca8bcf7b
# --- /dev/null
# +++ b/lib/gpt/ad/forward/transform.py
# @@ -0,0 +1,31 @@
# +#
# +# GPT - Grid Python Toolkit
# +# Copyright (C) 2023 Christoph Lehner
# (christoph.lehner@ur.de, https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt as g


def inner_product(sx, sy):
    """Series-level inner product.

    Hands ``g.inner_product`` to ``sx.distribute2`` so that it is applied to
    every pair of coefficients of ``sx`` and ``sy``.
    """

    def _pairwise(a, b):
        return g.inner_product(a, b)

    return sx.distribute2(sy, _pairwise)


def norm2(sx):
    """Return ``inner_product(sx, sx)``."""
    return inner_product(sx, sx)


def cshift(sx, mu, disp):
    """Apply ``g.cshift(., mu, disp)`` to every coefficient of ``sx``."""

    def _shifted(a):
        return g.cshift(a, mu, disp)

    return sx.distribute1(_shifted)


# ---------------------------------------------------------------------------
# The collapsed source line that holds these functions continues with hunks
# for lib/gpt/ad/reverse/node.py.  The original line breaks and blank context
# lines of that part cannot be recovered, so the text is kept verbatim below,
# only re-wrapped at spaces.
# ---------------------------------------------------------------------------
# diff --git a/lib/gpt/ad/reverse/node.py b/lib/gpt/ad/reverse/node.py
# index ac9c714d..4c199082 100644 --- a/lib/gpt/ad/reverse/node.py
# +++ b/lib/gpt/ad/reverse/node.py @@ -50,7 +50,6 @@
# def traverse(nodes, n, visited=None): return forward_free -
# class node_differentiable_functional(g.group.differentiable_functional):
# def __init__(self, node, arguments): self.node = node
# @@ -72,6 +71,7 @@ def gradient(self, fields, dfields): self.node()
# return [self.arguments[i].gradient for i in indices] + # gctr = 0
# @@ -121,6 +121,12 @@ def __rmul__(x, y):
# return node_base.__mul__(y, x) def __add__(x, y):
# + if not isinstance(x, node_base):
# + x = node_base(x, with_gradient=False) +
# + if not isinstance(y, node_base):
# + y = node_base(y, with_gradient=False) +
# def _forward(): return x.value + y.value
# @@ -134,6 +140,12 @@ def _backward(z):
# return node_base(_forward, _backward, (x, y)) def __sub__(x, y):
# + if not isinstance(x, node_base):
# + x = node_base(x, with_gradient=False)
+ + if not isinstance(y, node_base): + y = node_base(y, with_gradient=False) + def _forward(): return x.value - y.value @@ -206,6 +218,5 @@ def functional(self, *arguments): return node_differentiable_functional(self, arguments) - def node(x, with_gradient=True): return node_base(x, with_gradient=with_gradient) diff --git a/tests/ad/ad.py b/tests/ad/ad.py index 1bca9f15..f20ec29f 100755 --- a/tests/ad/ad.py +++ b/tests/ad/ad.py @@ -3,9 +3,13 @@ # Authors: Christoph Lehner 2023 # import gpt as g +import numpy as np rng = g.random("test") +##################################### +# reverse AD tests +##################################### for prec in [g.double]: grid = g.grid([4, 4, 4, 4], prec) g.message(f"Testing in precision {prec.__name__}") @@ -23,7 +27,7 @@ # test a few simple models for c, learn_rate in [ - (rad.norm2(a1) + 3.0*rad.norm2(a2*b1 + b2 + t1*x), 1e-1), + (rad.norm2(a1) + 3.0 * rad.norm2(a2 * b1 + b2 + t1 * x), 1e-1), (rad.norm2(rad.relu(a2 * rad.relu(a1 * x + b1) + t1 * x + b2) - x), 1e-1), ( rad.norm2( @@ -65,3 +69,138 @@ v1 = f(ff) g.message(f"Reduced value from {v0} to {v1} with Adam") assert v1 < v0 + + +##################################### +# forward AD tests +##################################### +fad = g.ad.forward +dm = fad.infinitesimal("dm") +alpha = fad.infinitesimal("alpha") + +assert (dm**4 * alpha).behaves_as(dm**3 * alpha) +assert fad.landau(dm**4, dm * alpha) + fad.landau( + dm**2, alpha**2, dm**3 * alpha +) == fad.landau(dm**2, alpha**2, dm * alpha) + +# landau O notation to keep series with O(1), O(dm), O(alpha), O(alpha*dm) terms +# On determines terms that are neglected +On = fad.landau(dm**3, alpha**3, dm**2 * alpha, dm * alpha**2) +x = fad.series(3, On) +x[dm] = 2.2 +x[alpha * dm] = 3.1612 +x[alpha] = 4.88 + +y = x * x +assert abs(y[1] - 9) < 1e-8 +assert abs(y[dm] - 13.2) < 1e-8 +assert abs(y[alpha * dm] - 2 * 10.736 - 2 * 3 * 3.1612) < 1e-8 +assert abs(y[alpha] - 29.28) < 1e-8 + + +# define function +def fcos(x, nderiv): + if 
nderiv % 2 == 0: + return (-1) ** (nderiv // 2) * np.cos(x) + else: + return (-1) ** ((nderiv + 1) // 2) * np.sin(x) + + +fy = y.function(fcos) + +eps = 1e-5 + +err = abs(np.cos(y[1]) - fy[1]) +g.message(f"Error O(1): {err}") +assert err < 1e-8 + +err = abs((np.cos(y[1] + y[dm] * eps) - np.cos(y[1] - y[dm] * eps)) / eps / 2 - fy[dm]) +g.message(f"Error O(dm): {err}") +assert err < 1e-5 + +err = abs((np.cos(y[1] + y[alpha] * eps) - np.cos(y[1] - y[alpha] * eps)) / eps / 2 - fy[alpha]) +g.message(f"Error O(alpha): {err}") +assert err < 1e-5 + +err = abs( + ( + np.cos(y[1] + y[dm] * eps + y[dm**2] * eps**2) + + np.cos(y[1] - y[dm] * eps + y[dm**2] * eps**2) + - 2 * np.cos(y[1]) + ) + / eps**2 + / 2 + - fy[dm**2] +) +g.message(f"Error O(dm**2): {err}") +assert err < 1e-5 + +err = abs( + ( + np.cos(y[1] + y[alpha] * eps + y[alpha**2] * eps**2) + + np.cos(y[1] - y[alpha] * eps + y[alpha**2] * eps**2) + - 2 * np.cos(y[1]) + ) + / eps**2 + / 2 + - fy[alpha**2] +) +g.message(f"Error O(alpha**2): {err}") +assert err < 1e-5 + +err = abs( + ( + +np.cos(y[1] + y[dm] * eps + y[alpha] * eps + y[alpha * dm] * eps**2) + - np.cos(y[1] - y[dm] * eps + y[alpha] * eps - y[alpha * dm] * eps**2) + - np.cos(y[1] + y[dm] * eps - y[alpha] * eps - y[alpha * dm] * eps**2) + + np.cos(y[1] - y[dm] * eps - y[alpha] * eps + y[alpha * dm] * eps**2) + ) + / eps**2 + / 4 + - fy[alpha * dm] +) +g.message(f"Error O(alpha*dm): {err}") +assert err < 1e-5 + +# now test with lattice +lx = fad.series(rng.cnormal(g.mcolor(grid)), On) +lx[dm] = rng.cnormal(g.mcolor(grid)) +lx[alpha] = rng.cnormal(g.mcolor(grid)) +ly = 2 * lx + 3 * lx * lx + +ly = fad.series(rng.cnormal(g.vcolor(grid)), On) +ly[dm] = rng.cnormal(g.vcolor(grid)) +ly[alpha] = rng.cnormal(g.vcolor(grid)) + +lz = lx * ly + +eps = 1e-4 + + +def scale(lam): + return g.inner_product( + g(ly[1] + ly[dm] * lam), g((lx[1] + lx[dm] * lam) * (ly[1] + ly[dm] * lam)) + ) + + +est = (scale(eps) - scale(-eps)) / 2 / eps +exa = fad.inner_product(ly, lx * 
ly)[dm] +err2 = abs(est - exa) / abs(exa) +assert err2 < 1e-7 +g.message(f"d <.,.> / dm : {err2}") + +est = (scale(eps) + scale(-eps) - 2 * scale(0)) / eps**2 / 2 +exa = fad.inner_product(ly, lx * ly)[dm**2] +err2 = abs(est - exa) / abs(exa) +assert err2 < 1e-7 +g.message(f"d <.,.> / dm**2 : {err2}") + +test = fad.norm2(fad.cshift(fad.cshift(lz, 0, 1), 0, -1) - lz) +g.message(test) + +# TODO: +# - fad.series, rad.node need to play nice with g.eval +# (inherit from g.evaluable) +# - fad.series, rad.node play nice with regular g.inner_product etc. +# for use in regular algorithms; inherit from lattice_like which +# should add maps to rad.inner_product, etc.