From f1a42828caba7a6ea871d7ba2efa72eee2d316bf Mon Sep 17 00:00:00 2001 From: uvilla Date: Fri, 8 Sep 2017 13:00:43 -0500 Subject: [PATCH] Add files --- AdvectionDiffusionBayesian.html | 30712 +++++++++++++++ AdvectionDiffusionBayesian.ipynb | 449 + FEniCS101.html | 2145 ++ FEniCS101.ipynb | 394 + FEniCS101.py | 187 + HessianSpectrum_LinearSourceInversion.html | 21501 +++++++++++ PoissonBayesian.ipynb | 506 + PoissonDeterministic-InexactNewton.html | 38336 +++++++++++++++++++ PoissonDeterministic-InexactNewton.ipynb | 715 + PoissonDeterministic-InexactNewton.py | 319 + PoissonDeterministic-SD.html | 30807 +++++++++++++++ PoissonDeterministic-SD.ipynb | 498 + PoissonDeterministic-SD.py | 246 + PoissonDeterministic.ipynb | 640 + SubsurfaceBayesian.html | 28166 ++++++++++++++ SubsurfaceBayesian.ipynb | 559 + UnconstrainedMinimization.html | 11180 ++++++ UnconstrainedMinimization.ipynb | 431 + UnconstrainedMinimization.py | 165 + index.html | 54 + model_gls.py | 495 + model_subsurf.py | 477 + nb.py | 235 + 23 files changed, 169217 insertions(+) create mode 100644 AdvectionDiffusionBayesian.html create mode 100644 AdvectionDiffusionBayesian.ipynb create mode 100644 FEniCS101.html create mode 100644 FEniCS101.ipynb create mode 100644 FEniCS101.py create mode 100644 HessianSpectrum_LinearSourceInversion.html create mode 100644 PoissonBayesian.ipynb create mode 100644 PoissonDeterministic-InexactNewton.html create mode 100644 PoissonDeterministic-InexactNewton.ipynb create mode 100644 PoissonDeterministic-InexactNewton.py create mode 100644 PoissonDeterministic-SD.html create mode 100644 PoissonDeterministic-SD.ipynb create mode 100644 PoissonDeterministic-SD.py create mode 100644 PoissonDeterministic.ipynb create mode 100644 SubsurfaceBayesian.html create mode 100644 SubsurfaceBayesian.ipynb create mode 100644 UnconstrainedMinimization.html create mode 100644 UnconstrainedMinimization.ipynb create mode 100644 UnconstrainedMinimization.py create mode 100644 index.html create mode 100644 model_gls.py create mode 100644 model_subsurf.py create mode 100644 nb.py diff --git a/AdvectionDiffusionBayesian.html b/AdvectionDiffusionBayesian.html new file mode 100644 index 0000000..9d6e32b --- /dev/null +++ b/AdvectionDiffusionBayesian.html @@ -0,0 +1,30712 @@ + + + +AdvectionDiffusionBayesian + + + + + + + + + + + + + + + + + + + + +

$ +\def\D{\mathcal{D}} +\def\ipar{m} +\def\R{\mathbb{R}} +\def\del{\partial} +\def\vec{\bf} +\def\priorm{\mu_0} +\def\C{\mathcal{C}} +\def\Acal{\mathcal{A}} +\def\postm{\mu_{\rm{post}}} +\def\iparpost{\ipar_\text{post}} +\def\obs{\vec{d}} +\def\yobs{\obs^{\text{obs}}} +\def\obsop{\mathcal{B}} +\def\dd{\vec{\bar{d}}} +\def\iFF{\mathcal{F}} +\def\iFFadj{\mathcal{F}^*} +\def\ncov{\Gamma_{\mathrm{noise}}} +$

+

Example: Bayesian initial condition inversion in an advection-diffusion problem

In this example we tackle the problem of quantifying the uncertainty in the solution of an inverse problem governed by a parabolic PDE via the Bayesian inference framework. The underlying PDE is a time-dependent advection-diffusion equation in which we seek to infer an unknown initial condition from spatio-temporal point measurements.

+

The Bayesian inverse problem:

Following the Bayesian framework, we utilize
+a Gaussian prior measure $\priorm = \mathcal{N}(\ipar_0,\C_0)$,
+with $\C_0=\Acal^{-2}$, where $\Acal$ is an elliptic differential operator as
+described in the PoissonBayesian example, and use an additive
+Gaussian noise model. Therefore, the solution of the Bayesian inverse
+problem is the posterior measure $\postm = \mathcal{N}(\iparpost,\C_\text{post})$, with
+mean $\iparpost$ and covariance $\C_\text{post}$ characterized as follows.

+  • The posterior mean $\iparpost$ is characterized as the minimizer of
+$$
+\begin{aligned}
+& \mathcal{J}(\ipar) :=
+  \frac{1}{2} \left\| \mathcal{B}u(\ipar) - \obs \right\|^2_{\ncov^{-1}}
+  + \frac{1}{2} \left\| \Acal(\ipar - \ipar_0) \right\|^2_{L^2(\D)},
+\end{aligned}
+$$

which can also be interpreted as the regularized functional to be +minimized in deterministic inversion. The observation operator $\mathcal{B}$ extracts the values of the forward solution $u$ on a set of +locations $\{\vec{x}_1, \ldots, \vec{x}_n\} \subset \D$ at +times $\{t_1, \ldots, t_N\} \subset [0, T]$.

+  • The posterior covariance $\C_{\text{post}}$ is the inverse of the Hessian of $\mathcal{J}(\ipar)$, i.e.,
+$$ +\C_{\text{post}} = (\iFFadj \ncov^{-1} \iFF + \C_0^{-1})^{-1}. +$$
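These objects can be made concrete in a small dense example. The sketch below assembles a toy prior with precision $\C_0^{-1} = \Acal^2$ (a hypothetical 1D Laplacian-like stand-in for $\Acal$), draws a prior sample as $\ipar_0 + \Acal^{-1}z$ with white noise $z$, and forms the posterior covariance and mean. All names and sizes are illustrative assumptions, not the hIPPYlib API, which never builds these dense inverses explicitly:

import numpy as np
+
+n, m = 50, 10                                 # toy parameter / data dimensions
+A = 2.0*np.eye(n) - np.eye(n, k=1) - np.eye(n, k=-1)    # stand-in for Acal
+C0inv = A.dot(A)                              # prior precision C_0^{-1} = Acal^2
+m0 = np.zeros(n)                              # prior mean
+prior_sample = m0 + np.linalg.solve(A, np.random.randn(n))  # ~ N(m0, Acal^{-2})
+
+F = np.random.randn(m, n)                     # hypothetical linear map F
+Gn = 0.01*np.eye(m)                           # noise covariance Gamma_noise
+d = F.dot(np.random.randn(n))                 # hypothetical data vector
+
+Hmisfit = F.T.dot(np.linalg.solve(Gn, F))     # data misfit Hessian F^* Gn^{-1} F
+Cpost = np.linalg.inv(Hmisfit + C0inv)        # posterior covariance
+mpost = Cpost.dot(F.T.dot(np.linalg.solve(Gn, d)) + C0inv.dot(m0))  # posterior mean

For this linear toy problem the posterior mean coincides with the minimizer of $\mathcal{J}$ above, and Cpost with the inverse of its Hessian.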

The forward problem:

The PDE in the parameter-to-observable map $\iFF$ models diffusive transport +in a domain $\D \subset \R^d$ ($d \in \{2, 3\}$):

+$$ +\begin{split} +u_t - \kappa\Delta u + \bf{v} \cdot \nabla u &= 0 & \quad \text{in } \D\times(0,T),\\ + u(\cdot, 0) &= \ipar & \quad \text{in } \D,\\ + \kappa \nabla u\cdot \vec{n} &= 0 & \quad \text{on } \partial\D \times (0,T). +\end{split} +$$

Here, $\kappa > 0$ is the diffusion coefficient and $T > 0$ is the final +time. The velocity field +$\vec{v}$ is computed by solving the following steady-state +Navier-Stokes equation with the side walls driving the flow:

+$$ +\begin{aligned} +- \frac{1}{\operatorname{Re}} \Delta \bf{v} + \nabla q + \bf{v} \cdot \nabla \bf{v} &= 0 &\quad&\text{ in }\D,\\ +\nabla \cdot \bf{v} &= 0 &&\text{ in }\D,\\ +\bf{v} &= \bf{g} &&\text{ on } \partial\D. +\end{aligned} +$$

Here, $q$ is the pressure and $\text{Re}$ is the Reynolds number. The Dirichlet boundary data
+$\vec{g} \in \R^d$ is given by
+$\vec{g} = \vec{e}_2$ on the left wall of the domain,
+$\vec{g}=-\vec{e}_2$ on the right wall, and $\vec{g} = \vec{0}$ everywhere else.

+

The adjoint problem:

$$
+\begin{aligned}
+-p_t - \nabla \cdot (p \vec{v}) - \kappa \Delta p &= -\obsop^* (\obsop u - \obs) & \quad &\text{ in } \D\times (0,T),\\
+ p(\cdot, T) &= 0 & &\text{ in } \D,\\
+(\vec{v}p+\kappa\nabla p)\cdot \vec{n} &= 0 & &\text{ on } \partial\D\times (0,T).
+\end{aligned}
+$$
+
+Solving the adjoint problem backward in time, the gradient of the data misfit with respect to the initial condition $\ipar$ is then recovered from the adjoint variable at the initial time as $-p(\cdot, 0)$.
In [1]:
import dolfin as dl
+import sys
+sys.path.append( "../" )
+from hippylib import *
+import numpy as np
+import matplotlib.pyplot as plt
+%matplotlib inline
+sys.path.append( "../applications/ad_diff/" )
+from model_gls import TimeDependentAD
+
+import nb
+
+import logging
+logging.getLogger('FFC').setLevel(logging.WARNING)
+logging.getLogger('UFL').setLevel(logging.WARNING)
+dl.set_log_active(False)
+
+np.random.seed(1)

Construction of the velocity field

In [2]:
def v_boundary(x,on_boundary):
+    return on_boundary
+
+def q_boundary(x,on_boundary):
+    return x[0] < dl.DOLFIN_EPS and x[1] < dl.DOLFIN_EPS
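+# Note (added comment): q_boundary marks only the corner (0, 0). With Dirichlet
+# data prescribed on the whole velocity boundary, the pressure is determined
+# only up to an additive constant, so it is pinned pointwise at a single
+# vertex by the second Dirichlet condition below.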
+        
+def computeVelocityField(mesh):
+    Xh = dl.VectorFunctionSpace(mesh,'Lagrange', 2)
+    Wh = dl.FunctionSpace(mesh, 'Lagrange', 1)
+    XW = dl.MixedFunctionSpace([Xh, Wh])
+
+    Re = 1e2
+    
+    g = dl.Expression(('0.0','(x[0] < 1e-14) - (x[0] > 1 - 1e-14)'))
+    bc1 = dl.DirichletBC(XW.sub(0), g, v_boundary)
+    bc2 = dl.DirichletBC(XW.sub(1), dl.Constant(0), q_boundary, 'pointwise')
+    bcs = [bc1, bc2]
+    
+    vq = dl.Function(XW)
+    (v,q) = dl.split(vq)
+    (v_test, q_test) = dl.TestFunctions (XW)
+    
+    def strain(v):
+        return dl.sym(dl.nabla_grad(v))
+    
+    F = ( (2./Re)*dl.inner(strain(v),strain(v_test))+ dl.inner (dl.nabla_grad(v)*v, v_test)
+           - (q * dl.div(v_test)) + ( dl.div(v) * q_test) ) * dl.dx
+           
+    dl.solve(F == 0, vq, bcs, solver_parameters={"newton_solver":
+                                         {"relative_tolerance":1e-4, "maximum_iterations":100}})
+    
+    plt.figure(figsize=(15,5))
+    vh = dl.project(v,Xh)
+    qh = dl.project(q,Wh)
+    nb.plot(nb.coarsen_v(vh), subplot_loc=121,mytitle="Velocity")
+    nb.plot(qh, subplot_loc=122,mytitle="Pressure")
+    plt.show()
+        
+    return v

Set up the mesh and finite element spaces

In [3]:
mesh = dl.refine( dl.Mesh("ad_20.xml") )
+wind_velocity = computeVelocityField(mesh)
+Vh = dl.FunctionSpace(mesh, "Lagrange", 1)
+print "Number of dofs: {0}".format( Vh.dim() )
Number of dofs: 2023
+

Set up model (prior, true/proposed initial condition)

In [4]:
#gamma = 1
+#delta = 10
+#prior = LaplacianPrior(Vh, gamma, delta)
+
+gamma = 1
+delta = 8
+prior = BiLaplacianPrior(Vh, gamma, delta)
+
+prior.mean = dl.interpolate(dl.Expression('0.5'), Vh).vector()
+true_initial_condition = dl.interpolate(dl.Expression('min(0.5,exp(-100*(pow(x[0]-0.35,2) +  pow(x[1]-0.7,2))))'), Vh).vector()
+problem = TimeDependentAD(mesh, [Vh,Vh,Vh], 0., 4., 1., .2, wind_velocity, True, prior)
+
+objs = [dl.Function(Vh,true_initial_condition),
+        dl.Function(Vh,prior.mean)]
+mytitles = ["True Initial Condition", "Prior mean"]
+nb.multi1_plot(objs, mytitles)
+plt.show()

Generate the synthetic observations

In [5]:
rel_noise = 0.001
+utrue = problem.generate_vector(STATE)
+x = [utrue, true_initial_condition, None]
+problem.solveFwd(x[STATE], x, 1e-9)
+MAX = utrue.norm("linf", "linf")
+noise_std_dev = rel_noise * MAX
+problem.ud.copy(utrue)
+problem.ud.randn_perturb(noise_std_dev)
+problem.noise_variance = noise_std_dev*noise_std_dev
+
+nb.show_solution(Vh, true_initial_condition, utrue, "Solution")

Test the gradient and the Hessian of the cost (negative log posterior)

In [6]:
a0 = true_initial_condition.copy()
+modelVerify(problem, a0, 1e-12)
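modelVerify checks that the gradient and the Hessian are consistent with the cost functional. The same idea in a self-contained finite difference form, applied to a hypothetical quadratic cost (an illustration only, not the hIPPYlib implementation):

import numpy as np
+
+def gradient_check(cost, grad, m):
+    # Compare the directional derivative (g, dm) against one-sided finite
+    # differences of the cost along a random direction dm.
+    dm = np.random.randn(m.shape[0])
+    gdm = np.dot(grad(m), dm)
+    for eps in [1e-2, 1e-3, 1e-4, 1e-5]:
+        fd = (cost(m + eps*dm) - cost(m))/eps
+        print "eps = {0:e}; relative error = {1:e}".format(eps, abs(fd - gdm)/abs(gdm))
+
+M = np.random.randn(20, 20)
+M = M.T.dot(M) + 20.*np.eye(20)                 # SPD Hessian of the toy cost
+gradient_check(lambda m: 0.5*np.dot(m, M.dot(m)), lambda m: M.dot(m), np.random.randn(20))

For a quadratic cost the one-sided finite difference error decreases linearly in eps, which is the behavior a passing gradient check should show.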
(yy, H xx) - (xx, H yy) =  -2.66447204172e-14

Evaluate the gradient and apply the Hessian to a vector

In [7]:
[u,a,p] = problem.generate_vector()
+problem.solveFwd(u, [u,a,p], 1e-12)
+problem.solveAdj(p, [u,a,p], 1e-12)
+mg = problem.generate_vector(PARAMETER)
+grad_norm = problem.evalGradientParameter([u,a,p], mg)
+        
+print "(g,g) = ", grad_norm
+    
+H = ReducedHessian(problem, 1e-12, gauss_newton_approx=False, misfit_only=True) 
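The reduced Hessian is available only through its action on vectors. The symmetry check reported by modelVerify above, $(yy, H\,xx) - (xx, H\,yy) \approx 0$, can be illustrated with a dense toy operator (hypothetical sizes, stand-in for the matrix-free apply):

import numpy as np
+
+B = np.random.randn(30, 30)
+Hmat = B.dot(B.T)                               # toy symmetric (PSD) Hessian
+Hmult = lambda x: Hmat.dot(x)                   # stand-in for a matrix-free apply
+xx, yy = np.random.randn(30), np.random.randn(30)
+print "(yy, H xx) - (xx, H yy) = ", np.dot(yy, Hmult(xx)) - np.dot(xx, Hmult(yy))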
+
(g,g) =  1.67407425719e+12

The Gaussian approximation of the posterior

In [8]:
k = 80
+p = 20
+print "Single Pass Algorithm. Requested eigenvectors: {0}; Oversampling {1}.".format(k,p)
+Omega = np.random.randn(a.array().shape[0], k+p)
+d, U = singlePassG(H, prior.R, prior.Rsolver, Omega, k)
+#d, U = singlePass(H, Omega, k)
+
+posterior = GaussianLRPosterior( prior, d, U )
+
+plt.plot(range(0,k), d, 'b*', range(0,k+1), np.ones(k+1), '-r')
+plt.yscale('log')
+plt.xlabel('number')
+plt.ylabel('eigenvalue')
+
+nb.plot_eigenvectors(Vh, U, mytitle="Eigenvector", which=[0,1,2,5,10,20,30,45,60])
+
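singlePassG approximates the dominant eigenpairs of the generalized problem $H u = \lambda R u$ matrix-free, touching $H$ only once per random vector. A dense sketch of the simpler double-pass variant for a standard symmetric eigenproblem (toy operator; accuracy depends on spectral decay):

import numpy as np
+
+def randomized_eig(Hmat, k, p=10):
+    # Sketch the range of H, orthonormalize, project, and solve the
+    # small dense eigenproblem (double-pass: H is applied twice).
+    Omega = np.random.randn(Hmat.shape[0], k + p)
+    Q, _ = np.linalg.qr(Hmat.dot(Omega))
+    T = Q.T.dot(Hmat.dot(Q))
+    dd, S = np.linalg.eigh(T)
+    idx = np.argsort(dd)[::-1][:k]              # keep the k largest eigenvalues
+    return dd[idx], Q.dot(S[:, idx])
+
+B = np.random.randn(100, 100)
+dd, UU = randomized_eig(B.dot(B.T), 10)
+print "largest eigenvalue error = {0:e}".format(abs(dd[0] - np.linalg.eigvalsh(B.dot(B.T))[-1]))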
Single Pass Algorithm. Requested eigenvectors: 80; Oversampling 20.

Compute the MAP point

In [9]:
H.misfit_only = False
+        
+solver = CGSolverSteihaug()
+solver.set_operator(H)
+solver.set_preconditioner( posterior.Hlr )
+solver.parameters["print_level"] = 1
+solver.parameters["rel_tolerance"] = 1e-6
+solver.solve(a, -mg)
+problem.solveFwd(u, [u,a,p], 1e-12)
+ 
+total_cost, reg_cost, misfit_cost = problem.cost([u,a,p])
+print "Total cost {0:5g}; Reg Cost {1:5g}; Misfit {2:5g}".format(total_cost, reg_cost, misfit_cost)
+    
+posterior.mean = a
+
+plt.figure(figsize=(7.5,5))
+nb.plot(dl.Function(Vh, a), mytitle="Initial Condition")
+plt.show()
+
+nb.show_solution(Vh, a, u, "Solution")
 Iterartion :  0  (B r, r) =  30439.3323582
+ Iteration :  1  (B r, r) =  0.0608857833183
+ Iteration :  2  (B r, r) =  1.04078113333e-05
+ Iteration :  3  (B r, r) =  9.4910975382e-09
+Relative/Absolute residual less than tol
+Converged in  3  iterations with final norm  9.74222640786e-05
+Total cost 84.6353; Reg Cost 69.0841; Misfit 15.5513

Prior and posterior pointwise variance fields

In [10]:
compute_trace = False
+if compute_trace:
+    post_tr, prior_tr, corr_tr = posterior.trace()
+    print "Posterior trace {0:5g}; Prior trace {1:5g}; Correction trace {2:5g}".format(post_tr, prior_tr, corr_tr)
+post_pw_variance, pr_pw_variance, corr_pw_variance = posterior.pointwise_variance()
+
+objs = [dl.Function(Vh, pr_pw_variance),
+        dl.Function(Vh, post_pw_variance)]
+mytitles = ["Prior Variance", "Posterior Variance"]
+nb.multi1_plot(objs, mytitles, logscale=True)
+plt.show()
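In the dense toy sketch introduced after the posterior covariance formula, these fields are simply the diagonals of the covariance matrices (continuing that hypothetical example, with Cpost and C0inv as defined there):

pr_var = np.diag(np.linalg.inv(C0inv))          # prior pointwise variance
+post_var = np.diag(Cpost)                       # posterior pointwise variance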

Draw samples from the prior and posterior distributions

In [11]:
import math
+
+nsamples = 5
+noise = dl.Vector()
+posterior.init_vector(noise,"noise")
+noise_size = noise.array().shape[0]
+s_prior = dl.Function(Vh, name="sample_prior")
+s_post = dl.Function(Vh, name="sample_post")
+
+pr_max =  2.5*math.sqrt( pr_pw_variance.max() ) + prior.mean.max()
+pr_min = -2.5*math.sqrt( pr_pw_variance.min() ) + prior.mean.min()
+ps_max =  2.5*math.sqrt( post_pw_variance.max() ) + posterior.mean.max()
+ps_min = -2.5*math.sqrt( post_pw_variance.max() ) + posterior.mean.min()
+
+for i in range(nsamples):
+    noise.set_local( np.random.randn( noise_size ) )
+    posterior.sample(noise, s_prior.vector(), s_post.vector())
+    plt.figure(figsize=(15,5))
+    nb.plot(s_prior, subplot_loc=121,mytitle="Prior sample", vmin=pr_min, vmax=pr_max)
+    nb.plot(s_post, subplot_loc=122,mytitle="Posterior sample", vmin=ps_min, vmax=ps_max)
+    plt.show()
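In dense notation, a posterior sample is the mean plus a covariance square root applied to white noise; hIPPYlib instead applies a low-rank factorization of $\C_{\text{post}}$ matrix-free. Continuing the hypothetical dense example (Cpost, mpost, n as defined above):

Lc = np.linalg.cholesky(Cpost)                  # dense factor, C_post = Lc Lc^T
+post_samples = [mpost + Lc.dot(np.random.randn(n)) for i in range(5)]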
+ + diff --git a/AdvectionDiffusionBayesian.ipynb b/AdvectionDiffusionBayesian.ipynb new file mode 100644 index 0000000..1a9938c --- /dev/null +++ b/AdvectionDiffusionBayesian.ipynb @@ -0,0 +1,449 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$\n", + "\\def\\D{\\mathcal{D}}\n", + "\\def\\ipar{m}\n", + "\\def\\R{\\mathbb{R}}\n", + "\\def\\del{\\partial}\n", + "\\def\\vec{\\bf}\n", + "\\def\\priorm{\\mu_0}\n", + "\\def\\C{\\mathcal{C}}\n", + "\\def\\Acal{\\mathcal{A}}\n", + "\\def\\postm{\\mu_{\\rm{post}}}\n", + "\\def\\iparpost{\\ipar_\\text{post}}\n", + "\\def\\obs{\\vec{d}} \n", + "\\def\\yobs{\\obs^{\\text{obs}}}\n", + "\\def\\obsop{\\mathcal{B}}\n", + "\\def\\dd{\\vec{\\bar{d}}}\n", + "\\def\\iFF{\\mathcal{F}}\n", + "\\def\\iFFadj{\\mathcal{F}^*}\n", + "\\def\\ncov{\\Gamma_{\\mathrm{noise}}}\n", + "$\n", + "#Example: Bayesian initial condition inversion in an advection-diffusion problem \n", + "\n", + "In this example we tackle the problem of quantifying the uncertainty in the solution of an inverse problem governed by a parabolic PDE via the Bayesian inference framework. The underlying PDE is a time-dependent advection-diffusion equation in which we seek to infer an unknown initial condition from spatio-temporal point measurements.\n", + "\n", + "### The Bayesian inverse problem:\n", + "\n", + "Following the Bayesian framework, we utilize \n", + "a Gaussian prior measure $\\priorm = \\mathcal{N}(\\ipar_0,\\C_0)$,\n", + "with $\\C_0=\\Acal^{-2}$ where $\\Acal$ is an elliptic differential operator as \n", + "described in the PoissonBayesian example, and use an additive\n", + "Gaussian noise model. Therefore, the solution of the Bayesian inverse\n", + "problem is the posterior measure, $\\postm = \\mathcal{N}(\\iparpost,\\C_\\text{post})$ with\n", + "$\\iparpost$ and $\\C_\\text{post}$.\n", + "\n", + "- The posterior mean $\\iparpost$ is characterized as the minimizer of\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "& \\mathcal{J}(\\ipar) :=\n", + " \\frac{1}{2} \\left\\| \\mathcal{B}u(\\ipar) -\\obs \\right\\|^2_{\\ncov^{-1}}\n", + " + \\frac 12 \\left\\| \\Acal(\\ipar - \\ipar_0 \\right)\\|^2_{L^2(\\D)},\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "which can also be interpreted as the regularized functional to be\n", + "minimized in deterministic inversion. The observation operator $\\mathcal{B}$ extracts the values of the forward solution $u$ on a set of\n", + "locations $\\{\\vec{x}_1, \\ldots, \\vec{x}_n\\} \\subset \\D$ at\n", + "times $\\{t_1, \\ldots, t_N\\} \\subset [0, T]$.\n", + "\n", + "- The posterior covariance $\\C_{\\text{post}}$ is the inverse of the Hessian of $\\mathcal{J}(\\ipar)$, i.e.,\n", + "\n", + "$$\n", + "\\C_{\\text{post}} = (\\iFFadj \\ncov^{-1} \\iFF + \\C_0^{-1})^{-1}.\n", + "$$\n", + "\n", + "\n", + "### The forward problem:\n", + "\n", + "The PDE in the parameter-to-observable map $\\iFF$ models diffusive transport\n", + "in a domain $\\D \\subset \\R^d$ ($d \\in \\{2, 3\\}$):\n", + "\n", + "$$\n", + "\\begin{split}\n", + "u_t - \\kappa\\Delta u + \\bf{v} \\cdot \\nabla u &= 0 & \\quad \\text{in } \\D\\times(0,T),\\\\\n", + " u(\\cdot, 0) &= \\ipar & \\quad \\text{in } \\D,\\\\\n", + " \\kappa \\nabla u\\cdot \\vec{n} &= 0 & \\quad \\text{on } \\partial\\D \\times (0,T).\n", + "\\end{split}\n", + "$$\n", + "\n", + "Here, $\\kappa > 0$ is the diffusion coefficient and $T > 0$ is the final\n", + "time. 
The velocity field\n", + "$\\vec{v}$ is computed by solving the following steady-state\n", + "Navier-Stokes equation with the side walls driving the flow:\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "- \\frac{1}{\\operatorname{Re}} \\Delta \\bf{v} + \\nabla q + \\bf{v} \\cdot \\nabla \\bf{v} &= 0 &\\quad&\\text{ in }\\D,\\\\\n", + "\\nabla \\cdot \\bf{v} &= 0 &&\\text{ in }\\D,\\\\\n", + "\\bf{v} &= \\bf{g} &&\\text{ on } \\partial\\D.\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "Here, $q$ is pressure, $\\text{Re}$ is the Reynolds number. The Dirichlet boundary data\n", + "$\\vec{g} \\in \\R^d$ is given by \n", + "$\\vec{g} = \\vec{e}_2$ on the left wall of the domain, \n", + "$\\vec{g}=-\\vec{e}_2$ on the right wall, and $\\vec{g} = \\vec{0}$ everywhere else.\n", + "\n", + "### The adjoint problem:\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "-p_t - \\nabla \\cdot (p \\vec{v}) - \\kappa \\Delta p &= -\\obsop^* (\\obsop u - \\obs) & \\quad &\\text{ in } \\D\\times (0,T),\\\\\n", + " p(\\cdot, T) &= 0 & &\\text{ in } \\D,\\\\ \n", + "(\\vec{v}p+\\kappa\\nabla p)\\cdot \\vec{n} &= 0 & &\\text{ on } \\partial\\D\\times (0,T).\n", + "\\end{aligned}\n", + "$$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import dolfin as dl\n", + "import sys\n", + "sys.path.append( \"../\" )\n", + "from hippylib import *\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "sys.path.append( \"../applications/ad_diff/\" )\n", + "from model_gls import TimeDependentAD\n", + "\n", + "import nb\n", + "\n", + "import logging\n", + "logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "dl.set_log_active(False)\n", + "\n", + "np.random.seed(1)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Construction of the velocity field" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def v_boundary(x,on_boundary):\n", + " return on_boundary\n", + "\n", + "def q_boundary(x,on_boundary):\n", + " return x[0] < dl.DOLFIN_EPS and x[1] < dl.DOLFIN_EPS\n", + " \n", + "def computeVelocityField(mesh):\n", + " Xh = dl.VectorFunctionSpace(mesh,'Lagrange', 2)\n", + " Wh = dl.FunctionSpace(mesh, 'Lagrange', 1)\n", + " XW = dl.MixedFunctionSpace([Xh, Wh])\n", + "\n", + " Re = 1e2\n", + " \n", + " g = dl.Expression(('0.0','(x[0] < 1e-14) - (x[0] > 1 - 1e-14)'))\n", + " bc1 = dl.DirichletBC(XW.sub(0), g, v_boundary)\n", + " bc2 = dl.DirichletBC(XW.sub(1), dl.Constant(0), q_boundary, 'pointwise')\n", + " bcs = [bc1, bc2]\n", + " \n", + " vq = dl.Function(XW)\n", + " (v,q) = dl.split(vq)\n", + " (v_test, q_test) = dl.TestFunctions (XW)\n", + " \n", + " def strain(v):\n", + " return dl.sym(dl.nabla_grad(v))\n", + " \n", + " F = ( (2./Re)*dl.inner(strain(v),strain(v_test))+ dl.inner (dl.nabla_grad(v)*v, v_test)\n", + " - (q * dl.div(v_test)) + ( dl.div(v) * q_test) ) * dl.dx\n", + " \n", + " dl.solve(F == 0, vq, bcs, solver_parameters={\"newton_solver\":\n", + " {\"relative_tolerance\":1e-4, \"maximum_iterations\":100}})\n", + " \n", + " plt.figure(figsize=(15,5))\n", + " vh = dl.project(v,Xh)\n", + " qh = dl.project(q,Wh)\n", + " nb.plot(nb.coarsen_v(vh), subplot_loc=121,mytitle=\"Velocity\")\n", + " nb.plot(qh, subplot_loc=122,mytitle=\"Pressure\")\n", + " plt.show()\n", + " \n", + " return v" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "##Set up the mesh and finite element spaces" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "mesh = dl.refine( dl.Mesh(\"ad_20.xml\") )\n", + "wind_velocity = computeVelocityField(mesh)\n", + "Vh = dl.FunctionSpace(mesh, \"Lagrange\", 1)\n", + "print \"Number of dofs: {0}\".format( Vh.dim() )" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up model (prior, true/proposed initial condition)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#gamma = 1\n", + "#delta = 10\n", + "#prior = LaplacianPrior(Vh, gamma, delta)\n", + "\n", + "gamma = 1\n", + "delta = 8\n", + "prior = BiLaplacianPrior(Vh, gamma, delta)\n", + "\n", + "prior.mean = dl.interpolate(dl.Expression('0.5'), Vh).vector()\n", + "true_initial_condition = dl.interpolate(dl.Expression('min(0.5,exp(-100*(pow(x[0]-0.35,2) + pow(x[1]-0.7,2))))'), Vh).vector()\n", + "problem = TimeDependentAD(mesh, [Vh,Vh,Vh], 0., 4., 1., .2, wind_velocity, True, prior)\n", + "\n", + "objs = [dl.Function(Vh,true_initial_condition),\n", + " dl.Function(Vh,prior.mean)]\n", + "mytitles = [\"True Initial Condition\", \"Prior mean\"]\n", + "nb.multi1_plot(objs, mytitles)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate the synthetic observations" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "rel_noise = 0.001\n", + "utrue = problem.generate_vector(STATE)\n", + "x = [utrue, true_initial_condition, None]\n", + "problem.solveFwd(x[STATE], x, 1e-9)\n", + "MAX = utrue.norm(\"linf\", \"linf\")\n", + "noise_std_dev = rel_noise * MAX\n", + "problem.ud.copy(utrue)\n", + "problem.ud.randn_perturb(noise_std_dev)\n", + "problem.noise_variance = noise_std_dev*noise_std_dev\n", + "\n", + "nb.show_solution(Vh, true_initial_condition, utrue, \"Solution\")" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Test the gradient and the Hessian of the cost (negative log posterior)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "a0 = true_initial_condition.copy()\n", + "modelVerify(problem, a0, 1e-4, 1e-4)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Evaluate the gradient and apply the Hessian to a vector" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "[u,a,p] = problem.generate_vector()\n", + "problem.solveFwd(u, [u,a,p], 1e-12)\n", + "problem.solveAdj(p, [u,a,p], 1e-12)\n", + "mg = problem.generate_vector(PARAMETER)\n", + "grad_norm = problem.evalGradientParameter([u,a,p], mg)\n", + " \n", + "print \"(g,g) = \", grad_norm\n", + " \n", + "H = ReducedHessian(problem, 1e-12, gauss_newton_approx=False, misfit_only=True) " + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Gaussian approximation of the posterior" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "k = 80\n", + "p = 20\n", + "print \"Single Pass Algorithm. 
Requested eigenvectors: {0}; Oversampling {1}.\".format(k,p)\n", + "Omega = np.random.randn(a.array().shape[0], k+p)\n", + "d, U = singlePassG(H, prior.R, prior.Rsolver, Omega, k)\n", + "#d, U = singlePass(H, Omega, k)\n", + "\n", + "posterior = GaussianLRPosterior( prior, d, U )\n", + "\n", + "plt.plot(range(0,k), d, 'b*', range(0,k+1), np.ones(k+1), '-r')\n", + "plt.yscale('log')\n", + "plt.xlabel('number')\n", + "plt.ylabel('eigenvalue')\n", + "\n", + "nb.plot_eigenvectors(Vh, U, mytitle=\"Eigenvector\", which=[0,1,2,5,10,20,30,45,60])" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Compute the MAP point" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "H.misfit_only = False\n", + " \n", + "solver = CGSolverSteihaug()\n", + "solver.set_operator(H)\n", + "solver.set_preconditioner( posterior.Hlr )\n", + "solver.parameters[\"print_level\"] = 1\n", + "solver.parameters[\"rel_tolerance\"] = 1e-6\n", + "solver.solve(a, -mg)\n", + "problem.solveFwd(u, [u,a,p], 1e-12)\n", + " \n", + "total_cost, reg_cost, misfit_cost = problem.cost([u,a,p])\n", + "print \"Total cost {0:5g}; Reg Cost {1:5g}; Misfit {2:5g}\".format(total_cost, reg_cost, misfit_cost)\n", + " \n", + "posterior.mean = a\n", + "\n", + "plt.figure(figsize=(7.5,5))\n", + "nb.plot(dl.Function(Vh, a), mytitle=\"Initial Condition\")\n", + "plt.show()\n", + "\n", + "nb.show_solution(Vh, a, u, \"Solution\")" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prior and posterior pointwise variance fields" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "compute_trace = False\n", + "if compute_trace:\n", + " post_tr, prior_tr, corr_tr = posterior.trace()\n", + " print \"Posterior trace {0:5g}; Prior trace {1:5g}; Correction trace {2:5g}\".format(post_tr, prior_tr, corr_tr)\n", + "post_pw_variance, pr_pw_variance, corr_pw_variance = posterior.pointwise_variance()\n", + "\n", + "objs = [dl.Function(Vh, pr_pw_variance),\n", + " dl.Function(Vh, post_pw_variance)]\n", + "mytitles = [\"Prior Variance\", \"Posterior Variance\"]\n", + "nb.multi1_plot(objs, mytitles, logscale=True)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Draw samples from the prior and posterior distributions" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nsamples = 5\n", + "noise = dl.Vector()\n", + "posterior.init_vector(noise,\"noise\")\n", + "noise_size = noise.array().shape[0]\n", + "s_prior = dl.Function(Vh, name=\"sample_prior\")\n", + "s_post = dl.Function(Vh, name=\"sample_post\")\n", + "\n", + "pr_max = 2.5*math.sqrt( pr_pw_variance.max() ) + prior.mean.max()\n", + "pr_min = -2.5*math.sqrt( pr_pw_variance.min() ) + prior.mean.min()\n", + "ps_max = 2.5*math.sqrt( post_pw_variance.max() ) + posterior.mean.max()\n", + "ps_min = -2.5*math.sqrt( post_pw_variance.max() ) + posterior.mean.min()\n", + "\n", + "for i in range(nsamples):\n", + " noise.set_local( np.random.randn( noise_size ) )\n", + " posterior.sample(noise, s_prior.vector(), s_post.vector())\n", + " plt.figure(figsize=(15,5))\n", + " nb.plot(s_prior, subplot_loc=121,mytitle=\"Prior sample\", vmin=pr_min, vmax=pr_max)\n", + " nb.plot(s_post, subplot_loc=122,mytitle=\"Posterior sample\", vmin=ps_min, vmax=ps_max)\n", + " plt.show()" + ], + "language": 
"python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/FEniCS101.html b/FEniCS101.html new file mode 100644 index 0000000..5d70186 --- /dev/null +++ b/FEniCS101.html @@ -0,0 +1,2145 @@ + + + +FEniCS101 + + + + + + + + + + + + + + + + + + + + +

FEniCS101 Tutorial

In this tutorial we consider the boundary value problem (BVP)

+\begin{eqnarray*} +- \nabla \cdot (k \nabla u) = f & \text{ in } \Omega,\\ +u = u_0 & \text{ on } \Gamma_D = \Gamma_{\rm left} \bigcup \Gamma_{\rm right},\\ +k \frac{\partial u}{\partial {\bf{n}}} = \sigma & \text{ on } \Gamma_N = \Gamma_{\rm top} \bigcup \Gamma_{\rm bottom}, +\end{eqnarray*}

where $\Omega = (0,1) \times (0,1)$, and $\Gamma_D$ and $\Gamma_N$ are the unions of
+the left and right, and top and bottom boundaries of $\Omega$,
+respectively.

+

Here
+\begin{eqnarray*}
+k(x,y) = 1 & \text{ on } \Omega,\\
+f(x,y) = \left(4\pi^2+\frac{\pi^2}{4}\right)\sin(2 \pi x) \sin\left(\frac{\pi}{2} y\right) & \text{ on } \Omega,\\
+u_0(x,y) = 0 & \text{ on } \Gamma_D, \\
+\sigma(x) = \left\{ \begin{array}{l} -\frac{\pi}{2}\sin(2 \pi x) \\ 0 \end{array}\right.
+& \begin{array}{l} \text{ on } \Gamma_{\rm bottom},\\ \text{ on } \Gamma_{\rm top}.\end{array}
+\end{eqnarray*}

+

The exact solution is +$$ u_e(x,y) = \sin(2\pi x)\sin\left(\frac{\pi}{2}y\right). $$

+

Weak formulation

Let us define the Hilbert spaces $V_{u_0}, V_0 \subset H^1(\Omega)$ as
+$$ V_{u_0} := \left\{ v \in H^1(\Omega) \text{ s.t. } v = u_0 \text{ on } \Gamma_D \right\},$$
+$$ V_{0} := \left\{ v \in H^1(\Omega) \text{ s.t. } v = 0 \text{ on } \Gamma_D \right\}.$$

+

To obtain the weak formulation, we multiply the PDE by an arbitrary function $v \in V_0$ and integrate over the domain $\Omega$ leading to

+$$ -\int_{\Omega} \nabla \cdot (k \nabla u) v \, dx = \int_\Omega f v \, dx\quad \forall \; v \in V_0. $$

Then, integrating the second-order term by parts gives

+$$ \int_{\Omega} k \nabla u \cdot \nabla v \, dx - \int_{\partial \Omega} k \frac{\partial u}{\partial {\bf n} } v \, ds = \int_\Omega f v \, dx \quad \forall \; v \in V_0. $$

Finally, recalling that $ v = 0 $ on $\Gamma_D$ and that $k \frac{\partial u}{\partial {\bf n} } = \sigma $ on $\Gamma_N$, we find the weak formulation:

+

Find $u \in V_{u_0}$ such that +$$ \int_{\Omega} k \nabla u \cdot \nabla v \, dx = \int_\Omega f v \, dx + \int_{\Gamma_N} \sigma v \, ds \quad \forall \; v \in V_0. $$


1. Load modules

To start we load the following modules:

+  • dolfin: the python/C++ interface to FEniCS
+  • math: the python module for mathematical functions
+  • numpy: a python package for linear algebra
+  • matplotlib: a python package used for plotting the results
In [1]:
from dolfin import *
+
+import math
+import numpy as np
+import logging
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+import nb
+
+logging.getLogger('FFC').setLevel(logging.WARNING)
+logging.getLogger('UFL').setLevel(logging.WARNING)
+set_log_active(False)

2. Define the mesh and the finite element space

We construct a triangulation (mesh) $\mathcal{T}_h$ of the computational domain $\Omega := [0, 1]^2$ with n elements in each direction.

+

On the mesh $\mathcal{T}_h$, we then define the finite element space $V_h \subset H^1(\Omega)$ consisting of globally continuous piecewise polynomial functions. The degree variable defines the polynomial degree.

In [2]:
n = 16
+degree = 1
+mesh = RectangleMesh(0, 0, 1, 1, n, n)
+nb.plot(mesh)
+
+Vh  = FunctionSpace(mesh, 'Lagrange', degree)
+print "dim(Vh) = ", Vh.dim()
dim(Vh) =  289
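As a quick sanity check (simple arithmetic under the structured-mesh setup above): continuous P1 elements carry one degree of freedom per mesh vertex, and this mesh has $(n+1)^2$ vertices, so

n = 16
+print "expected dim(Vh) = ", (n + 1)**2         # = 289, matching the output above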

3. Define boundary labels

To partition the boundary of $\Omega$ into the subdomains $\Gamma_{\rm top}$, $\Gamma_{\rm bottom}$, $\Gamma_{\rm left}$, $\Gamma_{\rm right}$, we use the facet function boundary_parts to assign a unique label to each part of $\partial \Omega$.

In [3]:
class TopBoundary(SubDomain):
+    def inside(self, x, on_boundary):
+        return on_boundary and abs(x[1] - 1) < DOLFIN_EPS
+    
+class BottomBoundary(SubDomain):
+    def inside(self, x, on_boundary):
+        return on_boundary and abs(x[1]) < DOLFIN_EPS
+    
+class LeftBoundary(SubDomain):
+    def inside(self, x, on_boundary):
+        return on_boundary and abs(x[0]) < DOLFIN_EPS
+    
+class RightBoundary(SubDomain):
+    def inside(self, x, on_boundary):
+        return on_boundary and abs(x[0] - 1) < DOLFIN_EPS
+    
+boundary_parts = FacetFunction("size_t", mesh)
+boundary_parts.set_all(0)
+
+Gamma_top = TopBoundary()
+Gamma_top.mark(boundary_parts, 1)
+Gamma_bottom = BottomBoundary()
+Gamma_bottom.mark(boundary_parts, 2)
+Gamma_left = LeftBoundary()
+Gamma_left.mark(boundary_parts, 3)
+Gamma_right = RightBoundary()
+Gamma_right.mark(boundary_parts, 4)

4. Define the coefficients of the PDE and the boundary conditions

We first define the coefficients of the PDE using the Constant and Expression classes. Constant is used to define coefficients that do not depend on the space coordinates, Expression is used to define coefficients that are a known function of the space coordinates x[0] (x-axis direction) and x[1] (y-axis direction).

+

In the finite element method community, Dirichlet boundary conditions are also known as essential boundary conditions since they are imposed directly in the definition of the finite element space. In FEniCS, we use the class DirichletBC to indicate this type of condition.

+

On the other hand, Neumann boundary conditions are also known as natural boundary conditions since they are weakly imposed as boundary integrals in the variational formulation (weak form). In FEniCS, we create a new boundary measure ds(i) to integrate over the portion of the boundary marked with label i.

In [4]:
u_L = Constant(0.)
+u_R = Constant(0.)
+
+sigma_bottom = Expression('-(pi/2.0)*sin(2*pi*x[0])')
+sigma_top    = Expression('0')
+
+f = Expression('(4.0*pi*pi+pi*pi/4.0)*(sin(2*pi*x[0])*sin((pi/2.0)*x[1]))')
+
+bcs = [DirichletBC(Vh, u_L, boundary_parts, 3),
+       DirichletBC(Vh, u_R, boundary_parts, 4)]
+
+ds = Measure("ds", subdomain_data=boundary_parts)

5. Define and solve the variational problem

We also define two special types of functions: the TrialFunction u and the TestFunction v. These special functions are used by FEniCS to generate the finite element vectors and matrices which stem from the weak formulation of the PDE.

+

More specifically, by denoting by $\left[{\phi_i(x)}\right]_{i=1}^{{\rm dim}(V_h)}$ the finite element basis for the space $V_h$, a function $u_h \in V_h$ can be written as +$$ u_h = \sum_{i=1}^{{\rm dim}(V_h)} {\rm u}_i \phi_i(x), $$ +where ${\rm u}_i$ represents the coefficients in the finite element expansion of $u_h$.

+

We then define

+  • the bilinear form $a(u_h, v_h) = \int_\Omega \nabla u_h \cdot \nabla v_h \, dx$;
+  • the linear form $L(v_h) = \int_\Omega f v_h \, dx + \int_{\Gamma_{\rm top}} \sigma_{\rm top} v_h \, ds + \int_{\Gamma_{\rm bottom}} \sigma_{\rm bottom} v_h \, ds$.
+

We can then solve the variational problem

+

Find $u_h \in V_h$ such that +$$ a(u_h, v_h) = L(v_h) \quad \forall\, v_h \in V_h $$

+

using directly the built-in solve method in FEniCS.

+

NOTE: As an alternative one can also assemble the finite element matrix $A$ and the right-hand side $b$ that stem from the discretization of $a$ and $L$, and then solve the linear system
+$$ A {\rm u} = {\rm b}, $$
+where

+  • ${\rm u}$ is the vector collecting the coefficients of the finite element expansion of $u_h$,
+  • the entries of the matrix $A$ are such that $A_{ij} = a(\phi_j, \phi_i)$,
+  • the entries of the right-hand side $b$ are such that $b_i = L(\phi_i)$.
In [5]:
u = TrialFunction(Vh)
+v = TestFunction(Vh)
+a = inner(nabla_grad(u), nabla_grad(v))*dx
+L = f*v*dx + sigma_top*v*ds(1) + sigma_bottom*v*ds(2)
+
+uh = Function(Vh)
+
+#solve(a == L, uh, bcs=bcs)
+A, b = assemble_system(a,L, bcs=bcs)
+solve(A, uh.vector(), b, "cg")
+
+nb.plot(uh)
Out[5]:
<matplotlib.collections.TriMesh at 0x11664fd10>

6. Compute the discretization error

For this problem, the exact solution is known.
+We can therefore compute the following norms of the discretization error (i.e., of the difference between the finite element solution $u_h$ and the exact solution $u_{\rm ex}$):
+$$ \| u_{\rm ex} - u_h \|_{L^2(\Omega)} := \sqrt{ \int_{\Omega} (u_{\rm ex} - u_h)^2 \, dx }, $$
+and
+$$ \| u_{\rm ex} - u_h \|_{H^1(\Omega)} := \sqrt{ \int_{\Omega} (u_{\rm ex} - u_h)^2 \, dx + \int_{\Omega} |\nabla u_{\rm ex} - \nabla u_h|^2 \, dx}. $$

In [6]:
u_e = Expression('sin(2*pi*x[0])*sin((pi/2.0)*x[1])')
+grad_u_e = Expression( ('2*pi*cos(2*pi*x[0])*sin((pi/2.0)*x[1])', 'pi/2.0*sin(2*pi*x[0])*cos((pi/2.0)*x[1])'))
+
+err_L2 = sqrt( assemble( (uh-u_e)**2*dx ) )
+err_grad = sqrt( assemble( inner(nabla_grad(uh) - grad_u_e, nabla_grad(uh) - grad_u_e)*dx ) )
+err_H1 = sqrt( err_L2**2 + err_grad**2)
+
+print "|| u_h - u_e ||_L2 = ", err_L2
+print "|| u_h - u_e ||_H1 = ", err_H1
|| u_h - u_e ||_L2 =  0.00842388144799
+|| u_h - u_e ||_H1 =  0.394127114471
+

7. Convergence of the finite element method

We now verify numerically a well-known convergence result for the finite element method.

+

Let $s$ denote the polynomial degree of the finite element space, and assume that the solution $u_{\rm ex}$ is at least in $H^{s+1}(\Omega)$. Then we have
+$$ \| u_{\rm ex} - u_h \|_{H^1} \leq C h^{s}, \quad \| u_{\rm ex} - u_h \|_{L^2} \leq C h^{s+1}. $$

+

In the code below, the function compute(n, degree) solves the PDE using a mesh with n elements in each direction and finite element spaces of polynomial order degree.

+

The figure below shows the discretization errors in the $H^1$- and $L^2$-norms as a function of the mesh size $h$ ($h = \frac{1}{n}$) for piecewise linear (P1, $s=1$) and piecewise quadratic (P2, $s=2$) finite elements. We observe that the numerical results are consistent with the finite element convergence theory. In particular:

+  • for piecewise linear finite elements (P1) we observe first order convergence in the $H^1$-norm and second order convergence in the $L^2$-norm;
+  • for piecewise quadratic finite elements (P2) we observe second order convergence in the $H^1$-norm and third order convergence in the $L^2$-norm.
In [7]:
def compute(n, degree):
+    mesh = RectangleMesh(0, 0, 1, 1, n, n)
+    Vh  = FunctionSpace(mesh, 'Lagrange', degree)
+    boundary_parts = FacetFunction("size_t", mesh)
+    boundary_parts.set_all(0)
+    
+    Gamma_top = TopBoundary()
+    Gamma_top.mark(boundary_parts, 1)
+    Gamma_bottom = BottomBoundary()
+    Gamma_bottom.mark(boundary_parts, 2)
+    Gamma_left = LeftBoundary()
+    Gamma_left.mark(boundary_parts, 3)
+    Gamma_right = RightBoundary()
+    Gamma_right.mark(boundary_parts, 4)
+    
+    bcs = [DirichletBC(Vh, u_L, boundary_parts, 3), DirichletBC(Vh, u_R, boundary_parts, 4)]
+    ds = Measure("ds", subdomain_data=boundary_parts)
+    
+    u = TrialFunction(Vh)
+    v = TestFunction(Vh)
+    a = inner(nabla_grad(u), nabla_grad(v))*dx
+    L = f*v*dx + sigma_top*v*ds(1) + sigma_bottom*v*ds(2)
+    uh = Function(Vh)
+    solve(a == L, uh, bcs=bcs)
+    err_L2 = sqrt( assemble( (uh-u_e)**2*dx ) )
+    err_grad = sqrt( assemble( inner(nabla_grad(uh) - grad_u_e, nabla_grad(uh) - grad_u_e)*dx ) )
+    err_H1 = sqrt( err_L2**2 + err_grad**2)
+    
+    return err_L2, err_H1
+
+nref = 5
+n = 8*np.power(2,np.arange(0,nref))
+h = 1./n
+
+err_L2_P1 = np.zeros(nref)
+err_H1_P1 = np.zeros(nref)
+err_L2_P2 = np.zeros(nref)
+err_H1_P2 = np.zeros(nref)
+
+for i in range(nref):
+    err_L2_P1[i], err_H1_P1[i] = compute(n[i], 1)
+    err_L2_P2[i], err_H1_P2[i] = compute(n[i], 2)
+    
+plt.figure(figsize=(15,5))
+
+plt.subplot(121)
+plt.loglog(h, err_H1_P1, '-or')
+plt.loglog(h, err_L2_P1, '-*b')
+plt.loglog(h, h*.5*err_H1_P1[0]/h[0], '--g')
+plt.loglog(h, np.power(h,2)*.5*np.power( err_L2_P1[0]/h[0], 2), '-.k')
+plt.xlabel("Mesh size h")
+plt.ylabel("Error")
+plt.title("P1 Finite Element")
+plt.legend(["H1 error", "L2 error", "First Order", "Second Order"], 'lower right')
+
+
+plt.subplot(122)
+plt.loglog(h, err_H1_P2, '-or')
+plt.loglog(h, err_L2_P2, '-*b')
+plt.loglog(h, np.power(h/h[0],2)*.5*err_H1_P2[0], '--g')
+plt.loglog(h, np.power(h/h[0],3)*.5*err_L2_P2[0], '-.k')
+plt.xlabel("Mesh size h")
+plt.ylabel("Error")
+plt.title("P2 Finite Element")
+plt.legend(["H1 error", "L2 error", "Second Order", "Third Order"], 'lower right')
+
+plt.show()
+
+ + diff --git a/FEniCS101.ipynb b/FEniCS101.ipynb new file mode 100644 index 0000000..edbeb9e --- /dev/null +++ b/FEniCS101.ipynb @@ -0,0 +1,394 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FEniCS101 Tutorial\n", + "\n", + "In this tutorial we consider the boundary value problem (BVP)\n", + "\n", + "\\begin{eqnarray*}\n", + "- \\nabla \\cdot (k \\nabla u) = f & \\text{ in } \\Omega,\\\\\n", + "u = u_0 & \\text{ on } \\Gamma_D = \\Gamma_{\\rm left} \\bigcup \\Gamma_{\\rm right},\\\\\n", + "k \\frac{\\partial u}{\\partial {\\bf{n}}} = \\sigma & \\text{ on } \\Gamma_N = \\Gamma_{\\rm top} \\bigcup \\Gamma_{\\rm bottom},\n", + "\\end{eqnarray*}\n", + "\n", + "where $\\Omega = (0,1) \\times (0,1)$, $\\Gamma_D$ and and $\\Gamma_N$ are the union of\n", + "the left and right, and top and bottom boundaries of $\\Omega$,\n", + "respectively.\n", + "\n", + "Here\n", + "\\begin{eqnarray*}\n", + "k(x,y) = 1 & \\text{ on } \\Omega\\\\\n", + "f(x,y) = \\left(4\\pi^2+\\frac{\\pi^2}{4}\\right)\\sin(2 \\pi x) \\sin\\left(\\frac{\\pi}{2} y\\right) & \\text{ on } \\Omega\\\\\n", + "u_0(x,y) = 0 & \\text{ on } \\Gamma_D, \\\\\n", + "\\sigma(x) = \\left\\{ \\begin{array}{l} -\\frac{\\pi}{2}\\sin(2 \\pi x) \\\\ 0 \\end{array}\\right.\n", + "& \\begin{array}{l} \\text{ on } \\Gamma_{\\rm bottom},\\\\ \\text{ on } \\Gamma_{\\rm top}.\\end{array}\n", + "\\end{eqnarray*}\n", + "\n", + "The exact solution is\n", + "$$ u_e(x,y) = \\sin(2\\pi x)\\sin\\left(\\frac{\\pi}{2}y\\right). $$\n", + "\n", + "## Weak formulation\n", + "\n", + "Let us define the Hilbert spaces $V_{u_0}, V_0 \\in \\Omega$ as\n", + "$$ V_{u_0} := \\left\\{ v \\in H^1(\\Omega) \\text{ s. t. } v = u_0 \\text{ on } \\Gamma_D \\right\\},$$\n", + "$$ V_{0} := \\left\\{ v \\in H^1(\\Omega) \\text{ s. t. } v = 0 \\text{ on } \\Gamma_D \\right\\}.$$\n", + "\n", + "To obtain the weak formulation, we multiply the PDE by an arbitrary function $v \\in V_0$ and integrate over the domain $\\Omega$ leading to\n", + "\n", + "$$ -\\int_{\\Omega} \\nabla \\cdot (k \\nabla u) v \\, dx = \\int_\\Omega f v \\, dx\\quad \\forall \\; v \\in V_0. $$\n", + "\n", + "Then, integration by parts the non-conforming term gives\n", + "\n", + "$$ \\int_{\\Omega} k \\nabla u \\cdot \\nabla v \\, dx - \\int_{\\partial \\Omega} k \\frac{\\partial u}{\\partial {\\bf n} } v \\, ds = \\int_\\Omega f v \\, dx \\quad \\forall \\; v \\in V_0. $$\n", + "\n", + "Finally by recalling that $ v = 0 $ on $\\Gamma_D$ and that $k \\frac{\\partial u}{\\partial {\\bf n} } = \\sigma $ on $\\Gamma_N$, we find the weak formulation:\n", + "\n", + "*Find * $u \\in V_{u_0}$ *such that*\n", + "$$ \\int_{\\Omega} k \\nabla u \\cdot \\nabla v \\, dx = \\int_\\Omega f v \\, dx + \\int_{\\Gamma_N} \\sigma v \\, ds \\quad \\forall \\; v \\in V_0. $$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 
Load modules\n", + "\n", + "To start we load the following modules:\n", + "\n", + "- dolfin: the python/C++ interface to FEniCS\n", + "\n", + "- [math](https://docs.python.org/2/library/math.html): the python module for mathematical functions\n", + "\n", + "- [numpy](http://www.numpy.org/): a python package for linear algebra\n", + "\n", + "- [matplotlib](http://matplotlib.org/): a python package used for plotting the results" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from dolfin import *\n", + "\n", + "import math\n", + "import numpy as np\n", + "import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import nb\n", + "\n", + "logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "set_log_active(False)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Define the mesh and the finite element space\n", + "\n", + "We construct a triangulation (mesh) $\\mathcal{T}_h$ of the computational domain $\\Omega := [0, 1]^2$ with `n` elements in each direction.\n", + "\n", + "On the mesh $\\mathcal{T}_h$, we then define the finite element space $V_h \\subset H^1(\\Omega)$ consisting of globally continuous piecewise polinomials functions. The `degree` variable defines the polinomial degree." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "n = 16\n", + "degree = 1\n", + "mesh = RectangleMesh(0, 0, 1, 1, n, n)\n", + "nb.plot(mesh)\n", + "\n", + "Vh = FunctionSpace(mesh, 'Lagrange', degree)\n", + "print \"dim(Vh) = \", Vh.dim()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Define boundary labels\n", + "\n", + "To partition the boundary of $\\Omega$ in the subdomains $\\Gamma_{\\rm top}$, $\\Gamma_{\\rm bottom}$, $\\Gamma_{\\rm left}$, $\\Gamma_{\\rm right}$ we assign a unique label `boundary_parts` to each of part of $\\partial \\Omega$." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "class TopBoundary(SubDomain):\n", + " def inside(self, x, on_boundary):\n", + " return on_boundary and abs(x[1] - 1) < DOLFIN_EPS\n", + " \n", + "class BottomBoundary(SubDomain):\n", + " def inside(self, x, on_boundary):\n", + " return on_boundary and abs(x[1]) < DOLFIN_EPS\n", + " \n", + "class LeftBoundary(SubDomain):\n", + " def inside(self, x, on_boundary):\n", + " return on_boundary and abs(x[0]) < DOLFIN_EPS\n", + " \n", + "class RightBoundary(SubDomain):\n", + " def inside(self, x, on_boundary):\n", + " return on_boundary and abs(x[0] - 1) < DOLFIN_EPS\n", + " \n", + "boundary_parts = FacetFunction(\"size_t\", mesh)\n", + "boundary_parts.set_all(0)\n", + "\n", + "Gamma_top = TopBoundary()\n", + "Gamma_top.mark(boundary_parts, 1)\n", + "Gamma_bottom = BottomBoundary()\n", + "Gamma_bottom.mark(boundary_parts, 2)\n", + "Gamma_left = LeftBoundary()\n", + "Gamma_left.mark(boundary_parts, 3)\n", + "Gamma_right = RightBoundary()\n", + "Gamma_right.mark(boundary_parts, 4)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Define the coefficients of the PDE and the boundary conditions\n", + "\n", + "We first define the coefficients of the PDE using the `Constant` and `Expression` classes. 
`Constant` is used to define coefficients that do not depend on the space coordinates, `Expression` is used to define coefficients that are a known function of the space coordinates `x[0]` (x-axis direction) and `x[1]` (y-axis direction).\n", + "\n", + "In the finite element method community, Dirichlet boundary conditions are also known as *essential* boundary conditions since they are imposed directly in the definition of the finite element space. In FEniCS, we use the class `DirichletBC` to indicate this type of condition.\n", + "\n", + "On the other hand, Newman boundary conditions are also known as *natural* boundary conditions since they are weakly imposed as boundary integrals in the variational formulation (weak form). In FEniCS, we create a new boundary measure `ds[i]` to integrate over the portion of the boundary marked with label `i`." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "u_L = Constant(0.)\n", + "u_R = Constant(0.)\n", + "\n", + "sigma_bottom = Expression('-(pi/2.0)*sin(2*pi*x[0])')\n", + "sigma_top = Expression('0')\n", + "\n", + "f = Expression('(4.0*pi*pi+pi*pi/4.0)*(sin(2*pi*x[0])*sin((pi/2.0)*x[1]))')\n", + "\n", + "bcs = [DirichletBC(Vh, u_L, boundary_parts, 3),\n", + " DirichletBC(Vh, u_R, boundary_parts, 4)]\n", + "\n", + "ds = Measure(\"ds\", subdomain_data=boundary_parts)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Define and solve the variational problem\n", + "\n", + "We also define two special types of functions: the `TrialFunction` `u` and the `TestFunction` `v`. These special types of function are used by `FEniCS` to generate the finite element vectors and matrices which stem from the weak formulation of the PDE.\n", + "\n", + "More specifically, by denoting by $\\left[{\\phi_i(x)}\\right]_{i=1}^{{\\rm dim}(V_h)}$ the finite element basis for the space $V_h$, a function $u_h \\in V_h$ can be written as\n", + "$$ u_h = \\sum_{i=1}^{{\\rm dim}(V_h)} {\\rm u}_i \\phi_i(x), $$\n", + "where ${\\rm u}_i$ represents the coefficients in the finite element expansion of $u_h$.\n", + "\n", + "We then define\n", + "\n", + "- the bilinear form $a(u_h, v_h) = \\int_\\Omega \\nabla u_h \\cdot \\nabla v_h dx $;\n", + "\n", + "- the linear form $L(v_h) = \\int_\\Omega f v_h dx + + \\int_{\\Gamma_{\\rm top}} \\sigma_{\\rm top} v_h ds \\int_{\\Gamma_{\\rm bottom}} \\sigma_{\\rm bottom} v_h ds $.\n", + "\n", + "\n", + "We can then solve the variational problem\n", + "\n", + "*Find *$u_h \\in V_h$* such that*\n", + "$$ a(u_h, v_h) = L(v_h) \\quad \\forall\\, v_h \\in V_h $$\n", + "\n", + "using directly the built-in `solve` method in FEniCS.\n", + "\n", + "**NOTE:** As an alternative one can also assemble the finite element matrix $A$ and the right hand side $b$ that stems from the discretization of $a$ and $L$, and then solve the linear system\n", + "$$ A {\\rm u} = {\\rm b}, $$\n", + "where\n", + "\n", + "- ${\\rm u}$ is the vector collecting the coefficient of the finite element expasion of $u_h$,\n", + "\n", + "- the entries of the matrix A are such that $A_{ij} = a(\\phi_j, \\phi_i)$,\n", + "\n", + "- the entries of the right hand side b are such that $b_i = L(\\phi_i)$." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "u = TrialFunction(Vh)\n", + "v = TestFunction(Vh)\n", + "a = inner(nabla_grad(u), nabla_grad(v))*dx\n", + "L = f*v*dx + sigma_top*v*ds(1) + sigma_bottom*v*ds(2)\n", + "\n", + "uh = Function(Vh)\n", + "\n", + "#solve(a == L, uh, bcs=bcs)\n", + "A, b = assemble_system(a,L, bcs=bcs)\n", + "solve(A, uh.vector(), b, \"cg\")\n", + "\n", + "nb.plot(uh)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Compute the discretization error\n", + "\n", + "For this problem, the exact solution is known.\n", + "We can therefore compute the following norms of the discretization error (i.e. the of the difference between the finite element solution $u_h$ and the exact solution $u_{\\rm ex}$)\n", + "$$ \\| u_{\\rm ex} - u_h \\|_{L^2{\\Omega}} := \\sqrt{ \\int_{\\Omega} (u_{\\rm ex} - u_h)^2 \\, dx }, $$ \n", + "and\n", + "$$ \\| u_{\\rm ex} - u_h \\|_{H^1{\\Omega}} := \\sqrt{ \\int_{\\Omega} (u_{\\rm ex} - u_h)^2 \\, dx + \\int_{\\Omega} |\\nabla u_{\\rm ex} - \\nabla u_h|^2 \\, dx}. $$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "u_e = Expression('sin(2*pi*x[0])*sin((pi/2.0)*x[1])')\n", + "grad_u_e = Expression( ('2*pi*cos(2*pi*x[0])*sin((pi/2.0)*x[1])', 'pi/2.0*sin(2*pi*x[0])*cos((pi/2.0)*x[1])'))\n", + "\n", + "err_L2 = sqrt( assemble( (uh-u_e)**2*dx ) )\n", + "err_grad = sqrt( assemble( inner(nabla_grad(uh) - grad_u_e, nabla_grad(uh) - grad_u_e)*dx ) )\n", + "err_H1 = sqrt( err_L2**2 + err_grad**2)\n", + "\n", + "print \"|| u_h - u_e ||_L2 = \", err_L2\n", + "print \"|| u_h - u_e ||_H1 = \", err_H1" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Convergence of the finite element method\n", + "\n", + "We now verify numerically a well-known convergence result for the finite element method.\n", + "\n", + "Let denote with $s$ the polynomial degree of the finite element space, and assume that the solution $u_{\\rm ex}$ is at least in $H^{s+1}(\\Omega)$. Then we have\n", + "$$ \\| u_{\\rm ex} - u_h \\|_{H^1} \\leq C h^{s}, \\quad \\| u_{\\rm ex} - u_h \\|_{L^2} \\leq C h^{s+1}. $$\n", + "\n", + "In the code below, the function `compute(n, degree)` solves the PDE using a mesh with `n` elements in each direction and finite element spaces of polinomial order `degree`.\n", + "\n", + "The figure below shows the discretization errors in the $H^1$ and $L^2$ as a function of the mesh size $h$ ($h = \\frac{1}{n}$) for piecewise linear (P1, $s=1$) and piecewise quadratic (P2, $s=2$) finite elements. We observe that numerical results are consistent with the finite element convergence theory. In particular:\n", + "\n", + "- for piecewise linear finite element P1 we observe first order convergence in the $H^1$-norm and second order convergence in the $L^2$-norm;\n", + "\n", + "- for piecewise quadratic finite element P2 we observe second order convergence in the $H^1$-norm and third order convergence in the $L^2$-norm." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def compute(n, degree):\n", + " mesh = RectangleMesh(0, 0, 1, 1, n, n)\n", + " Vh = FunctionSpace(mesh, 'Lagrange', degree)\n", + " boundary_parts = FacetFunction(\"size_t\", mesh)\n", + " boundary_parts.set_all(0)\n", + " \n", + " Gamma_top = TopBoundary()\n", + " Gamma_top.mark(boundary_parts, 1)\n", + " Gamma_bottom = BottomBoundary()\n", + " Gamma_bottom.mark(boundary_parts, 2)\n", + " Gamma_left = LeftBoundary()\n", + " Gamma_left.mark(boundary_parts, 3)\n", + " Gamma_right = RightBoundary()\n", + " Gamma_right.mark(boundary_parts, 4)\n", + " \n", + " bcs = [DirichletBC(Vh, u_L, boundary_parts, 3), DirichletBC(Vh, u_R, boundary_parts, 4)]\n", + " ds = Measure(\"ds\", subdomain_data=boundary_parts)\n", + " \n", + " u = TrialFunction(Vh)\n", + " v = TestFunction(Vh)\n", + " a = inner(nabla_grad(u), nabla_grad(v))*dx\n", + " L = f*v*dx + sigma_top*v*ds(1) + sigma_bottom*v*ds(2)\n", + " uh = Function(Vh)\n", + " solve(a == L, uh, bcs=bcs)\n", + " err_L2 = sqrt( assemble( (uh-u_e)**2*dx ) )\n", + " err_grad = sqrt( assemble( inner(nabla_grad(uh) - grad_u_e, nabla_grad(uh) - grad_u_e)*dx ) )\n", + " err_H1 = sqrt( err_L2**2 + err_grad**2)\n", + " \n", + " return err_L2, err_H1\n", + "\n", + "nref = 5\n", + "n = 8*np.power(2,np.arange(0,nref))\n", + "h = 1./n\n", + "\n", + "err_L2_P1 = np.zeros(nref)\n", + "err_H1_P1 = np.zeros(nref)\n", + "err_L2_P2 = np.zeros(nref)\n", + "err_H1_P2 = np.zeros(nref)\n", + "\n", + "for i in range(nref):\n", + " err_L2_P1[i], err_H1_P1[i] = compute(n[i], 1)\n", + " err_L2_P2[i], err_H1_P2[i] = compute(n[i], 2)\n", + " \n", + "plt.figure(figsize=(15,5))\n", + "\n", + "plt.subplot(121)\n", + "plt.loglog(h, err_H1_P1, '-or')\n", + "plt.loglog(h, err_L2_P1, '-*b')\n", + "plt.loglog(h, h*.5*err_H1_P1[0]/h[0], '--g')\n", + "plt.loglog(h, np.power(h,2)*.5*np.power( err_L2_P1[0]/h[0], 2), '-.k')\n", + "plt.xlabel(\"Mesh size h\")\n", + "plt.ylabel(\"Error\")\n", + "plt.title(\"P1 Finite Element\")\n", + "plt.legend([\"H1 error\", \"L2 error\", \"First Order\", \"Second Order\"], 'lower right')\n", + "\n", + "\n", + "plt.subplot(122)\n", + "plt.loglog(h, err_H1_P2, '-or')\n", + "plt.loglog(h, err_L2_P2, '-*b')\n", + "plt.loglog(h, np.power(h/h[0],2)*.5*err_H1_P2[0], '--g')\n", + "plt.loglog(h, np.power(h/h[0],3)*.5*err_L2_P2[0], '-.k')\n", + "plt.xlabel(\"Mesh size h\")\n", + "plt.ylabel(\"Error\")\n", + "plt.title(\"P2 Finite Element\")\n", + "plt.legend([\"H1 error\", \"L2 error\", \"Second Order\", \"Third Order\"], 'lower right')\n", + "\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/FEniCS101.py b/FEniCS101.py new file mode 100644 index 0000000..306e394 --- /dev/null +++ b/FEniCS101.py @@ -0,0 +1,187 @@ +# FEniCS101 Tutorial +# +# In this tutorial we consider the boundary value problem (BVP) +# +# - div (k grad u) = f in Omega, +# u = u0 on Gamma_D = Gamma_left U Gamma_right +# k grad u . n = sigma on Gamma_N = Gamma_top U Gamma_bottom, +# +# where Omega = (0,1)^2, Gamma_D and and Gamma_N are the union of +# the left and right, and top and bottom boundaries of Omega, respectively. +# +# The diffusivity coefficient, forcing term and boundary conditions are chosen +# such that exact solution is +# $$ u_e(x,y) = \sin(2\pi x)\sin\left(\frac{\pi}{2}y\right). $$ + +# 1. 
Import modules + +from dolfin import * + +import math +import numpy as np +import logging + +import matplotlib.pyplot as plt +import nb + +logging.getLogger('FFC').setLevel(logging.WARNING) +logging.getLogger('UFL').setLevel(logging.WARNING) +set_log_active(False) + +# 2. Define the mesh and the finite element space + +n = 16 +degree = 1 +mesh = RectangleMesh(0, 0, 1, 1, n, n) +nb.plot(mesh) + +Vh = FunctionSpace(mesh, 'Lagrange', degree) +print "dim(Vh) = ", Vh.dim() + +# 3. Define boundary labels + +class TopBoundary(SubDomain): + def inside(self, x, on_boundary): + return on_boundary and abs(x[1] - 1) < DOLFIN_EPS + +class BottomBoundary(SubDomain): + def inside(self, x, on_boundary): + return on_boundary and abs(x[1]) < DOLFIN_EPS + +class LeftBoundary(SubDomain): + def inside(self, x, on_boundary): + return on_boundary and abs(x[0]) < DOLFIN_EPS + +class RightBoundary(SubDomain): + def inside(self, x, on_boundary): + return on_boundary and abs(x[0] - 1) < DOLFIN_EPS + +boundary_parts = FacetFunction("size_t", mesh) +boundary_parts.set_all(0) + +Gamma_top = TopBoundary() +Gamma_top.mark(boundary_parts, 1) +Gamma_bottom = BottomBoundary() +Gamma_bottom.mark(boundary_parts, 2) +Gamma_left = LeftBoundary() +Gamma_left.mark(boundary_parts, 3) +Gamma_right = RightBoundary() +Gamma_right.mark(boundary_parts, 4) + +# 4. Define the coefficients of the PDE and the boundary conditions + + +u_L = Constant(0.) +u_R = Constant(0.) + +sigma_bottom = Expression('-(pi/2.0)*sin(2*pi*x[0])') +sigma_top = Expression('0') + +f = Expression('(4.0*pi*pi+pi*pi/4.0)*(sin(2*pi*x[0])*sin((pi/2.0)*x[1]))') + +bcs = [DirichletBC(Vh, u_L, boundary_parts, 3), + DirichletBC(Vh, u_R, boundary_parts, 4)] + +ds = Measure("ds", subdomain_data=boundary_parts) + + + +# 5. Define and solve the variational problem + +u = TrialFunction(Vh) +v = TestFunction(Vh) +a = inner(nabla_grad(u), nabla_grad(v))*dx +L = f*v*dx + sigma_top*v*ds(1) + sigma_bottom*v*ds(2) + +uh = Function(Vh) + +#solve(a == L, uh, bcs=bcs) +A, b = assemble_system(a,L, bcs=bcs) +solve(A, uh.vector(), b, "cg") + +nb.plot(uh) + +# + +# 6. Compute the discretization error + +u_e = Expression('sin(2*pi*x[0])*sin((pi/2.0)*x[1])') +grad_u_e = Expression( ('2*pi*cos(2*pi*x[0])*sin((pi/2.0)*x[1])', 'pi/2.0*sin(2*pi*x[0])*cos((pi/2.0)*x[1])')) + +err_L2 = sqrt( assemble( (uh-u_e)**2*dx ) ) +err_grad = sqrt( assemble( inner(nabla_grad(uh) - grad_u_e, nabla_grad(uh) - grad_u_e)*dx ) ) +err_H1 = sqrt( err_L2**2 + err_grad**2) + +print "|| u_h - u_e ||_L2 = ", err_L2 +print "|| u_h - u_e ||_H1 = ", err_H1 + +# 7. 
Convergence of the finite element method + +def compute(n, degree): + mesh = RectangleMesh(0, 0, 1, 1, n, n) + Vh = FunctionSpace(mesh, 'Lagrange', degree) + boundary_parts = FacetFunction("size_t", mesh) + boundary_parts.set_all(0) + + Gamma_top = TopBoundary() + Gamma_top.mark(boundary_parts, 1) + Gamma_bottom = BottomBoundary() + Gamma_bottom.mark(boundary_parts, 2) + Gamma_left = LeftBoundary() + Gamma_left.mark(boundary_parts, 3) + Gamma_right = RightBoundary() + Gamma_right.mark(boundary_parts, 4) + + bcs = [DirichletBC(Vh, u_L, boundary_parts, 3), DirichletBC(Vh, u_R, boundary_parts, 4)] + ds = Measure("ds", subdomain_data=boundary_parts) + + u = TrialFunction(Vh) + v = TestFunction(Vh) + a = inner(nabla_grad(u), nabla_grad(v))*dx + L = f*v*dx + sigma_top*v*ds(1) + sigma_bottom*v*ds(2) + uh = Function(Vh) + solve(a == L, uh, bcs=bcs) + err_L2 = sqrt( assemble( (uh-u_e)**2*dx ) ) + err_grad = sqrt( assemble( inner(nabla_grad(uh) - grad_u_e, nabla_grad(uh) - grad_u_e)*dx ) ) + err_H1 = sqrt( err_L2**2 + err_grad**2) + + return err_L2, err_H1 + +nref = 5 +n = 8*np.power(2,np.arange(0,nref)) +h = 1./n + +err_L2_P1 = np.zeros(nref) +err_H1_P1 = np.zeros(nref) +err_L2_P2 = np.zeros(nref) +err_H1_P2 = np.zeros(nref) + +for i in range(nref): + err_L2_P1[i], err_H1_P1[i] = compute(n[i], 1) + err_L2_P2[i], err_H1_P2[i] = compute(n[i], 2) + +plt.figure(figsize=(15,5)) + +plt.subplot(121) +plt.loglog(h, err_H1_P1, '-or') +plt.loglog(h, err_L2_P1, '-*b') +plt.loglog(h, h*.5*err_H1_P1[0]/h[0], '--g') +plt.loglog(h, np.power(h,2)*.5*np.power( err_L2_P1[0]/h[0], 2), '-.k') +plt.xlabel("Mesh size h") +plt.ylabel("Error") +plt.title("P1 Finite Element") +plt.legend(["H1 error", "L2 error", "First Order", "Second Order"], 'lower right') + + +plt.subplot(122) +plt.loglog(h, err_H1_P2, '-or') +plt.loglog(h, err_L2_P2, '-*b') +plt.loglog(h, np.power(h/h[0],2)*.5*err_H1_P2[0], '--g') +plt.loglog(h, np.power(h/h[0],3)*.5*err_L2_P2[0], '-.k') +plt.xlabel("Mesh size h") +plt.ylabel("Error") +plt.title("P2 Finite Element") +plt.legend(["H1 error", "L2 error", "Second Order", "Third Order"], 'lower right') + +plt.show() + diff --git a/HessianSpectrum_LinearSourceInversion.html b/HessianSpectrum_LinearSourceInversion.html new file mode 100644 index 0000000..95b6e26 --- /dev/null +++ b/HessianSpectrum_LinearSourceInversion.html @@ -0,0 +1,21501 @@ + + + +HessianSpectrum_LinearSourceInversion + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

Spectrum of the Reduced Hessian

The linear source inversion problem

We consider the following linear source inversion problem.
+Find the state $u \in H^1_{\Gamma_D}(\Omega)$ and the source (parameter) $a \in H^1(\Omega)$ that solve
+\begin{align*}
+{} & \min_a \frac{1}{2\sigma^2} \| Bu - u_d \|^2 + \frac{1}{2} \int_\Omega \left[ \delta|a-a_0|^2 + \gamma|\nabla (a - a_0)|^2 \right] dx & {}\\
+{\rm s.t.} & {} &{} \\
+{} & -{\rm div}(k \nabla u) + {\bf v}\cdot \nabla u + cu = a & {\rm in} \; \Omega\\
+{} & u = 0 & {\rm on } \; \Gamma_D\\
+{} & k \frac{\partial u}{\partial n} = 0 & {\rm on } \; \Gamma_N\\
+\end{align*}

+

Here:

+
  • $u_d$ is an $n_{\rm obs}$-dimensional vector of noisy observations of the state $u$ at the $n_{\rm obs}$ locations $\{ {\bf x}_i\}_{i=1}^{n_{\rm obs}}$. More specifically, $u_d(i) = u_{\rm true}( {\bf x}_i ) + \eta_i$, where the $\eta_i$ are i.i.d. $\mathcal{N}(0, \sigma^2)$.
  • $B: H^1_0(\Omega) \rightarrow \mathbb{R}^{n_{\rm obs}}$ is the linear operator that evaluates the state $u$ at the observation locations $\{ {\bf x}_i\}_{i=1}^{n_{\rm obs}}$.
  • $\delta$ and $\gamma$ are the parameters of the regularization, penalizing the $L^2(\Omega)$-norm and the $H^1(\Omega)$-seminorm of $a-a_0$, respectively.
  • $k$, ${\bf v}$, $c$ are given coefficients representing the diffusivity, the advective velocity, and the reaction term, respectively.
  • $\Gamma_D \subset \partial \Omega$ and $\Gamma_N \subset \partial \Omega$ denote the portions of $\partial\Omega$ where we impose Dirichlet and Neumann boundary conditions, respectively.
+ +
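After discretization, $B$ becomes a sparse $n_{\rm obs} \times n$ matrix whose $i$-th row contains the values of the finite element basis functions at ${\bf x}_i$. The toy sketch below is only an illustration of this idea (it is not hippylib's PointwiseStateObservation): it shows the special case in which the observation points coincide with mesh nodes, so that $B$ reduces to a selection matrix.

import numpy as np

# Toy sketch (hypothetical sizes): B as a selection matrix when observation
# points coincide with dof locations; in general each row interpolates with
# the finite element basis functions instead of selecting a single entry.
def selection_B(n_dofs, obs_dofs):
    B = np.zeros((len(obs_dofs), n_dofs))
    for i, j in enumerate(obs_dofs):
        B[i, j] = 1.0
    return B

u = np.arange(6.0)          # stand-in for the state dof vector
B = selection_B(6, [1, 4])
print(B.dot(u))             # observes u at dofs 1 and 4 -> [ 1.  4.]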
+
+
+
+
+
+
+
+

Set up

+
+
+
+
+
+
In [1]:
+
+
+
from dolfin import *
+
+import numpy as np
+import time
+import logging
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+import nb
+
+import sys
+sys.path.append("../../")
+from hippylib import *
+
+start = time.clock()
+
+logging.getLogger('FFC').setLevel(logging.WARNING)
+logging.getLogger('UFL').setLevel(logging.WARNING)
+set_log_active(False)
+
+ +
+
+
+ +
+
+
+
+
+
+

The linear source inversion

+
+
+
+
+
+
In [2]:
+
+
+
def pde_varf(u,a,p):
+    return k*inner(nabla_grad(u), nabla_grad(p))*dx \
+           + inner(nabla_grad(u), v*p)*dx \
+           + c*u*p*dx \
+           - a*p*dx
+
+def u_boundary(x, on_boundary):
+    return on_boundary and x[1] < DOLFIN_EPS
+
+def solve(nx,ny, targets, rel_noise, gamma, delta, verbose=True):
+    np.random.seed(seed=2)
+    mesh = UnitSquareMesh(nx, ny)
+    Vh1 = FunctionSpace(mesh, 'Lagrange', 1)
+    
+    Vh = [Vh1, Vh1, Vh1]
+    if verbose:
+        print "Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}".format(Vh[STATE].dim(), Vh[PARAMETER].dim(), Vh[ADJOINT].dim())
+
+
+    u_bdr = Expression("0.0")
+    u_bdr0 = Expression("0.0")
+    bc = DirichletBC(Vh[STATE], u_bdr, u_boundary)
+    bc0 = DirichletBC(Vh[STATE], u_bdr0, u_boundary)
+
+    atrue = interpolate( Expression("exp( -50*(x[0] - .5)*(x[0] - .5) - 50*(x[1] - .5)*(x[1] - .5))"), Vh[PARAMETER]).vector()
+    a0 = interpolate(Expression("0.0"), Vh[PARAMETER]).vector()
+    
+    pde = PDEVariationalProblem(Vh, pde_varf, bc, bc0)
+ 
+    if verbose:
+        print "Number of observation points: {0}".format(targets.shape[0])
+        
+    misfit = PointwiseStateObservation(Vh[STATE], targets)
+    
+    reg = LaplacianPrior(Vh[PARAMETER], gamma, delta)
+                    
+    #Generate synthetic observations
+    utrue = pde.generate_state()
+    x = [utrue, atrue, None]
+    pde.solveFwd(x[STATE], x, 1e-9)
+    misfit.B.mult(x[STATE], misfit.d)
+    MAX = misfit.d.norm("linf")
+    noise_std_dev = rel_noise * MAX
+    randn_perturb(misfit.d, noise_std_dev)
+    misfit.noise_variance = noise_std_dev*noise_std_dev
+
+    if verbose:
+        plt.figure(figsize=(18,4))
+        nb.plot(Function(Vh[PARAMETER], atrue), mytitle = "True source", subplot_loc=131)
+        nb.plot(Function(Vh[STATE], utrue), mytitle="True state", subplot_loc=132)
+        nb.plot_pts(targets, misfit.d,mytitle="Observations", subplot_loc=133)
+        plt.show()
+    
+    model = Model(pde, reg, misfit)
+    u = model.generate_vector(STATE)
+    a = a0.copy()
+    p = model.generate_vector(ADJOINT)
+    x = [u,a,p]
+    mg = model.generate_vector(PARAMETER)
+    model.solveFwd(u, x)
+    model.solveAdj(p, x)
+    model.evalGradientParameter(x, mg)
+    model.setPointForHessianEvaluations(x)
+
+    H = ReducedHessian(model, 1e-12)
+
+    solver = CGSolverSteihaug()
+    solver.set_operator(H)
+    solver.set_preconditioner( reg.Rsolver )
+    solver.parameters["print_level"] = -1
+    solver.parameters["rel_tolerance"] = 1e-9
+    solver.solve(a, -mg)
+
+    if solver.converged:
+        if verbose:
+            print "CG converged in ", solver.iter, " iterations."
+    else:
+        print "CG did not converge."
+        raise RuntimeError("CG did not converge")
+
+    model.solveFwd(u, x, 1e-12)
+ 
+    total_cost, reg_cost, misfit_cost = model.cost(x)
+
+    if verbose:
+        plt.figure(figsize=(18,4))
+        nb.plot(Function(Vh[PARAMETER], a), mytitle = "Reconstructed source", subplot_loc=131)
+        nb.plot(Function(Vh[STATE], u), mytitle="Reconstructed state", subplot_loc=132)
+        nb.plot_pts(targets, misfit.B*u - misfit.d, mytitle="Misfit", subplot_loc=133)
+        plt.show()
+
+    H.misfit_only = True
+    k_evec = 80
+    p_evec = 5
+    if verbose:
+        print "Double Pass Algorithm. Requested eigenvectors: {0}; Oversampling {1}.".format(k_evec,p_evec)
+    Omega = np.random.randn(a.array().shape[0], k_evec+p_evec)
+    d, U = doublePassG(H, reg.R, reg.Rsolver, Omega, k_evec)
+
+    if verbose:
+        plt.figure()
+        nb.plot_eigenvalues(d, mytitle="Generalized Eigenvalues")
+        nb.plot_eigenvectors(Vh[PARAMETER], U, mytitle="Eigenvectors", which=[0,1,2,5,10,15])
+        plt.show()
+        
+    return d, U, Vh[PARAMETER], solver.iter
+
+ +
+
+
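The eigenvalue computations above use hippylib's randomized doublePassG. As a rough illustration of the underlying idea, here is a dense toy version for the standard (non-generalized) symmetric eigenvalue problem; the actual implementation is matrix-free and orthogonalizes in the $R$-weighted inner product.

import numpy as np

def double_pass_toy(Hmat, k, p=5, seed=0):
    """Toy dense double-pass randomized eigensolver for a symmetric Hmat."""
    n = Hmat.shape[0]
    Omega = np.random.RandomState(seed).randn(n, k + p)  # Gaussian test matrix
    Y = Hmat.dot(Omega)               # first pass: sample the range of H
    Q, _ = np.linalg.qr(Y)            # orthonormal basis for the sampled range
    T = Q.T.dot(Hmat.dot(Q))          # second pass: project H onto the basis
    d, S = np.linalg.eigh(T)          # small dense eigenproblem
    idx = np.argsort(d)[::-1][:k]     # keep the k largest eigenvalues
    return d[idx], Q.dot(S[:, idx])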
+ +
+
+
+
+
+
+

Solution of the source inversion problem

+
+
+
+
+
+
In [3]:
+
+
+
ndim = 2
+nx = 32
+ny = 32
+
+ntargets = 300
+np.random.seed(seed=1)
+targets = np.random.uniform(0.1,0.9, [ntargets, ndim] )
+rel_noise = 0.01
+
+gamma = 70.
+delta = 1e-1
+
+k = Expression("1.0")
+v = Expression(("0.0", "0.0"))
+c = Expression("0.")
+
+d, U, Va, nit = solve(nx,ny, targets, rel_noise, gamma, delta)
+
+ +
+
+
+ +
+
+ + +
+
+
Number of dofs: STATE=1089, PARAMETER=1089, ADJOINT=1089
+Number of observation points: 300
+
+
+
+ +
+ + +
+ +
+ +
+ +
+
+
CG converged in  49  iterations.
+
+
+
+ +
+ + +
+ +
+ +
+ +
+
+
Double Pass Algorithm. Requested eigenvectors: 80; Oversampling 5.
+
+
+
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+
+

Mesh independence of the spectrum of the preconditioned Hessian

+
+
+
+
+
+
In [4]:
+
+
+
gamma = 70.
+delta = 1e-1
+
+k = Expression("1.0")
+v = Expression(("0.0", "0.0"))
+c = Expression("0.")
+
+n = [16,32,64]
+d1, U1, Va1, niter1 = solve(n[0],n[0], targets, rel_noise, gamma, delta,verbose=False)
+d2, U2, Va2, niter2 = solve(n[1],n[1], targets, rel_noise, gamma, delta,verbose=False)
+d3, U3, Va3, niter3 = solve(n[2],n[2], targets, rel_noise, gamma, delta,verbose=False)
+
+print "Number of Iterations: ", niter1, niter2, niter3
+plt.figure(figsize=(18,4))
+nb.plot_eigenvalues(d1, mytitle="Eigenvalues Mesh {0} by {1}".format(n[0],n[0]), subplot_loc=131)
+nb.plot_eigenvalues(d2, mytitle="Eigenvalues Mesh {0} by {1}".format(n[1],n[1]), subplot_loc=132)
+nb.plot_eigenvalues(d3, mytitle="Eigenvalues Mesh {0} by {1}".format(n[2],n[2]), subplot_loc=133)
+
+nb.plot_eigenvectors(Va1, U1, mytitle="Mesh {0} by {1} Eigen".format(n[0],n[0]), which=[0,1,5])
+nb.plot_eigenvectors(Va2, U2, mytitle="Mesh {0} by {1} Eigen".format(n[1],n[1]), which=[0,1,5])
+nb.plot_eigenvectors(Va3, U3, mytitle="Mesh {0} by {1} Eigen".format(n[2],n[2]), which=[0,1,5])
+
+plt.show()
+
+ +
+
+
+ +
+
+ + +
+
+
Number of Iterations:  43 49 49
+
+
+
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+
+

Dependence on the noise level

We solve the problem for different noise levels. The higher the noise level, the more important the effect of the regularization becomes: noisier data inform fewer modes of the parameter, so the spectrum decays after fewer eigenvalues and CG converges in fewer iterations.

+ +
+
+
+
+
+
In [5]:
+
+
+
gamma = 70.
+delta = 1e-1
+
+k = Expression("1.0")
+v = Expression(("0.0", "0.0"))
+c = Expression("0.")
+
+rel_noise = [1e-3,1e-2,1e-1]
+d1, U1, Va1, niter1 = solve(nx,ny, targets, rel_noise[0], gamma, delta,verbose=False)
+d2, U2, Va2, niter2 = solve(nx,ny, targets, rel_noise[1], gamma, delta,verbose=False)
+d3, U3, Va3, niter3 = solve(nx,ny, targets, rel_noise[2], gamma, delta,verbose=False)
+
+print "Number of Iterations: ", niter1, niter2, niter3
+plt.figure(figsize=(18,4))
+nb.plot_eigenvalues(d1, mytitle="Eigenvalues rel_noise {0:g}".format(rel_noise[0]), subplot_loc=131)
+nb.plot_eigenvalues(d2, mytitle="Eigenvalues rel_noise {0:g}".format(rel_noise[1]), subplot_loc=132)
+nb.plot_eigenvalues(d3, mytitle="Eigenvalues rel_noise {0:g}".format(rel_noise[2]), subplot_loc=133)
+
+nb.plot_eigenvectors(Va1, U1, mytitle="rel_noise {0:g} Eigen".format(rel_noise[0]), which=[0,1,5])
+nb.plot_eigenvectors(Va2, U2, mytitle="rel_noise {0:g} Eigen".format(rel_noise[1]), which=[0,1,5])
+nb.plot_eigenvectors(Va3, U3, mytitle="rel_noise {0:g} Eigen".format(rel_noise[2]), which=[0,1,5])
+
+plt.show()
+
+ +
+
+
+ +
+
+ + +
+
+
Number of Iterations:  127 48 18
+
+
+
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+
+

Dependence on the PDE coefficients

We assume a constant reaction term $c = 1$ and consider different values of the diffusivity coefficient $k$.

+

The smaller the value of $k$, the slower the decay in the spectrum: with weaker diffusion the observations retain more fine-scale information about the source, so more modes of the parameter are informed by the data (note the growing number of CG iterations below).

+ +
+
+
+
+
+
In [6]:
+
+
+
rel_noise = 0.01
+
+k = Expression("1.0")
+v = Expression(("0.0", "0.0"))
+c = Expression("1.0")
+
+d1, U1, Va1, niter1 = solve(nx,ny, targets, rel_noise, gamma, delta,verbose=False)
+k = Expression("0.1")
+d2, U2, Va2, niter2 = solve(nx,ny, targets, rel_noise, gamma, delta,verbose=False)
+k = Expression("0.01")
+d3, U3, Va3, niter3 = solve(nx,ny, targets, rel_noise, gamma, delta,verbose=False)
+
+print "Number of Iterations: ", niter1, niter2, niter3
+plt.figure(figsize=(18,4))
+nb.plot_eigenvalues(d1, mytitle="Eigenvalues k=1.0", subplot_loc=131)
+nb.plot_eigenvalues(d2, mytitle="Eigenvalues k=0.1", subplot_loc=132)
+nb.plot_eigenvalues(d3, mytitle="Eigenvalues k=0.01", subplot_loc=133)
+
+nb.plot_eigenvectors(Va1, U1, mytitle="k=1. Eigen", which=[0,1,5])
+nb.plot_eigenvectors(Va2, U2, mytitle="k=0.1 Eigen", which=[0,1,5])
+nb.plot_eigenvectors(Va3, U3, mytitle="k=0.01 Eigen", which=[0,1,5])
+
+plt.show()
+
+ +
+
+
+ +
+
+ + +
+
+
Number of Iterations:  61 99 164
+
+
+
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+ + diff --git a/PoissonBayesian.ipynb b/PoissonBayesian.ipynb new file mode 100644 index 0000000..78ae547 --- /dev/null +++ b/PoissonBayesian.ipynb @@ -0,0 +1,506 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$\\def\\data{\\bf d_\\rm{obs}}\n", + "\\def\\vec{\\bf}\n", + "\\def\\m{\\bf m}\n", + "\\def\\map{\\bf m_{\\text{MAP}}}\n", + "\\def\\postcov{\\bf \\Gamma_{\\text{post}}}\n", + "\\def\\prcov{\\bf \\Gamma_{\\text{prior}}}\n", + "\\def\\matrix{\\bf}\n", + "\\def\\Hmisfit{\\bf H_{\\text{misfit}}}\n", + "\\def\\HT{\\tilde{\\bf H}_{\\text{misfit}}}\n", + "\\def\\diag{diag}\n", + "\\def\\Vr{\\matrix V_r}\n", + "\\def\\Wr{\\matrix W_r}\n", + "\\def\\Ir{\\matrix I_r}\n", + "\\def\\Dr{\\matrix D_r}\n", + "\\def\\H{\\matrix H}\n", + "$$ \n", + "# Example: Bayesian quantification of parameter uncertainty:\n", + "## Estimating the (Gaussian) posterior pdf of the coeffcient parameter field in an elliptic PDE\n", + "\n", + "In this example we tackle the problem of quantifying the\n", + "uncertainty in the solution of an inverse problem governed by an\n", + "elliptic PDE via the Bayesian inference framework. \n", + "Hence, we state the inverse problem as a\n", + "problem of statistical inference over the space of uncertain\n", + "parameters, which are to be inferred from data and a physical\n", + "model. The resulting solution to the statistical inverse problem\n", + "is a posterior distribution that assigns to any candidate set of\n", + "parameter fields our belief (expressed as a probability) that a\n", + "member of this candidate set is the ``true'' parameter field that\n", + "gave rise to the observed data.\n", + "\n", + "For simplicity, in what follows we give finite-dimensional expressions (i.e., after\n", + "discretization of the parameter space) for the Bayesian\n", + "formulation of the inverse problem.\n", + "\n", + "### Bayes' Theorem:\n", + "\n", + "The posterior probability distribution combines the prior pdf\n", + "$\\pi_{\\text{prior}}(\\m)$ over the parameter space, which encodes\n", + "any knowledge or assumptions about the parameter space that we may\n", + "wish to impose before the data are considered, with a likelihood pdf\n", + "$\\pi_{\\text{like}}(\\vec{d}_{\\text{obs}} \\; | \\; \\m)$, which explicitly\n", + "represents the probability that a given set of parameters $\\m$\n", + "might give rise to the observed data $\\vec{d}_{\\text{obs}} \\in\n", + "\\mathbb{R}^m$, namely:\n", + "\n", + "$\n", + "\\begin{align}\n", + "\\pi_{\\text{post}}(\\m | \\data) \\propto\n", + "\\pi_{\\text{prior}}(\\m) \\pi_{\\text{like}}(\\data | \\m).\n", + "\\end{align}\n", + "$\n", + "\n", + "Note that the infinite-dimensional analog of Bayes' formula cannot be stated formulated using pdfs but requires Radon-Nikodym derivatives\n", + "\n", + "### Gaussian prior and noise:\n", + "\n", + "#### The prior:\n", + "\n", + "We consider a Gaussian prior with mean $\\vec m_{\\text prior}$ and covariance $\\bf \\Gamma_{\\text{prior}}$. The covariance is given by the discretization of the inverse of differential operator $\\mathcal{A}^{-2} = (-\\gamma \\Delta + \\delta I)^{-2}$, where $\\gamma$, $\\delta > 0$ control the correlation length and the variance of the prior operator. 
This choice of prior ensures that it is a trace-class operator, guaranteeing bounded pointwise variance and a well-posed infinite-dimensional Bayesian inverse problem\n", + "\n", + "#### The likelihood:\n", + "\n", + "$\n", + "\\data = \\bf{f}(\\m) + \\bf{e }, \\;\\;\\; \\bf{e} \\sim \\mathcal{N}(\\bf{0}, \\bf \\Gamma_{\\text{noise}} )\n", + "$\n", + "\n", + "$\n", + "\\pi_{\\text like}(\\data \\; | \\; \\m) = \\exp \\left( - \\tfrac{1}{2} (\\bf{f}(\\m) - \\data)^T \\bf \\Gamma_{\\text{noise}}^{-1} (\\bf{f}(\\m) - \\data)\\right)\n", + "$\n", + "\n", + "Here $\\bf f$ is the parameter-to-observable map that takes a parameter vector $\\m$ and maps\n", + "it to the space observation vector $\\data$.\n", + "\n", + "#### The posterior:\n", + "\n", + "$\n", + "\\pi_{\\text{post}}(\\m \\; | \\; \\data) \\propto \\exp \\left( - \\tfrac{1}{2} \\parallel \\bf{f}(\\m) - \\data \\parallel^{2}_{\\bf \\Gamma_{\\text{noise}}^{-1}} \\! - \\tfrac{1}{2}\\parallel \\m - \\m_{\\text prior} \\parallel^{2}_{\\bf \\Gamma_{\\text{prior}}^{-1}} \\right)\n", + "$\n", + "\n", + "### The Gaussian approximation of the posterior: $\\mathcal{N}(\\vec{\\map},\\bf \\Gamma_{\\text{post}})$\n", + "\n", + "The mean of this posterior distribution, $\\vec{\\map}$, is the\n", + "parameter vector maximizing the posterior, and\n", + "is known as the maximum a posteriori (MAP) point. It can be found\n", + "by minimizing the negative log of the posterior, which amounts to\n", + "solving a deterministic inverse problem) with appropriately weighted norms,\n", + "\n", + "$\n", + "\\map := \\underset{\\m}{\\arg \\min} \\; \\mathcal{J}(\\m) \\;:=\\;\n", + "\\Big( \n", + "-\\frac{1}{2} \\| \\bf f(\\m) - \\data \\|^2_{\\bf \\Gamma_{\\text{noise}}^{-1}} \n", + "-\\frac{1}{2} \\| \\m -\\m_{\\text prior} \\|^2_{\\bf \\Gamma_{\\text{prior}}^{-1}} \n", + "\\Big).\n", + "$\n", + "\n", + "The posterior covariance matrix is then given by the inverse of\n", + "the Hessian matrix of $\\mathcal{J}$ at $\\map$, namely\n", + "\n", + "$\n", + "\\bf \\Gamma_{\\text{post}} = \\left(\\Hmisfit(\\map) + \\bf \\Gamma_{\\text{prior}}^{-1} \\right)^{-1} \n", + "= \\left(\\prcov \\Hmisfit + \\matrix{I}\\right)^{-1}\\prcov\n", + "$\n", + "\n", + "#### The prior-preconditioned Hessian of the data misfit:\n", + "\n", + "$\n", + " \\HT := \\prcov \\Hmisfit\n", + "$\n", + "\n", + "#### The generalized eigenvalue problem:\n", + "\n", + "$\n", + " \\Hmisfit \\matrix{W} = \\prcov^{-1} \\matrix{W} \\matrix{\\Lambda},\n", + "$\n", + "\n", + "where $\\matrix{\\Lambda} = diag(\\lambda_i) \\in \\mathbb{R}^{n\\times n}$\n", + "contains the generalized eigenvalues and the columns of $\\matrix W\\in\n", + "\\mathbb R^{n\\times n}$ the generalized eigenvectors such that \n", + "$\\matrix{W}^T \\prcov^{-1} \\matrix{W} = \\matrix{I}$. 
Defining \n", + "$\\matrix V := \\prcov^{-1}\\matrix W$\n", + "\n", + "$\n", + "\\prcov \\Hmisfit = \\matrix{W} \\matrix{\\Lambda} \\matrix{V}^T.\n", + "$\n", + "\n", + "#### Randomized SVD algorithms to construct the approximate spectral decomposition: \n", + "\n", + "When the generalized eigenvalues $\\{\\lambda_i\\}$ decay rapidly, we can\n", + "extract a low-rank approximation of $\\HT$ by retaining only the $r$\n", + "largest eigenvalues and corresponding eigenvectors,\n", + "\n", + "$\n", + "\\HT \\approx \\matrix{W}_r \\matrix{\\Lambda}_r \\matrix{V}_r^T.\n", + "$\n", + "\n", + "Here, $\\matrix{W}_r \\in \\mathbb{R}^{n\\times r}$ contains only the $r$\n", + "eigenvectors of $\\HT$ that correspond to the $r$ largest eigenvalues,\n", + "which are assembled into the diagonal matrix $\\matrix{\\Lambda}_r = \\diag\n", + "(\\lambda_i) \\in \\mathbb{R}^{r \\times r}$, and\n", + "$\\matrix{V}_r=\\prcov^{-1} \\matrix{W}_r$.\n", + "\n", + "#### Invert with the Sherman-Morrison-Woodbury formula:\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\notag \\left(\\HT+ \\matrix{I}\\right)^{-1}\n", + " = \\matrix{I}-\\matrix{W}_r \\matrix{D}_r \\matrix{V}_r^T +\n", + " \\mathcal{O}\\left(\\sum_{i=r+1}^{n} \\frac{\\lambda_i}{\\lambda_i +\n", + " 1}\\right),\n", + "\\end{align}\n", + "$$\n", + "\n", + "where $\\matrix{D}_r :=\\diag(\\lambda_i/(\\lambda_i+1)) \\in\n", + "\\mathbb{R}^{r\\times r}$. The last term in this expression captures the\n", + "error due to truncation in terms of the discarded eigenvalues; this\n", + "provides a criterion for truncating the spectrum, namely that $r$ is\n", + "chosen such that $\\lambda_r$ is small relative to 1. \n", + "\n", + "#### The approximate posterior covariance:\n", + "\n", + "$$\n", + "\\postcov \\approx (\\matrix{I} - \\matrix{W}_r \\matrix{D}_r\n", + "\\matrix{V}_r^T) \\prcov = \n", + "\\prcov\n", + "- \\matrix{W}_r \\matrix{D}_r \\matrix{W}_r^T\n", + "$$\n", + "\n", + "#### Apply the inverse and square-root inverse Hessian to a vector (as needed for drawing samples from a Gaussian distribution with covariance $\\H^{-1}$)\n", + "\n", + "$$\n", + " \\H^{-1} \\bf v \\approx ( \\matrix{I}-\\Wr \\Dr \\Vr^T)\n", + " \\prcov \\bf v = \\big\\{ \\bf W_r \\big[ (\\matrix{\\Lambda}_r +\n", + " \\bf I_r)^{-1} - \\bf I_r \\big] \\Vr^T + \\bf I \\big\\} \\prcov \\bf v\n", + "$$\n", + " \n", + "$$\n", + " \\H^{-1/2} \\bf v \\approx \\big\\{ \\Wr \\big[ (\\matrix{\\Lambda}_r +\n", + " \\Ir)^{-1/2} - \\Ir \\big] \\Vr^T + \\bf I \\big\\} \\prcov^{1/2}\\bf v\n", + "$$\n", + "\n", + "### This tutorial shows:\n", + "\n", + "- convergence of the inexact Newton-CG algorithm\n", + "- low-rank-based approximation of the posterior covariance (built on a low-rank\n", + "approximation of the Hessian of the data misfit) \n", + "- how to construct the low-rank approximation of the Hessian of the data misfit\n", + "- how to apply the inverse and square-root inverse Hessian to a vector efficiently\n", + "- samples from the Gaussian approximation of the posterior\n", + "\n", + "### Goals:\n", + "\n", + "By the end of this notebook, you should be able to:\n", + "\n", + "- understand the Bayesian inverse framework\n", + "- visualise and understand the results\n", + "- modify the problem and code\n", + "\n", + "### Mathematical tools used:\n", + "\n", + "- Finite element method\n", + "- Derivation of gradiant and Hessian via the adjoint method\n", + "- inexact Newton-CG\n", + "- Armijo line search\n", + "- Bayes' formula\n", + "\n", + "### List of software used:\n", + "\n", + "- FEniCS, a 
parallel finite element element library for the discretization of partial differential equations\n", + "- PETSc, for scalable and efficient linear algebra operations and solvers\n", + "- Matplotlib, A great python package that I used for plotting many of the results\n", + "- Numpy, A python package for linear algebra. While extensive, this is mostly used to compute means and sums in this notebook." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import dolfin as dl\n", + "import sys\n", + "sys.path.append( \"../\" )\n", + "from hippylib import *\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "from PoissonModel import Poisson\n", + "\n", + "import nb\n", + "\n", + "import logging\n", + "logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "dl.set_log_active(False)\n", + "\n", + "#uncomment this to visualize a list of all the methods available \n", + "#help(Poisson)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Set up the mesh and finite element spaces\n", + "\n", + "We compute a two dimensional mesh of a unit square with nx by ny elements.\n", + "We define a P2 finite element space for the *state* and *adjoint* variable and P1 for the *parameter*." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ndim = 2\n", + "nx = 64\n", + "ny = 64\n", + "mesh = dl.UnitSquareMesh(nx, ny)\n", + "Vh2 = dl.FunctionSpace(mesh, 'Lagrange', 2)\n", + "Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1)\n", + "Vh = [Vh2, Vh1, Vh2]\n", + "print \"Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}\".format(Vh[STATE].dim(), Vh[PARAMETER].dim(), Vh[ADJOINT].dim())" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Set up the location of observation, Prior Information, and model\n", + "\n", + "We observe at *ntargets* random locations. *rel_noise* is the signal to noise ratio." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ntargets = 300\n", + "np.random.seed(seed=1)\n", + "targets = np.random.uniform(0.1,0.9, [ntargets, ndim] )\n", + "print \"Number of observation points: {0}\".format(ntargets)\n", + "\n", + "gamma = 2\n", + "delta = 5\n", + "prior = BiLaplacianPrior(Vh[PARAMETER], gamma, delta)\n", + " \n", + "print \"Prior regularization: (delta - gamma*Laplacian)^2: delta={0}, gamma={1}\".format(delta, gamma) \n", + " \n", + "atrue_expression = dl.Expression('log(2+7*(pow(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2),0.5) > 0.2)) - log(10)')\n", + "prior_mean_expression = dl.Expression('log(9) - log(10)')\n", + " \n", + "atrue = dl.interpolate(atrue_expression, Vh[PARAMETER]).vector()\n", + "prior.mean = dl.interpolate(prior_mean_expression, Vh[PARAMETER]).vector()\n", + "\n", + "objs = [dl.Function(Vh[PARAMETER],atrue), dl.Function(Vh[PARAMETER],prior.mean)]\n", + "mytitles = [\"True Parameter\", \"Prior mean\"]\n", + "nb.multi1_plot(objs, mytitles)\n", + "plt.show()\n", + " \n", + "rel_noise = 0.01\n", + "model = Poisson(mesh, Vh, atrue, targets, prior, rel_noise)\n", + "\n", + "utrue = model.generate_vector(STATE)\n", + "model.solveFwd(utrue, [utrue, atrue])\n", + "\n", + "vmax = max( utrue.max(), model.u_o.max() )\n", + "vmin = min( utrue.min(), model.u_o.min() )\n", + "\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(dl.Function(Vh[STATE], utrue), mytitle=\"True State\", subplot_loc=121, vmin=vmin, vmax=vmax)\n", + "nb.plot_pts(targets, model.u_o, mytitle=\"Observations\", subplot_loc=122, vmin=vmin, vmax=vmax)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Test the gradient and the Hessian of the model\n", + "\n", + "We test the gradient and the Hessian of the model using finite differences." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "a0 = dl.interpolate(dl.Expression(\"sin(x[0])\"), Vh[PARAMETER])\n", + "modelVerify(model, a0.vector(), 1e-4, 1e-6)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Compute the MAP point\n", + "\n", + "We used the globalized Newtown-CG method to compute the MAP point." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "a0 = prior.mean.copy()\n", + "solver = ReducedSpaceNewtonCG(model)\n", + "solver.parameters[\"rel_tolerance\"] = 1e-9\n", + "solver.parameters[\"abs_tolerance\"] = 1e-12\n", + "solver.parameters[\"max_iter\"] = 25\n", + "solver.parameters[\"inner_rel_tolerance\"] = 1e-15\n", + "solver.parameters[\"c_armijo\"] = 1e-4\n", + "solver.parameters[\"GN_iter\"] = 5\n", + " \n", + "x = solver.solve(a0)\n", + " \n", + "if solver.converged:\n", + " print \"\\nConverged in \", solver.it, \" iterations.\"\n", + "else:\n", + " print \"\\nNot Converged\"\n", + "\n", + "print \"Termination reason: \", solver.termination_reasons[solver.reason]\n", + "print \"Final gradient norm: \", solver.final_grad_norm\n", + "print \"Final cost: \", solver.final_cost\n", + "\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(dl.Function(Vh[STATE], x[STATE]), subplot_loc=121,mytitle=\"State\")\n", + "nb.plot(dl.Function(Vh[PARAMETER], x[PARAMETER]), subplot_loc=122,mytitle=\"Parameter\")\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Compute the low rank Gaussian approximation of the posterior\n", + "We used the *single pass* algorithm to compute a low-rank decomposition of the Hessian Misfit.\n", + "In particular, we solve\n", + "\n", + "$$ H_{\\rm misfit} u = \\lambda R u. $$\n", + "\n", + "The Figure shows the largest *k* generalized eigenvectors of the Hessian misfit.\n", + "The effective rank of the Hessian misfit is the number of eigenvalues above the red line (y=1).\n", + "The effective rank is independent of the mesh size." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "model.setPointForHessianEvaluations(x)\n", + "Hmisfit = ReducedHessian(model, solver.parameters[\"inner_rel_tolerance\"], gauss_newton_approx=False, misfit_only=True)\n", + "k = 20\n", + "p = 10\n", + "print \"Single/Double Pass Algorithm. 
Requested eigenvectors: {0}; Oversampling {1}.\".format(k,p)\n", + "Omega = np.random.randn(x[PARAMETER].array().shape[0], k+p)\n", + "#d, U = singlePassG(Hmisfit, prior.R, prior.Rsolver, Omega, k)\n", + "d, U = doublePassG(Hmisfit, prior.R, prior.Rsolver, Omega, k)\n", + "\n", + "posterior = GaussianLRPosterior(prior, d, U)\n", + "posterior.mean = x[PARAMETER]\n", + "\n", + "#d2, U2 = singlePass(Hmisfit, Omega, k)\n", + "\n", + "plt.plot(range(0,k), d, 'b*', range(0,k+1), np.ones(k+1), '-r')\n", + "plt.yscale('log')\n", + "plt.xlabel('number')\n", + "plt.ylabel('eigenvalue')\n", + "\n", + "nb.plot_eigenvectors(Vh[PARAMETER], U, mytitle=\"Eigenvector\", which=[0,1,2,5,10,15])" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prior and posterior pointwise variance fields" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "compute_trace = False\n", + "if compute_trace:\n", + " post_tr, prior_tr, corr_tr = posterior.trace(method=\"Estimator\", tol=5e-2, min_iter=20, max_iter=200)\n", + " print \"Posterior trace {0:5e}; Prior trace {1:5e}; Correction trace {2:5e}\".format(post_tr, prior_tr, corr_tr)\n", + "post_pw_variance, pr_pw_variance, corr_pw_variance = posterior.pointwise_variance(\"Exact\")\n", + "\n", + "objs = [dl.Function(Vh[PARAMETER], pr_pw_variance),\n", + " dl.Function(Vh[PARAMETER], post_pw_variance)]\n", + "mytitles = [\"Prior variance\", \"Posterior variance\"]\n", + "nb.multi1_plot(objs, mytitles, logscale=True)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Generate samples from Prior and Posterior" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nsamples = 5\n", + "noise = dl.Vector()\n", + "posterior.init_vector(noise,\"noise\")\n", + "noise_size = noise.array().shape[0]\n", + "s_prior = dl.Function(Vh[PARAMETER], name=\"sample_prior\")\n", + "s_post = dl.Function(Vh[PARAMETER], name=\"sample_post\")\n", + "\n", + "range_pr = 2*math.sqrt( pr_pw_variance.max() )\n", + "ps_max = 2*math.sqrt( post_pw_variance.max() ) + posterior.mean.max()\n", + "ps_min = -2*math.sqrt( post_pw_variance.max() ) + posterior.mean.min()\n", + "\n", + "for i in range(nsamples):\n", + " noise.set_local( np.random.randn( noise_size ) )\n", + " posterior.sample(noise, s_prior.vector(), s_post.vector())\n", + " plt.figure(figsize=(15,5))\n", + " nb.plot(s_prior, subplot_loc=121,mytitle=\"Prior sample\", vmin=-range_pr, vmax=range_pr)\n", + " nb.plot(s_post, subplot_loc=122,mytitle=\"Posterior sample\", vmin=ps_min, vmax=ps_max)\n", + " plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/PoissonDeterministic-InexactNewton.html b/PoissonDeterministic-InexactNewton.html new file mode 100644 index 0000000..f3269c9 --- /dev/null +++ b/PoissonDeterministic-InexactNewton.html @@ -0,0 +1,38336 @@ + + + +PoissonDeterministic-InexactNewton + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

Coefficient field inversion in an elliptic partial differential equation

We consider the estimation of a coefficient in an elliptic partial +differential equation as a model problem. Depending on the +interpretation of the unknowns and the type of measurements, this +model problem arises, for instance, in inversion for groundwater flow +or heat conductivity. It can also be interpreted as finding a +membrane with a certain spatially varying stiffness. Let +$\Omega\subset\mathbb{R}^n$, $n\in\{1,2,3\}$ be an open, bounded +domain and consider the following problem:

+$$ +\min_{a} J(a):=\frac{1}{2}\int_\Omega (u-u_d)^2\, dx + \frac{\gamma}{2}\int_\Omega|\nabla a|^2\,dx, +$$

where $u$ is the solution of

+$$ +\begin{split} +\quad -\nabla\cdot(\exp(a)\nabla u) &= f \text{ in }\Omega,\\ +u &= 0 \text{ on }\partial\Omega. +\end{split} +$$

Here $a\in U_{ad}:=\{a\in H^1(\Omega) \bigcap L^{\infty}(\Omega)\}$ is the unknown coefficient field, $u_d$ denotes (possibly noisy) data, $f\in H^{-1}(\Omega)$ is a given force, and $\gamma\ge 0$ is the regularization parameter.

+

The variational (or weak) form of the state equation:

Find $u\in H_0^1(\Omega)$ such that $(\exp(a)\nabla u,\nabla v) - (f,v) = 0, \text{ for all } v\in H_0^1(\Omega),$
+where $H_0^1(\Omega)$ is the space of functions vanishing on $\partial\Omega$ with square integrable derivatives. Here, $(\cdot\,,\cdot)$ denotes the $L^2$-inner product, i.e., for scalar functions $u,v$ defined on $\Omega$ we denote $(u,v) := \int_\Omega u(x) v(x) \,dx$.

+

Optimality System:

The Lagrangian functional $\mathscr{L}:H^1(\Omega)\times H_0^1(\Omega)\times H_0^1(\Omega)\rightarrow \mathbb{R}$, which we use as a tool to derive the optimality system, is given by

+$$ +\mathscr{L}(a,u,p):= \frac{1}{2}(u-u_d,u-u_d) + +\frac{\gamma}{2}(\nabla a, \nabla a) + (\exp(a)\nabla u,\nabla p) - (f,p). +$$

The Lagrange multiplier theory shows that, at a solution, all variations of the Lagrangian functional with respect to all variables must vanish. These variations of $\mathscr{L}$ with respect to $(p,u,a)$ in directions $(\tilde{u}, \tilde{p}, \tilde{a})$ are given by

+$$ + \begin{alignat}{2} + \mathscr{L}_p(a,u,p)(\tilde{p}) &= (\exp(a)\nabla u, \nabla \tilde{p}) - + (f,\tilde{p}) &&= 0,\\ + \mathscr{L}_u(a,u,p)(\tilde{u}) &= (\exp(a)\nabla p, \nabla \tilde{u}) + + (u-u_d,\tilde{u}) && = 0,\\ + \mathscr{L}_a(a,u,p)(\tilde{a}) &= \gamma(\nabla a, \nabla \tilde{a}) + + (\tilde{a}\exp(a)\nabla u, \nabla p) &&= 0, + \end{alignat} +$$

where the variations $(\tilde{u}, \tilde{p}, \tilde{a})$ are taken from the same spaces as $(u,p,a)$.

+

The gradient of the cost functional $\mathcal{J}(a)$ therefore is

+$$
+ \mathcal{G}(a)(\tilde a) = \gamma(\nabla a, \nabla \tilde{a}) +
+ (\tilde{a}\exp(a)\nabla u, \nabla p).
+$$
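The same first variations can be generated symbolically with UFL's derivative function, which is a convenient check of the hand-derived expressions above. A minimal sketch, assuming u, p, a are dolfin Functions on the spaces Vu, Vu, Va, and that ud, f, gamma are defined as in the code below:

# Sketch: symbolic first variations of the Lagrangian (assumes the names
# u, p, a, ud, f, gamma, Vu, Va from the notebook are in scope).
Lagr = 0.5*(u - ud)**2*dx + 0.5*gamma*inner(nabla_grad(a), nabla_grad(a))*dx \
       + inner(exp(a)*nabla_grad(u), nabla_grad(p))*dx - f*p*dx
state_res   = derivative(Lagr, p, TestFunction(Vu))  # weak form of the state equation
adjoint_res = derivative(Lagr, u, TestFunction(Vu))  # weak form of the adjoint equation
grad_form   = derivative(Lagr, a, TestFunction(Va))  # weak form of the gradient G(a)
g = assemble(grad_form)                              # gradient as a dof vector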

Inexact Newton-CG:

Newton's method requires second-order variational derivatives of the Lagrangian. Written in abstract form, it computes an update direction $(\hat a_k, \hat u_k,\hat p_k)$ from the following Newton step for the Lagrangian functional:

+$$ +\mathscr{L}''(a_k, u_k, p_k)\left[(\tilde + a, \tilde u, \tilde p),(\hat a_k, \hat u_k, \hat p_k)\right] = +-\mathscr{L}'(a_k,u_k,p_k)(\tilde a, \tilde u, \tilde p), +$$

for all variations $(\tilde a, \tilde u, \tilde p)$, where $\mathscr{L}'$ and $\mathscr{L}''$ denote the first and
+second variations of the Lagrangian. For the elliptic parameter inversion problem, this Newton step (written in variational form) is as follows: Find $(\hat u_k, \hat a_k,\hat p_k)$ as the solution of the linear system

+$$
+ \begin{array}{llll}
+ (\hat{u}_k, \tilde u) &+ (\hat{a}_k \exp(a_k)\nabla p_k, \nabla
+ \tilde u) &+ (\exp(a_k) \nabla \tilde u,
+ \nabla \hat p_k) &= (u_d - u_k, \tilde u)- (\exp(a_k) \nabla
+ p_k, \nabla \tilde u)\\
+ (\tilde a \exp(a_k) \nabla \hat u_k, \nabla p_k) &+ \gamma
+ (\nabla \hat a_k, \nabla \tilde a) + (\tilde a \hat a_k \exp(a_k)\nabla u_k, \nabla p_k) &+ (\tilde a
+ \exp(a_k) \nabla u_k, \nabla \hat p_k) &= - \gamma (\nabla a_k, \nabla\tilde a) - (\tilde
+ a \exp(a_k) \nabla u_k, \nabla p_k)\\
+ (\exp(a_k) \nabla \hat u_k, \nabla \tilde p) &+ (\hat a_k \exp(a_k) \nabla u_k, \nabla
+ \tilde p) & &= - (\exp(a_k) \nabla u_k,
+ \nabla \tilde p) + (f, \tilde p),
+ \end{array}
+$$

for all $(\tilde u, \tilde a, \tilde p)$.

+

Discrete Newton system:

$ +\def\tu{\tilde u} +\def\btu{\bf \tilde u} +\def\ta{\tilde a} +\def\bta{\bf \tilde a} +\def\tp{\tilde p} +\def\btp{\bf \tilde p} +\def\hu{\hat u} +\def\bhu{\bf \hat u} +\def\ha{\hat a} +\def\bha{\bf \hat a} +\def\hp{\hat p} +\def\bhp{\bf \hat p} +$ +The discretized Newton step: denote the vectors corresponding to the discretization of the functions $\ha_k,\hu_k, \hp_k$ by $\bf \bha_k, \bhu_k$ and $\bhp_k$. Then, the discretization of the above system is given by the following symmetric linear system:

+$$ + \begin{bmatrix} + \bf W_{\scriptsize\mbox{uu}} & \bf W_{\scriptsize\mbox{ua}} & \bf A^T \\ + \bf W_{\scriptsize\mbox{au}} & \bf R + \bf R_{\scriptsize\mbox{aa}}& \bf C^T \\ + \bf A & \bf C & 0 +\end{bmatrix} +\left[ + \begin{array}{c} + \bhu_k \\ + \bha_k \\ + \bhp_k + \end{array} \right] = +-\left[ + \begin{array}{ccc} + \bf{g}_u\\ + \bf{g}_a\\ + \bf{g}_p +\end{array} + \right], +$$

where $\bf W_{\scriptsize \mbox{uu}}$, $\bf W_{\scriptsize\mbox{ua}}$, $\bf W_{\scriptsize\mbox{au}}$, and $\bf R$ are the components of the Hessian matrix of the Lagrangian, $\bf A$ and $\bf C$ are the Jacobians of the state equation with respect to the state and the control variables, respectively, and $\bf g_u$, $\bf g_a$, and $\bf g_p$ are the discrete gradients of the Lagrangian with respect to $\bf u$, $\bf a$ and $\bf p$, respectively.

+

Reduced Hessian apply:

To eliminate the incremental state and adjoint variables, $\bhu_k$ and $\bhp_k$, from the first and last equations we use

+$$ +\begin{align} +\bhu_k &= -\bf A^{-1} \bf C \, \bha_k,\\ +\bhp_k &= -\bf A^{-T} (\bf W_{\scriptsize\mbox{uu}} \bhu_k + +\bf W_{\scriptsize\mbox{ua}}\,\bha_k). +\end{align} +$$

This results in the following reduced linear system for the Newton step

+$$ + \bf H \, \bha_k = -\bf{g}_a, +$$

with the reduced Hessian $\bf H$ applied to a vector $\bha$ given by

+$$ + \bf H \bha = \underbrace{(\bf R + \bf R_{\scriptsize\mbox{aa}})}_{\text{Hessian of the regularization}} \bha + + \underbrace{(\bf C^{T}\bf A^{-T} (\bf W_{\scriptsize\mbox{uu}} + \bf A^{-1} \bf C - \bf W_{\scriptsize\mbox{ua}}) - + \bf W_{\scriptsize\mbox{au}} \bf A^{-1} + \bf C)}_{\text{Hessian of the data misfit}}\;\bha. +$$
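To make the two inner solves explicit, here is a dense toy version of this matrix-free product for the Gauss-Newton case (i.e., dropping the $\bf W_{\scriptsize\mbox{ua}}$, $\bf W_{\scriptsize\mbox{au}}$ and $\bf R_{\scriptsize\mbox{aa}}$ terms); the matrices below are random stand-ins, not the ones assembled in this notebook.

import numpy as np

m, n = 15, 10                                   # toy state / parameter sizes
rng = np.random.RandomState(0)
A = np.eye(m) + 0.1*rng.randn(m, m)             # stand-in for the state Jacobian
C = rng.randn(m, n)                             # stand-in for the control Jacobian
W = np.eye(m)                                   # stand-in for W_uu
R = 1e-8*np.eye(n)                              # stand-in for the regularization

def Hgn_apply(v):
    uhat = -np.linalg.solve(A, C.dot(v))        # linearized forward solve
    phat = -np.linalg.solve(A.T, W.dot(uhat))   # adjoint solve
    return R.dot(v) + C.T.dot(phat)             # Gauss-Newton reduced Hessian times v

print(Hgn_apply(rng.randn(n)))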

Goals:

By the end of this notebook, you should be able to:

+
  • solve the forward and adjoint Poisson equations
  • understand the inverse method framework
  • visualise and understand the results
  • modify the problem and code
+

Mathematical tools used:

  • Finite element method
  • Derivation of gradient and Hessian via the adjoint method
  • inexact Newton-CG
  • Armijo line search
+

List of software used:

  • FEniCS, a parallel finite element library for the discretization of partial differential equations
  • PETSc, for scalable and efficient linear algebra operations and solvers
  • Matplotlib, a python package used for plotting the results
  • Numpy, a python package for linear algebra
+ +
+
+
+
+
+
+
+
+

Set up

Import dependencies

+
+
+
+
+
+
In [1]:
+
+
+
from dolfin import *
+
+import numpy as np
+import time
+import logging
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+import nb
+
+start = time.clock()
+
+logging.getLogger('FFC').setLevel(logging.WARNING)
+logging.getLogger('UFL').setLevel(logging.WARNING)
+set_log_active(False)
+
+np.random.seed(seed=1)
+
+ +
+
+
+ +
+
+
+
+
+
+

Model set up:

As in the introduction, the first thing we need to do is set up the numerical model. In this cell, we set up the mesh, the finite element functions $u$, $p$, $a$ corresponding to the state, adjoint and coefficient variables, the corresponding trial and test functions, and the parameters for the optimization.

+ +
+
+
+
+
+
In [2]:
+
+
+
# create mesh and define function spaces
+nx = 64
+ny = 64
+mesh = UnitSquareMesh(nx, ny)
+Va = FunctionSpace(mesh, 'Lagrange', 1)
+Vu = FunctionSpace(mesh, 'Lagrange', 2)
+
+# The true and inverted parameter
+atrue = interpolate(Expression('log(2 + 7*(pow(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2),0.5) > 0.2))'),Va)
+a = interpolate(Expression("log(2.0)"),Va)
+
+# define function for state and adjoint
+u = Function(Vu)
+p = Function(Vu)
+
+# define Trial and Test Functions
+u_trial, p_trial, a_trial = TrialFunction(Vu), TrialFunction(Vu), TrialFunction(Va)
+u_test, p_test, a_test = TestFunction(Vu), TestFunction(Vu), TestFunction(Va)
+
+# initialize input functions
+f = Constant("1.0")
+u0 = Constant("0.0")
+
+
+# plot
+plt.figure(figsize=(15,5))
+nb.plot(mesh,subplot_loc=121, mytitle="Mesh", show_axis='on')
+nb.plot(atrue,subplot_loc=122, mytitle="True parameter field")
+plt.show()
+
+ +
+
+
+ +
+
+ + +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
# set up dirichlet boundary conditions
+def boundary(x,on_boundary):
+    return on_boundary
+
+bc_state = DirichletBC(Vu, u0, boundary)
+bc_adj = DirichletBC(Vu, Constant(0.), boundary)
+
+ +
+
+
+ +
+
+
+
+
+
+

Set up synthetic observations:

  • Propose a coefficient field $a_{\text true}$ shown above.
  • The weak form of the PDE: find $u\in H_0^1(\Omega)$ such that $\underbrace{(\exp(a_{\text true})\nabla u,\nabla v)}_{\; := \; a_{pde}} - \underbrace{(f,v)}_{\; := \;L_{pde}} = 0, \text{ for all } v\in H_0^1(\Omega)$.
  • Perturb the solution: $u = u + \eta$, where $\eta \sim \mathcal{N}(0, \sigma^2)$.
+ +
+
+
+
+
+
In [4]:
+
+
+
# noise level
+noise_level = 0.05
+
+# weak form for setting up the synthetic observations
+a_goal = inner(exp(atrue) * nabla_grad(u_trial), nabla_grad(u_test)) * dx
+L_goal = f * u_test * dx
+
+# solve the forward/state problem to generate synthetic observations
+goal_A, goal_b = assemble_system(a_goal, L_goal, bc_state)
+
+utrue = Function(Vu)
+solve(goal_A, utrue.vector(), goal_b)
+
+ud = Function(Vu)
+ud.assign(utrue)
+
+# perturb state solution and create synthetic measurements ud
+# ud = u + ||u||/SNR * random.normal
+MAX = ud.vector().norm("linf")
+noise = Vector()
+goal_A.init_vector(noise,1)
+noise.set_local( noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())) )
+bc_adj.apply(noise)
+
+ud.vector().axpy(1., noise)
+
+# plot
+nb.multi1_plot([utrue, ud], ["State solution with atrue", "Synthetic observations"])
+plt.show()
+
+ +
+
+
+ +
+
+ + +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+
+

The cost function evaluation:

$$
+J(a):=\underbrace{\frac{1}{2}\int_\Omega (u-u_d)^2\, dx}_{\text{misfit}} + \underbrace{\frac{\gamma}{2}\int_\Omega|\nabla a|^2\,dx}_{\text{reg}}
+$$

In the code below, $W$ and $R$ are symmetric positive definite matrices that stem from finite element discretization of the misfit and regularization component of the cost functional, respectively.

+ +
+
+
+
+
+
In [5]:
+
+
+
# Regularization parameter
+gamma = 1e-8
+
+# weak forms for setting up the misfit and regularization component of the cost
+W_equ   = inner(u_trial, u_test) * dx
+R_equ   = gamma * inner(nabla_grad(a_trial), nabla_grad(a_test)) * dx
+
+W = assemble(W_equ)
+R = assemble(R_equ)
+
+# Define cost function
+def cost(u, ud, a, W, R):
+    diff = u.vector() - ud.vector()
+    reg = 0.5 * a.vector().inner(R*a.vector() ) 
+    misfit = 0.5 * diff.inner(W * diff)
+    return [reg + misfit, misfit, reg]
+
+ +
+
+
+ +
+
+
+
+
+
+

Setting up the state equations, right hand side for the adjoint and the necessary matrices:

$$
+ \begin{array}{llll}
+ (\hat{u}_k, \tilde u) &+ (\hat{a}_k \exp(a_k)\nabla p_k, \nabla
+ \tilde u) &+ (\exp(a_k) \nabla \tilde u,
+ \nabla \hat p_k) &= (u_d - u_k, \tilde u)- (\exp(a_k) \nabla
+ p_k, \nabla \tilde u)\\
+ (\tilde a \exp(a_k) \nabla \hat u_k, \nabla p_k) &+ \gamma
+ (\nabla \hat a_k, \nabla \tilde a) + (\tilde a \hat a_k \exp(a_k)\nabla u_k, \nabla p_k) &+ (\tilde a
+ \exp(a_k) \nabla u_k, \nabla \hat p_k) &= - \gamma (\nabla a_k, \nabla\tilde a) - (\tilde
+ a \exp(a_k) \nabla u_k, \nabla p_k)\\
+ (\exp(a_k) \nabla \hat u_k, \nabla \tilde p) &+ (\hat a_k \exp(a_k) \nabla u_k, \nabla
+ \tilde p) & &= - (\exp(a_k) \nabla u_k,
+ \nabla \tilde p) + (f, \tilde p),
+ \end{array}
+$$
+
+
+
+
+
+
In [6]:
+
+
+
# weak form for setting up the state equation
+a_state = inner(exp(a) * nabla_grad(u_trial), nabla_grad(u_test)) * dx
+L_state = f * u_test * dx
+
+# weak form for setting up the adjoint equation
+a_adj = inner(exp(a) * nabla_grad(p_trial), nabla_grad(p_test)) * dx
+L_adj = -inner(u - ud, p_test) * dx
+
+# weak form for setting up matrices
+Wua_equ = inner(exp(a) * a_trial * nabla_grad(p_test), nabla_grad(p)) * dx
+C_equ   = inner(exp(a) * a_trial * nabla_grad(u), nabla_grad(u_test)) * dx
+Raa_equ = inner(exp(a) * a_trial * a_test *  nabla_grad(u),  nabla_grad(p)) * dx
+
+M_equ   = inner(a_trial, a_test) * dx
+
+# assemble matrix M
+M = assemble(M_equ)
+
+ +
+
+
+ +
+
+
+
+
+
+

Initial guess

We solve the state equation and compute the cost functional for the initial guess of the parameter a_ini.

+ +
+
+
+
+
+
In [7]:
+
+
+
# solve state equation
+state_A, state_b = assemble_system (a_state, L_state, bc_state)
+solve (state_A, u.vector(), state_b)
+
+# evaluate cost
+[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R)
+
+# plot
+plt.figure(figsize=(15,5))
+nb.plot(a,subplot_loc=121, mytitle="a_ini", vmin=atrue.vector().min(), vmax=atrue.vector().max())
+nb.plot(u,subplot_loc=122, mytitle="u(a_ini)")
+plt.show()
+
+ +
+
+
+ +
+
+ + +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+
+

The reduced Hessian apply to a vector v:

Here we describe how to apply the reduced Hessian operator to a vector v. For a suitable choice of the regularization, the reduced Hessian operator evaluated in a neighborhood of the solution is positive definite, whereas far from the solution the reduced Hessian may be indefinite. In contrast, the Gauss-Newton approximation of the Hessian is always positive definite.

+

For this reason, it is beneficial to perform a few initial Gauss-Newton steps (6 in this particular example, matching the iter <= 6 test in the code below) to accelerate the convergence of the inexact Newton-CG algorithm.

+

The Hessian apply reads:
+$$
+\begin{align}
+\bhu &= -\bf A^{-1} \bf C \bf v\, & \text{linearized forward}\\
+\bhp &= -\bf A^{-T} (\bf W_{\scriptsize\mbox{uu}} \bhu +
+\bf W_{\scriptsize\mbox{ua}}\,\bf v) & \text{adjoint}\\
+\bf H \bf v &= (\bf R + \bf R_{\scriptsize\mbox{aa}})\bf v + \bf C^T \bhp + \bf W_{\scriptsize\mbox{au}} \bhu.
+\end{align}
+$$

+

The Gauss-Newton Hessian apply is obtained by dropping the second-derivative terms $\bf W_{\scriptsize\mbox{ua}}\,\bf v$, $\bf R_{\scriptsize\mbox{aa}}\bf v$, and $\bf W_{\scriptsize\mbox{au}} \bhu$:
+$$
+\begin{align}
+\bhu &= -\bf A^{-1} \bf C \bf v\, & \text{linearized forward}\\
+\bhp &= -\bf A^{-T} \bf W_{\scriptsize\mbox{uu}} \bhu & \text{adjoint}\\
+\bf H_{\rm GN} \bf v &= \bf R \bf v + \bf C^T \bhp.
+\end{align}
+$$

+ +
+
+
+
+
+
In [8]:
+
+
+
# define (Gauss-Newton) Hessian apply H * v
+def Hess_GN (v, R, C, A, adj_A, W):
+    rhs = -(C * v)
+    bc_adj.apply(rhs)
+    solve (A, du, rhs)
+    rhs = - (W * du)
+    bc_adj.apply(rhs)
+    solve (adj_A, dp, rhs)
+    CT_dp = Vector()
+    C.init_vector(CT_dp, 1)
+    C.transpmult(dp, CT_dp)
+    H_V = R * v + CT_dp
+    return H_V
+
+# define (Newton) Hessian apply H * v
+def Hess_Newton (v, R, Raa, C, A, adj_A, W, Wua):
+    rhs = -(C * v)
+    bc_adj.apply(rhs)
+    solve (A, du, rhs)
+    rhs = -(W * du) -  Wua * v
+    bc_adj.apply(rhs)
+    solve (adj_A, dp, rhs)
+    CT_dp = Vector()
+    C.init_vector(CT_dp, 1)
+    C.transpmult(dp, CT_dp)
+    Wua_du = Vector()
+    Wua.init_vector(Wua_du, 1)
+    Wua.transpmult(du, Wua_du)
+    H_V = R*v + Raa*v + CT_dp + Wua_du
+    return H_V
+
+# Create class MyLinearOperator to apply the Hessian (or its Gauss-Newton approximation) to a vector
+class MyLinearOperator(LinearOperator):
+    cgiter = 0
+    def __init__(self, R, Raa, C, A, adj_A, W, Wua):
+        LinearOperator.__init__(self, a_delta, a_delta)
+        self.R = R
+        self.Raa = Raa
+        self.C = C
+        self.A = A
+        self.adj_A = adj_A
+        self.W = W
+        self.Wua = Wua
+
+    # Hessian performed on x, output as generic vector y
+    def mult(self, x, y):
+        self.cgiter += 1
+        y.zero()
+        if iter <= 6:
+            y.axpy(1., Hess_GN (x, self.R, self.C, self.A, self.adj_A, self.W) )
+        else:
+            y.axpy(1., Hess_Newton (x, self.R, self.Raa, self.C, self.A, self.adj_A, self.W, self.Wua) )
+
+ +
+
+
+ +
+
+
+
+
+
+

The inexact Newton-CG optimization with Armijo line search:

We solve the constrained optimization problem using the inexact Newton-CG method with Armijo line search.

+

The stopping criterion is based on a relative reduction of the norm of the gradient (i.e. $\frac{\|g_{n}\|}{\|g_{0}\|} \leq \tau$).

+

First, we compute the gradient by solving the state and adjoint equations for the current parameter $a$, and then substituting the current state $u$, parameter $a$ and adjoint $p$ variables in the weak form expression of the gradient:
+$$ (g, \tilde{a}) = \gamma(\nabla a, \nabla \tilde{a}) +(\tilde{a}\exp(a)\nabla u, \nabla p).$$

+

Then, we compute the Newton direction $\delta a$ by iteratively solving ${\bf H} {\delta a} = - {\bf g}$. +The Newton system is solved inexactly by early termination of conjugate gradient iterations via Eisenstat–Walker (to prevent oversolving) and Steihaug (to avoid negative curvature) criteria.

+

Finally, the Armijo line search uses backtracking to find a step length $\alpha$ that achieves a sufficient reduction in the cost functional; more specifically, we accept the first $\alpha$ such that:
+$$J( a + \alpha \delta a ) \leq J(a) + \alpha c_{\rm armijo} (\delta a,g). $$

+ +
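Before running the optimization loop, the gradient computation can be sanity-checked with a finite-difference probe. This is a minimal sketch with hypothetical helpers evalJ(a_vec) (solves the state equation and returns the cost) and evalG(a_vec) (returns the gradient vector); neither helper is defined in this notebook.

import numpy as np

def fd_gradient_check(evalJ, evalG, a_vec, da_vec, eps=1e-5):
    # compare <g, da> against a one-sided finite difference of J along da
    analytic = np.dot(evalG(a_vec), da_vec)
    fd = (evalJ(a_vec + eps*da_vec) - evalJ(a_vec))/eps
    return abs(fd - analytic)/max(abs(analytic), 1e-14)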
+
+
+
+
+
In [9]:
+
+
+
# define parameters for the optimization
+tol = 1e-8
+c = 1e-4
+maxiter = 12
+plot_on = True
+
+# initialize iter counters
+iter = 1
+total_cg_iter = 0
+converged = False
+
+# initializations
+g, a_delta = Vector(), Vector()
+R.init_vector(a_delta,0)
+R.init_vector(g,0)
+
+du, dp = Vector(), Vector()
+W.init_vector(du,1)
+W.init_vector(dp,0)
+
+a_prev, a_diff = Function(Va), Function(Va)
+
+print "Nit   CGit   cost          misfit        reg           sqrt(-G*D)    ||grad||       alpha  tolcg"
+
+while iter <  maxiter and not converged:
+
+    # assemble matrix C
+    C =  assemble(C_equ)
+
+    # solve the adjoint problem
+    adjoint_A, adjoint_RHS = assemble_system(a_adj, L_adj, bc_adj)
+    solve(adjoint_A, p.vector(), adjoint_RHS)
+
+    # assemble W_ua and R
+    Wua = assemble (Wua_equ)
+    Raa = assemble (Raa_equ)
+
+    # evaluate the  gradient
+    CT_p = Vector()
+    C.init_vector(CT_p,1)
+    C.transpmult(p.vector(), CT_p)
+    MG = CT_p + R * a.vector()
+    solve(M, g, MG)
+
+    # calculate the norm of the gradient
+    grad2 = g.inner(MG)
+    gradnorm = sqrt(grad2)
+
+    # set the CG tolerance (use Eisenstat–Walker termination criterion)
+    if iter == 1:
+        gradnorm_ini = gradnorm
+    tolcg = min(0.5, sqrt(gradnorm/gradnorm_ini))
+
+    # define the Hessian apply operator (with preconditioner)
+    Hess_Apply = MyLinearOperator(R, Raa, C, state_A, adjoint_A, W, Wua )
+    P = R + gamma * M
+    solver = PETScKrylovSolver("cg", "amg")
+    solver.set_operators(Hess_Apply, P)
+    solver.parameters["relative_tolerance"] = tolcg
+    #solver.parameters["error_on_nonconvergence"] = False
+    solver.parameters["nonzero_initial_guess"] = False
+
+    # solve the Newton system H a_delta = - MG
+    solver.solve(a_delta, -MG)
+    total_cg_iter += Hess_Apply.cgiter
+    
+    # linesearch
+    alpha = 1
+    descent = 0
+    no_backtrack = 0
+    a_prev.assign(a)
+    while descent == 0 and no_backtrack < 10:
+        a.vector().axpy(alpha, a_delta )
+
+        # solve the state/forward problem
+        state_A, state_b = assemble_system(a_state, L_state, bc_state)
+        solve(state_A, u.vector(), state_b)
+
+        # evaluate cost
+        [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R)
+
+        # check if Armijo conditions are satisfied
+        if cost_new < cost_old + alpha * c * MG.inner(a_delta):
+            cost_old = cost_new
+            descent = 1
+        else:
+            no_backtrack += 1
+            alpha *= 0.5
+            a.assign(a_prev)  # reset a
+
+    # calculate sqrt(-G * D)
+    graddir = sqrt(- MG.inner(a_delta) )
+
+    sp = ""
+    print "%2d %2s %2d %3s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %5.2f %1s %5.3e" % \
+        (iter, sp, Hess_Apply.cgiter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \
+         graddir, sp, gradnorm, sp, alpha, sp, tolcg)
+
+    if plot_on:
+        nb.multi1_plot([a,u,p], ["a","u","p"], same_colorbar=False)
+        plt.show()
+    
+    # check for convergence
+    if gradnorm < tol and iter > 1:
+        converged = True
+        print "Newton's method converged in ",iter,"  iterations"
+        print "Total number of CG iterations: ", total_cg_iter
+        
+    iter += 1
+    
+if not converged:
+    print "Newton's method did not converge in ", maxiter, " iterations"
+
+print "Time elapsed: ", time.clock()-start
+
+Nit   CGit   cost          misfit        reg           sqrt(-G*D)    ||grad||       alpha  tolcg
+ 1     1     1.12741e-05   1.12740e-05   5.75936e-12   1.56536e-02   3.79427e-04    1.00   5.000e-01
+ 2     1     7.82535e-07   7.82519e-07   1.58332e-11   4.68280e-03   5.35275e-05    1.00   3.756e-01
+ 3     1     3.13333e-07   3.13312e-07   2.11376e-11   9.71692e-04   7.15226e-06    1.00   1.373e-01
+ 4     2     2.17811e-07   2.03078e-07   1.47325e-08   4.17187e-04   1.01345e-06    1.00   5.168e-02
+ 5     1     2.14705e-07   1.99965e-07   1.47398e-08   7.88224e-05   6.69907e-07    1.00   4.202e-02
+ 6     7     1.81553e-07   1.46430e-07   3.51226e-08   2.39438e-04   4.43884e-07    1.00   3.420e-02
+ 7     1     1.81335e-07   1.46206e-07   3.51285e-08   2.08560e-05   1.50796e-07    1.00   1.994e-02
+ 8    10     1.80335e-07   1.38570e-07   4.17649e-08   4.57338e-05   7.61066e-08    1.00   1.416e-02
+ 9     3     1.80332e-07   1.38803e-07   4.15283e-08   2.72301e-06   7.83729e-09    1.00   4.545e-03
+[Figures: a, u, p plotted after each Newton iteration]
+Newton's method converged in  9   iterations
+Total number of CG iterations:  27
+Time elapsed:  24.951243
+
In [10]:
nb.multi1_plot([atrue, a], ["atrue", "a"])
+nb.multi1_plot([u,p], ["u","p"], same_colorbar=False)
+plt.show()
+
+[Figure: atrue and a; u and p]
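
To quantify the reconstruction quality one can also report a relative error between the recovered and the true parameter. A minimal sketch, assuming the notebook variables a, atrue, M and Va from the cells above (a_err is an illustrative name, not part of the notebook):

+a_err = Function(Va)
+a_err.assign(a)
+a_err.vector().axpy(-1., atrue.vector())
+err2  = a_err.vector().inner(M * a_err.vector())
+norm2 = atrue.vector().inner(M * atrue.vector())
+print "Relative L2 error in a: ", sqrt(err2/norm2)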
+ + diff --git a/PoissonDeterministic-InexactNewton.ipynb b/PoissonDeterministic-InexactNewton.ipynb new file mode 100644 index 0000000..a207885 --- /dev/null +++ b/PoissonDeterministic-InexactNewton.ipynb @@ -0,0 +1,715 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Coefficient field inversion in an elliptic partial differential equation\n", + "\n", + "We consider the estimation of a coefficient in an elliptic partial\n", + "differential equation as a model problem. Depending on the\n", + "interpretation of the unknowns and the type of measurements, this\n", + "model problem arises, for instance, in inversion for groundwater flow\n", + "or heat conductivity. It can also be interpreted as finding a\n", + "membrane with a certain spatially varying stiffness. Let\n", + "$\\Omega\\subset\\mathbb{R}^n$, $n\\in\\{1,2,3\\}$ be an open, bounded\n", + "domain and consider the following problem:\n", + "\n", + "$$\n", + "\\min_{a} J(a):=\\frac{1}{2}\\int_\\Omega (u-u_d)^2\\, dx + \\frac{\\gamma}{2}\\int_\\Omega|\\nabla a|^2\\,dx,\n", + "$$\n", + "\n", + "where $u$ is the solution of\n", + "\n", + "$$\n", + "\\begin{split}\n", + "\\quad -\\nabla\\cdot(\\exp(a)\\nabla u) &= f \\text{ in }\\Omega,\\\\\n", + "u &= 0 \\text{ on }\\partial\\Omega.\n", + "\\end{split}\n", + "$$\n", + "\n", + "Here $a\\in U_{ad}:=\\{a\\in H^1(\\Omega) \\bigcap L^{\\infty}(\\Omega)\\}$ the unknown coefficient field, $u_d$ denotes (possibly noisy) data, $f\\in H^{-1}(\\Omega)$ a given force, and $\\gamma\\ge 0$ the regularization parameter.\n", + "\n", + "### The variational (or weak) form of the state equation:\n", + "\n", + "Find $u\\in H_0^1(\\Omega)$ such that $(\\exp(a)\\nabla u,\\nabla v) - (f,v) = 0, \\text{ for all } v\\in H_0^1(\\Omega),$\n", + "where $H_0^1(\\Omega)$ is the space of functions vanishing on $\\partial\\Omega$ with square integrable derivatives. Here, $(\\cdot\\,,\\cdot)$ denotes the $L^2$-inner product, i.e, for scalar functions $u,v$ defined on $\\Omega$ we denote $(u,v) := \\int_\\Omega u(x) v(x) \\,dx$.\n", + "\n", + "### Optimality System:\n", + "\n", + "The Lagrangian functional $\\mathscr{L}:H^1(\\Omega)\\times H_0^1(\\Omega)\\times H_0^1(\\Omega)\\rightarrow \\mathbb{R}$, which we use as a tool to derive the optimality system, is given by\n", + "\n", + "$$\n", + "\\mathscr{L}(a,u,p):= \\frac{1}{2}(u-u_d,u-u_d) +\n", + "\\frac{\\gamma}{2}(\\nabla a, \\nabla a) + (\\exp(a)\\nabla u,\\nabla p) - (f,p).\n", + "$$\n", + "\n", + "The Lagrange multiplier theory shows that, at a solution all variations of the Lagrangian functional with respect to all variables must vanish. These variations of $\\mathscr{L}$ with respect to $(p,u,a)$ in directions $(\\tilde{u}, \\tilde{p}, \\tilde{a})$ are given by\n", + "\n", + "$$\n", + " \\begin{alignat}{2}\n", + " \\mathscr{L}_p(a,u,p)(\\tilde{p}) &= (\\exp(a)\\nabla u, \\nabla \\tilde{p}) -\n", + " (f,\\tilde{p}) &&= 0,\\\\\n", + " \\mathscr{L}_u(a,u,p)(\\tilde{u}) &= (\\exp(a)\\nabla p, \\nabla \\tilde{u}) +\n", + " (u-u_d,\\tilde{u}) && = 0,\\\\\n", + " \\mathscr{L}_a(a,u,p)(\\tilde{a}) &= \\gamma(\\nabla a, \\nabla \\tilde{a}) +\n", + " (\\tilde{a}\\exp(a)\\nabla u, \\nabla p) &&= 0,\n", + " \\end{alignat}\n", + "$$\n", + "\n", + "where the variations $(\\tilde{u}, \\tilde{p}, \\tilde{a})$ are taken from the same spaces as $(u,p,a)$. 
\n", + "\n", + "The gradient of the cost functional $\\mathcal{J}(a)$ therefore is\n", + "\n", + "$$\n", + " \\mathcal{G}(a)(\\tilde a) = \\gamma(\\nabla a, \\nabla \\tilde{a}) +\n", + " (\\tilde{a}\\exp(a)\\nabla u, \\nabla \\tilde{p}).\n", + "$$\n", + "\n", + "### Inexact Newton-CG:\n", + "\n", + "Newton's method requires second-order variational derivatives of the Lagrangian . Written in abstract form, it computes an update direction $(\\hat a_k, \\hat u_k,\\hat p_k)$ from the following Newton step for the Lagrangian functional:\n", + "\n", + "$$\n", + "\\mathscr{L}''(a_k, u_k, p_k)\\left[(\\tilde\n", + " a, \\tilde u, \\tilde p),(\\hat a_k, \\hat u_k, \\hat p_k)\\right] =\n", + "-\\mathscr{L}'(a_k,u_k,p_k)(\\tilde a, \\tilde u, \\tilde p),\n", + "$$\n", + "\n", + "for all variations $(\\tilde a, \\tilde u, \\tilde p)$, where $\\mathscr{L}'$ and $\\mathscr{L}''$ denote the first and\n", + "second variations of the Lagrangian. For the elliptic parameter inversion problem, this Newton step (written in variatonal form) is as follows: Find $(\\hat u_k, \\hat a_k,\\hat p_k)$ as the solution of the linear system\n", + "\n", + "$$\n", + " \\begin{array}{llll}\n", + " (\\hat{u}_k, \\tilde u) &+ (\\hat{a}_k \\exp(a_k)\\nabla p_k, \\nabla\n", + " \\tilde u) &+ (\\exp(a_k) \\nabla \\tilde u,\n", + " \\nabla \\hat p_k) &= (u_d - u_k, \\tilde u)- (\\exp(a_k) \\nabla\n", + " p_k, \\nabla \\tilde u)\\\\\n", + " (\\tilde a \\exp(a_k) \\nabla \\hat u_k, \\nabla p_k) &+ \\gamma\n", + " (\\nabla \\hat a_k, \\nabla \\tilde a) + (\\tilde a \\hat a_k \\exp(a_k)\\nabla u, \\nabla p) &+ (\\tilde a\n", + " \\exp(a_k) \\nabla u_k, \\nabla \\hat p_k) &= - \\gamma (\\nabla a_k, \\nabla\\tilde a) - (\\tilde\n", + " a \\exp(a_k) \\nabla u_k, \\nabla p_k)\\\\\n", + " (\\exp(a_k) \\nabla \\hat u_k, \\nabla \\tilde p) &+ (\\hat a_k \\exp(a_k) \\nabla u_k, \\nabla\n", + " \\tilde p) & &= - (\\exp(a_k) \\nabla u_k,\n", + " \\nabla \\tilde p) + (f, \\tilde p),\n", + " \\end{array}\n", + "$$\n", + "\n", + "for all $(\\tilde u, \\tilde a, \\tilde p)$.\n", + "\n", + "### Discrete Newton system:\n", + "$\n", + "\\def\\tu{\\tilde u}\n", + "\\def\\btu{\\bf \\tilde u}\n", + "\\def\\ta{\\tilde a}\n", + "\\def\\bta{\\bf \\tilde a}\n", + "\\def\\tp{\\tilde p}\n", + "\\def\\btp{\\bf \\tilde p}\n", + "\\def\\hu{\\hat u}\n", + "\\def\\bhu{\\bf \\hat u}\n", + "\\def\\ha{\\hat a}\n", + "\\def\\bha{\\bf \\hat a}\n", + "\\def\\hp{\\hat p}\n", + "\\def\\bhp{\\bf \\hat p}\n", + "$\n", + "The discretized Newton step: denote the vectors corresponding to the discretization of the functions $\\ha_k,\\hu_k, \\hp_k$ by $\\bf \\bha_k, \\bhu_k$ and $\\bhp_k$. 
Then, the discretization of the above system is given by the following symmetric linear system:\n", + "\n", + "$$\n", + " \\begin{bmatrix}\n", + " \\bf W_{\\scriptsize\\mbox{uu}} & \\bf W_{\\scriptsize\\mbox{ua}} & \\bf A^T \\\\\n", + " \\bf W_{\\scriptsize\\mbox{au}} & \\bf R + \\bf R_{\\scriptsize\\mbox{aa}}& \\bf C^T \\\\\n", + " \\bf A & \\bf C & 0\n", + "\\end{bmatrix}\n", + "\\left[\n", + " \\begin{array}{c}\n", + " \\bhu_k \\\\\n", + " \\bha_k \\\\\n", + " \\bhp_k\n", + " \\end{array} \\right] =\n", + "-\\left[\n", + " \\begin{array}{ccc}\n", + " \\bf{g}_u\\\\\n", + " \\bf{g}_a\\\\\n", + " \\bf{g}_p\n", + "\\end{array}\n", + " \\right],\n", + "$$\n", + "\n", + "where $\\bf W_{\\scriptsize \\mbox{uu}}$, $\\bf W_{\\scriptsize\\mbox{ua}}$, $\\bf W_{\\scriptsize\\mbox{au}}$, and $\\bf R$ are the components of the Hessian matrix of the Lagrangian, $\\bf A$ and $\\bf C$ are the Jacobian of the state equation with respect to the state and the control variables, respectively and $\\bf g_u$, $\\bf g_a$, and $\\bf g_p$ are the discrete gradients of the Lagrangian with respect to $\\bf u $, $\\bf a$ and $\\bf p$, respectively.\n", + "\n", + "### Reduced Hessian apply:\n", + "\n", + "To eliminate the incremental state and adjoint variables, $\\bhu_k$ and $\\bhp_k$, from the first and last equations we use\n", + "\n", + "$$\n", + "\\begin{align}\n", + "\\bhu_k &= -\\bf A^{-1} \\bf C \\, \\bha_k,\\\\\n", + "\\bhp_k &= -\\bf A^{-T} (\\bf W_{\\scriptsize\\mbox{uu}} \\bhu_k +\n", + "\\bf W_{\\scriptsize\\mbox{ua}}\\,\\bha_k).\n", + "\\end{align}\n", + "$$\n", + "\n", + "This results in the following reduced linear system for the Newton step\n", + "\n", + "$$\n", + " \\bf H \\, \\bha_k = -\\bf{g}_a,\n", + "$$\n", + "\n", + "with the reduced Hessian $\\bf H$ applied to a vector $\\bha$ given by\n", + "\n", + "$$\n", + " \\bf H \\bha = \\underbrace{(\\bf R + \\bf R_{\\scriptsize\\mbox{aa}})}_{\\text{Hessian of the regularization}} \\bha +\n", + " \\underbrace{(\\bf C^{T}\\bf A^{-T} (\\bf W_{\\scriptsize\\mbox{uu}}\n", + " \\bf A^{-1} \\bf C - \\bf W_{\\scriptsize\\mbox{ua}}) -\n", + " \\bf W_{\\scriptsize\\mbox{au}} \\bf A^{-1}\n", + " \\bf C)}_{\\text{Hessian of the data misfit}}\\;\\bha.\n", + "$$\n", + "\n", + "### Goals:\n", + "\n", + "By the end of this notebook, you should be able to:\n", + "\n", + "- solve the forward and adjoint Poisson equations\n", + "- understand the inverse method framework\n", + "- visualise and understand the results\n", + "- modify the problem and code\n", + "\n", + "### Mathematical tools used:\n", + "\n", + "- Finite element method\n", + "- Derivation of gradiant and Hessian via the adjoint method\n", + "- inexact Newton-CG\n", + "- Armijo line search\n", + "\n", + "### List of software used:\n", + "\n", + "- FEniCS, a parallel finite element element library for the discretization of partial differential equations\n", + "- PETSc, for scalable and efficient linear algebra operations and solvers\n", + "- Matplotlib, a python package used for plotting the results\n", + "- Numpy, a python package for linear algebra" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up\n", + "\n", + "### Import dependencies" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from dolfin import *\n", + "\n", + "import numpy as np\n", + "import time\n", + "import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import nb\n", + "\n", + "start = time.clock()\n", + "\n", + 
"logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "set_log_active(False)\n", + "\n", + "np.random.seed(seed=1)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model set up:\n", + "\n", + "As in the introduction, the first thing we need to do is set up the numerical model. In this cell, we set the mesh, the finite element functions $u, p, g$ corresponding to state, adjoint and coefficient/gradient variables, and the corresponding test functions and the parameters for the optimization." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# create mesh and define function spaces\n", + "nx = 64\n", + "ny = 64\n", + "mesh = UnitSquareMesh(nx, ny)\n", + "Va = FunctionSpace(mesh, 'Lagrange', 1)\n", + "Vu = FunctionSpace(mesh, 'Lagrange', 2)\n", + "\n", + "# The true and inverted parameter\n", + "atrue = interpolate(Expression('log(2 + 7*(pow(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2),0.5) > 0.2))'),Va)\n", + "a = interpolate(Expression(\"log(2.0)\"),Va)\n", + "\n", + "# define function for state and adjoint\n", + "u = Function(Vu)\n", + "p = Function(Vu)\n", + "\n", + "# define Trial and Test Functions\n", + "u_trial, p_trial, a_trial = TrialFunction(Vu), TrialFunction(Vu), TrialFunction(Va)\n", + "u_test, p_test, a_test = TestFunction(Vu), TestFunction(Vu), TestFunction(Va)\n", + "\n", + "# initialize input functions\n", + "f = Constant(\"1.0\")\n", + "u0 = Constant(\"0.0\")\n", + "\n", + "\n", + "# plot\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(mesh,subplot_loc=121, mytitle=\"Mesh\", show_axis='on')\n", + "nb.plot(atrue,subplot_loc=122, mytitle=\"True parameter field\")\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# set up dirichlet boundary conditions\n", + "def boundary(x,on_boundary):\n", + " return on_boundary\n", + "\n", + "bc_state = DirichletBC(Vu, u0, boundary)\n", + "bc_adj = DirichletBC(Vu, Constant(0.), boundary)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up synthetic observations:\n", + "\n", + "- Propose a coefficient field $a_{\\text true}$ shown above\n", + "- The weak form of the pde: \n", + " Find $u\\in H_0^1(\\Omega)$ such that $\\underbrace{(\\exp(a_{\\text true})\\nabla u,\\nabla v)}_{\\; := \\; a_{pde}} - \\underbrace{(f,v)}_{\\; := \\;L_{pde}} = 0, \\text{ for all } v\\in H_0^1(\\Omega)$.\n", + "\n", + "- Perturb the solution: $u = u + \\eta$, where $\\eta \\sim \\mathcal{N}(0, \\sigma)$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# noise level\n", + "noise_level = 0.05\n", + "\n", + "# weak form for setting up the synthetic observations\n", + "a_goal = inner(exp(atrue) * nabla_grad(u_trial), nabla_grad(u_test)) * dx\n", + "L_goal = f * u_test * dx\n", + "\n", + "# solve the forward/state problem to generate synthetic observations\n", + "goal_A, goal_b = assemble_system(a_goal, L_goal, bc_state)\n", + "\n", + "utrue = Function(Vu)\n", + "solve(goal_A, utrue.vector(), goal_b)\n", + "\n", + "ud = Function(Vu)\n", + "ud.assign(utrue)\n", + "\n", + "# perturb state solution and create synthetic measurements ud\n", + "# ud = u + ||u||/SNR * random.normal\n", + "MAX = ud.vector().norm(\"linf\")\n", + "noise = Vector()\n", + "goal_A.init_vector(noise,1)\n", + 
"noise.set_local( noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())) )\n", + "bc_adj.apply(noise)\n", + "\n", + "ud.vector().axpy(1., noise)\n", + "\n", + "# plot\n", + "nb.multi1_plot([utrue, ud], [\"State solution with atrue\", \"Synthetic observations\"])\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The cost function evaluation:\n", + "\n", + "$$\n", + "J(a):=\\underbrace{\\frac{1}{2}\\int_\\Omega (u-u_d)^2\\, dx}_{\\text misfit} + \\underbrace{\\frac{\\gamma}{2}\\int_\\Omega|\\nabla a|^2\\,dx}_{\\text reg}\n", + "$$\n", + "\n", + "In the code below, $W$ and $R$ are symmetric positive definite matrices that stem from finite element discretization of the misfit and regularization component of the cost functional, respectively." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Regularization parameter\n", + "gamma = 1e-8\n", + "\n", + "# weak for for setting up the misfit and regularization compoment of the cost\n", + "W_equ = inner(u_trial, u_test) * dx\n", + "R_equ = gamma * inner(nabla_grad(a_trial), nabla_grad(a_test)) * dx\n", + "\n", + "W = assemble(W_equ)\n", + "R = assemble(R_equ)\n", + "\n", + "# Define cost function\n", + "def cost(u, ud, a, W, R):\n", + " diff = u.vector() - ud.vector()\n", + " reg = 0.5 * a.vector().inner(R*a.vector() ) \n", + " misfit = 0.5 * diff.inner(W * diff)\n", + " return [reg + misfit, misfit, reg]" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Setting up the state equations, right hand side for the adjoint and the necessary matrices:\n", + "\n", + "$$\n", + " \\begin{array}{llll}\n", + " (\\hat{u}_k, \\tilde u) &+ (\\hat{a}_k \\exp(a_k)\\nabla p_k, \\nabla\n", + " \\tilde u) &+ (\\exp(a_k) \\nabla \\tilde u,\n", + " \\nabla \\hat p_k) &= (u_d - u_k, \\tilde u)- (\\exp(a_k) \\nabla\n", + " p_k, \\nabla \\tilde u)\\\\\n", + " (\\tilde a \\exp(a_k) \\nabla \\hat u_k, \\nabla p_k) &+ \\gamma\n", + " (\\nabla \\hat a_k, \\nabla \\tilde a) + (\\tilde a \\hat a_k \\exp(a_k)\\nabla u, \\nabla p) &+ (\\tilde a\n", + " \\exp(a_k) \\nabla u_k, \\nabla \\hat p_k) &= - \\gamma (\\nabla a_k, \\nabla\\tilde a) - (\\tilde\n", + " a \\exp(a_k) \\nabla u_k, \\nabla p_k)\\\\\n", + " (\\exp(a_k) \\nabla \\hat u_k, \\nabla \\tilde p) &+ (\\hat a_k \\exp(a_k) \\nabla u_k, \\nabla\n", + " \\tilde p) & &= - (\\exp(a_k) \\nabla u_k,\n", + " \\nabla \\tilde p) + (f, \\tilde p),\n", + " \\end{array}\n", + "$$\n" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# weak form for setting up the state equation\n", + "a_state = inner(exp(a) * nabla_grad(u_trial), nabla_grad(u_test)) * dx\n", + "L_state = f * u_test * dx\n", + "\n", + "# weak form for setting up the adjoint equation\n", + "a_adj = inner(exp(a) * nabla_grad(p_trial), nabla_grad(p_test)) * dx\n", + "L_adj = -inner(u - ud, p_test) * dx\n", + "\n", + "# weak form for setting up matrices\n", + "Wua_equ = inner(exp(a) * a_trial * nabla_grad(p_test), nabla_grad(p)) * dx\n", + "C_equ = inner(exp(a) * a_trial * nabla_grad(u), nabla_grad(u_test)) * dx\n", + "Raa_equ = inner(exp(a) * a_trial * a_test * nabla_grad(u), nabla_grad(p)) * dx\n", + "\n", + "M_equ = inner(a_trial, a_test) * dx\n", + "\n", + "# assemble matrix M\n", + "M = assemble(M_equ)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "### Initial guess\n", + "We solve the state equation and compute the cost functional for the initial guess of the parameter ``a_ini``" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# solve state equation\n", + "state_A, state_b = assemble_system (a_state, L_state, bc_state)\n", + "solve (state_A, u.vector(), state_b)\n", + "\n", + "# evaluate cost\n", + "[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R)\n", + "\n", + "# plot\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(a,subplot_loc=121, mytitle=\"a_ini\", vmin=atrue.vector().min(), vmax=atrue.vector().max())\n", + "nb.plot(u,subplot_loc=122, mytitle=\"u(a_ini)\")\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The reduced Hessian apply to a vector v:\n", + "\n", + "Here we describe how to apply the reduced Hessian operator to a vector v. For an opportune choice of the regularization, the reduced Hessian operator evaluated in a neighborhood of the solution is positive define, whereas far from the solution the reduced Hessian may be indefinite. On the constrary, the Gauss-Newton approximation of the Hessian is always positive defined.\n", + "\n", + "For this reason, it is beneficial to perform a few initial Gauss-Newton steps (5 in this particular example) to accelerate the convergence of the inexact Newton-CG algorithm.\n", + "\n", + "The Hessian apply reads:\n", + "$$\n", + "\\begin{align}\n", + "\\bhu &= -\\bf A^{-1} \\bf C \\bf v\\, & \\text{linearized forward}\\\\\n", + "\\bhp &= -\\bf A^{-T} (\\bf W_{\\scriptsize\\mbox{uu}} \\bhu +\n", + "\\bf W_{\\scriptsize\\mbox{ua}}\\,\\bha) & \\text{adjoint}\\\\\n", + "\\bf H \\bf v &= (\\bf R + \\bf R_{\\scriptsize\\mbox{aa}})\\bf v + \\bf C^T \\bhp + \\bf W_{\\scriptsize\\mbox{au}} \\bhu.\n", + "\\end{align}\n", + "$$\n", + "\n", + "The Gauss-Newton Hessian apply is obtained by dropping the second derivatives operators $\\bf W_{\\scriptsize\\mbox{ua}}\\,\\bha$, $\\bf R_{\\scriptsize\\mbox{aa}}\\bf v$, and $\\bf W_{\\scriptsize\\mbox{au}} \\bhu$:\n", + "$$\n", + "\\begin{align}\n", + "\\bhu &= -\\bf A^{-1} \\bf C \\bf v\\, & \\text{linearized forward}\\\\\n", + "\\bhp &= -\\bf A^{-T} \\bf W_{\\scriptsize\\mbox{uu}} \\bhu & \\text{adjoint}\\\\\n", + "\\bf H_{\\rm GN} \\bf v &= \\bf R \\bf v + \\bf C^T \\bhp.\n", + "\\end{align}\n", + "$$\n", + "\n" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# define (Gauss-Newton) Hessian apply H * v\n", + "def Hess_GN (v, R, C, A, adj_A, W):\n", + " rhs = -(C * v)\n", + " bc_adj.apply(rhs)\n", + " solve (A, du, rhs)\n", + " rhs = - (W * du)\n", + " bc_adj.apply(rhs)\n", + " solve (adj_A, dp, rhs)\n", + " CT_dp = Vector()\n", + " C.init_vector(CT_dp, 1)\n", + " C.transpmult(dp, CT_dp)\n", + " H_V = R * v + CT_dp\n", + " return H_V\n", + "\n", + "# define (Newton) Hessian apply H * v\n", + "def Hess_Newton (v, R, Raa, C, A, adj_A, W, Wua):\n", + " rhs = -(C * v)\n", + " bc_adj.apply(rhs)\n", + " solve (A, du, rhs)\n", + " rhs = -(W * du) - Wua * v\n", + " bc_adj.apply(rhs)\n", + " solve (adj_A, dp, rhs)\n", + " CT_dp = Vector()\n", + " C.init_vector(CT_dp, 1)\n", + " C.transpmult(dp, CT_dp)\n", + " Wua_du = Vector()\n", + " Wua.init_vector(Wua_du, 1)\n", + " Wua.transpmult(du, Wua_du)\n", + " H_V = R*v + Raa*v + CT_dp + Wua_du\n", + " return H_V\n", + "\n", + "# Creat Class MyLinearOperator to perform Hessian function\n", + "class MyLinearOperator(LinearOperator):\n", + 
" cgiter = 0\n", + " def __init__(self, R, Raa, C, A, adj_A, W, Wua):\n", + " LinearOperator.__init__(self, a_delta, a_delta)\n", + " self.R = R\n", + " self.Raa = Raa\n", + " self.C = C\n", + " self.A = A\n", + " self.adj_A = adj_A\n", + " self.W = W\n", + " self.Wua = Wua\n", + "\n", + " # Hessian performed on x, output as generic vector y\n", + " def mult(self, x, y):\n", + " self.cgiter += 1\n", + " y.zero()\n", + " if iter <= 6:\n", + " y.axpy(1., Hess_GN (x, self.R, self.C, self.A, self.adj_A, self.W) )\n", + " else:\n", + " y.axpy(1., Hess_Newton (x, self.R, self.Raa, self.C, self.A, self.adj_A, self.W, self.Wua) )" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The inexact Newton-CG optimization with Armijo line search:\n", + "\n", + "We solve the constrained optimization problem using the inexact Newton-CG method with Armijo line search.\n", + "\n", + "The stopping criterion is based on a relative reduction of the norm of the gradient (i.e. $\\frac{\\|g_{n}\\|}{\\|g_{0}\\|} \\leq \\tau$).\n", + "\n", + "First, we compute the gradient by solving the state and adjoint equation for the current parameter $a$, and then substituing the current state $u$, parameter $a$ and adjoint $p$ variables in the weak form expression of the gradient:\n", + "$$ (g, \\tilde{a}) = \\gamma(\\nabla a, \\nabla \\tilde{a}) +(\\tilde{a}\\nabla u, \\nabla p).$$\n", + "\n", + "Then, we compute the Newton direction $\\delta a$ by iteratively solving ${\\bf H} {\\delta a} = - {\\bf g}$.\n", + "The Newton system is solved inexactly by early termination of conjugate gradient iterations via Eisenstat\u2013Walker (to prevent oversolving) and Steihaug (to avoid negative curvature) criteria.\n", + "\n", + "Finally, the Armijo line search uses backtracking to find $\\alpha$ such that a sufficient reduction in the cost functional is achieved.\n", + "More specifically, we use backtracking to find $\\alpha$ such that:\n", + "$$J( a + \\alpha \\delta a ) \\leq J(a) + \\alpha c_{\\rm armijo} (\\delta a,g). 
$$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# define parameters for the optimization\n", + "tol = 1e-8\n", + "c = 1e-4\n", + "maxiter = 12\n", + "plot_on = True\n", + "\n", + "# initialize iter counters\n", + "iter = 1\n", + "total_cg_iter = 0\n", + "converged = False\n", + "\n", + "# initializations\n", + "g, a_delta = Vector(), Vector()\n", + "R.init_vector(a_delta,0)\n", + "R.init_vector(g,0)\n", + "\n", + "du, dp = Vector(), Vector()\n", + "W.init_vector(du,1)\n", + "W.init_vector(dp,0)\n", + "\n", + "a_prev, a_diff = Function(Va), Function(Va)\n", + "\n", + "print \"Nit CGit cost misfit reg sqrt(-G*D) ||grad|| alpha tolcg\"\n", + "\n", + "while iter < maxiter and not converged:\n", + "\n", + " # assemble matrix C\n", + " C = assemble(C_equ)\n", + "\n", + " # solve the adoint problem\n", + " adjoint_A, adjoint_RHS = assemble_system(a_adj, L_adj, bc_adj)\n", + " solve(adjoint_A, p.vector(), adjoint_RHS)\n", + "\n", + " # assemble W_ua and R\n", + " Wua = assemble (Wua_equ)\n", + " Raa = assemble (Raa_equ)\n", + "\n", + " # evaluate the gradient\n", + " CT_p = Vector()\n", + " C.init_vector(CT_p,1)\n", + " C.transpmult(p.vector(), CT_p)\n", + " MG = CT_p + R * a.vector()\n", + " solve(M, g, MG)\n", + "\n", + " # calculate the norm of the gradient\n", + " grad2 = g.inner(MG)\n", + " gradnorm = sqrt(grad2)\n", + "\n", + " # set the CG tolerance (use Eisenstat\u2013Walker termination criterion)\n", + " if iter == 1:\n", + " gradnorm_ini = gradnorm\n", + " tolcg = min(0.5, sqrt(gradnorm/gradnorm_ini))\n", + "\n", + " # define the Hessian apply operator (with preconditioner)\n", + " Hess_Apply = MyLinearOperator(R, Raa, C, state_A, adjoint_A, W, Wua )\n", + " P = R + gamma * M\n", + " solver = PETScKrylovSolver(\"cg\", \"amg\")\n", + " solver.set_operators(Hess_Apply, P)\n", + " solver.parameters[\"relative_tolerance\"] = tolcg\n", + " #solver.parameters[\"error_on_nonconvergence\"] = False\n", + " solver.parameters[\"nonzero_initial_guess\"] = False\n", + "\n", + " # solve the Newton system H a_delta = - MG\n", + " solver.solve(a_delta, -MG)\n", + " total_cg_iter += Hess_Apply.cgiter\n", + " \n", + " # linesearch\n", + " alpha = 1\n", + " descent = 0\n", + " no_backtrack = 0\n", + " a_prev.assign(a)\n", + " while descent == 0 and no_backtrack < 10:\n", + " a.vector().axpy(alpha, a_delta )\n", + "\n", + " # solve the state/forward problem\n", + " state_A, state_b = assemble_system(a_state, L_state, bc_state)\n", + " solve(state_A, u.vector(), state_b)\n", + "\n", + " # evaluate cost\n", + " [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R)\n", + "\n", + " # check if Armijo conditions are satisfied\n", + " if cost_new < cost_old + alpha * c * MG.inner(a_delta):\n", + " cost_old = cost_new\n", + " descent = 1\n", + " else:\n", + " no_backtrack += 1\n", + " alpha *= 0.5\n", + " a.assign(a_prev) # reset a\n", + "\n", + " # calculate sqrt(-G * D)\n", + " graddir = sqrt(- MG.inner(a_delta) )\n", + "\n", + " sp = \"\"\n", + " print \"%2d %2s %2d %3s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %5.2f %1s %5.3e\" % \\\n", + " (iter, sp, Hess_Apply.cgiter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \\\n", + " graddir, sp, gradnorm, sp, alpha, sp, tolcg)\n", + "\n", + " if plot_on:\n", + " nb.multi1_plot([a,u,p], [\"a\",\"u\",\"p\"], same_colorbar=False)\n", + " plt.show()\n", + " \n", + " # check for convergence\n", + " if gradnorm < tol and iter > 1:\n", + " converged = True\n", + " print \"Newton's method converged in \",iter,\" iterations\"\n", + " 
print \"Total number of CG iterations: \", total_cg_iter\n", + " \n", + " iter += 1\n", + " \n", + "if not converged:\n", + " print \"Newton's method did not converge in \", maxiter, \" iterations\"\n", + "\n", + "print \"Time elapsed: \", time.clock()-start" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nb.multi1_plot([atrue, a], [\"atrue\", \"a\"])\n", + "nb.multi1_plot([u,p], [\"u\",\"p\"], same_colorbar=False)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/PoissonDeterministic-InexactNewton.py b/PoissonDeterministic-InexactNewton.py new file mode 100644 index 0000000..aff8678 --- /dev/null +++ b/PoissonDeterministic-InexactNewton.py @@ -0,0 +1,319 @@ +# Coefficient field inversion in an elliptic partial differential equation +# +# Consider the following problem: +# +# min_a J(a):=1/2 int_Omega (u-ud)^2 dx +gamma/2 int_Omega | grad a|^2 dx +# +# where u is the solution of +# +# -div (exp{a} grad u) = f in Omega, +# u = 0 on partial Omega. +# +# Here a the unknown coefficient field, ud denotes (possibly noisy) data, $f\in H^{-1}(Omega)$ a given force, and $ gamma >= 0$ the regularization parameter. + + +from dolfin import * + +import numpy as np +import time +import logging + +import matplotlib.pyplot as plt +import nb + +start = time.clock() + +logging.getLogger('FFC').setLevel(logging.WARNING) +logging.getLogger('UFL').setLevel(logging.WARNING) +set_log_active(False) + +np.random.seed(seed=1) + +# create mesh and define function spaces +nx = 64 +ny = 64 +mesh = UnitSquareMesh(nx, ny) +Va = FunctionSpace(mesh, 'Lagrange', 1) +Vu = FunctionSpace(mesh, 'Lagrange', 2) + +# The true and inverted parameter +atrue = interpolate(Expression('log(2 + 7*(pow(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2),0.5) > 0.2))'),Va) +a = interpolate(Expression("log(2.0)"),Va) + +# define function for state and adjoint +u = Function(Vu) +p = Function(Vu) + +# define Trial and Test Functions +u_trial, p_trial, a_trial = TrialFunction(Vu), TrialFunction(Vu), TrialFunction(Va) +u_test, p_test, a_test = TestFunction(Vu), TestFunction(Vu), TestFunction(Va) + +# initialize input functions +f = Constant("1.0") +u0 = Constant("0.0") + + +# plot +plt.figure(figsize=(15,5)) +nb.plot(mesh,subplot_loc=121, mytitle="Mesh", show_axis='on') +nb.plot(atrue,subplot_loc=122, mytitle="True parameter field") + + +# set up dirichlet boundary conditions +def boundary(x,on_boundary): + return on_boundary + +bc_state = DirichletBC(Vu, u0, boundary) +bc_adj = DirichletBC(Vu, Constant(0.), boundary) + +# noise level +noise_level = 0.05 + +# weak form for setting up the synthetic observations +a_goal = inner(exp(atrue) * nabla_grad(u_trial), nabla_grad(u_test)) * dx +L_goal = f * u_test * dx + +# solve the forward/state problem to generate synthetic observations +goal_A, goal_b = assemble_system(a_goal, L_goal, bc_state) + +utrue = Function(Vu) +solve(goal_A, utrue.vector(), goal_b) + +ud = Function(Vu) +ud.assign(utrue) + +# perturb state solution and create synthetic measurements ud +# ud = u + ||u||/SNR * random.normal +MAX = ud.vector().norm("linf") +noise = Vector() +goal_A.init_vector(noise,1) +noise.set_local( noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())) ) +bc_adj.apply(noise) + +ud.vector().axpy(1., noise) + +# plot +nb.multi1_plot([utrue, ud], ["State solution with atrue", "Synthetic observations"]) + + +# 
Regularization parameter +gamma = 1e-8 + +# weak for for setting up the misfit and regularization compoment of the cost +W_equ = inner(u_trial, u_test) * dx +R_equ = gamma * inner(nabla_grad(a_trial), nabla_grad(a_test)) * dx + +W = assemble(W_equ) +R = assemble(R_equ) + +# Define cost function +def cost(u, ud, a, W, R): + diff = u.vector() - ud.vector() + reg = 0.5 * a.vector().inner(R*a.vector() ) + misfit = 0.5 * diff.inner(W * diff) + return [reg + misfit, misfit, reg] + +# weak form for setting up the state equation +a_state = inner(exp(a) * nabla_grad(u_trial), nabla_grad(u_test)) * dx +L_state = f * u_test * dx + +# weak form for setting up the adjoint equation +a_adj = inner(exp(a) * nabla_grad(p_trial), nabla_grad(p_test)) * dx +L_adj = -inner(u - ud, p_test) * dx + +# weak form for setting up matrices +Wua_equ = inner(exp(a) * a_trial * nabla_grad(p_test), nabla_grad(p)) * dx +C_equ = inner(exp(a) * a_trial * nabla_grad(u), nabla_grad(u_test)) * dx +Raa_equ = inner(exp(a) * a_trial * a_test * nabla_grad(u), nabla_grad(p)) * dx + +M_equ = inner(a_trial, a_test) * dx + +# assemble matrix M +M = assemble(M_equ) + +# solve state equation +state_A, state_b = assemble_system (a_state, L_state, bc_state) +solve (state_A, u.vector(), state_b) + +# evaluate cost +[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R) + +# plot +plt.figure(figsize=(15,5)) +nb.plot(a,subplot_loc=121, mytitle="a_ini", vmin=atrue.vector().min(), vmax=atrue.vector().max()) +nb.plot(u,subplot_loc=122, mytitle="u(a_ini)") + + +# define (Gauss-Newton) Hessian apply H * v +def Hess_GN (v, R, C, A, adj_A, W): + rhs = -(C * v) + bc_adj.apply(rhs) + solve (A, du, rhs) + rhs = - (W * du) + bc_adj.apply(rhs) + solve (adj_A, dp, rhs) + CT_dp = Vector() + C.init_vector(CT_dp, 1) + C.transpmult(dp, CT_dp) + H_V = R * v + CT_dp + return H_V + +# define (Newton) Hessian apply H * v +def Hess_Newton (v, R, Raa, C, A, adj_A, W, Wua): + rhs = -(C * v) + bc_adj.apply(rhs) + solve (A, du, rhs) + rhs = -(W * du) - Wua * v + bc_adj.apply(rhs) + solve (adj_A, dp, rhs) + CT_dp = Vector() + C.init_vector(CT_dp, 1) + C.transpmult(dp, CT_dp) + Wua_du = Vector() + Wua.init_vector(Wua_du, 1) + Wua.transpmult(du, Wua_du) + H_V = R*v + Raa*v + CT_dp + Wua_du + return H_V + +# Creat Class MyLinearOperator to perform Hessian function +class MyLinearOperator(LinearOperator): + cgiter = 0 + def __init__(self, R, Raa, C, A, adj_A, W, Wua): + LinearOperator.__init__(self, a_delta, a_delta) + self.R = R + self.Raa = Raa + self.C = C + self.A = A + self.adj_A = adj_A + self.W = W + self.Wua = Wua + + # Hessian performed on x, output as generic vector y + def mult(self, x, y): + self.cgiter += 1 + y.zero() + if iter <= 6: + y.axpy(1., Hess_GN (x, self.R, self.C, self.A, self.adj_A, self.W) ) + else: + y.axpy(1., Hess_Newton (x, self.R, self.Raa, self.C, self.A, self.adj_A, self.W, self.Wua) ) + +# define parameters for the optimization +tol = 1e-8 +c = 1e-4 +maxiter = 12 +plot_on = False + +# initialize iter counters +iter = 1 +total_cg_iter = 0 +converged = False + +# initializations +g, a_delta = Vector(), Vector() +R.init_vector(a_delta,0) +R.init_vector(g,0) + +du, dp = Vector(), Vector() +W.init_vector(du,1) +W.init_vector(dp,0) + +a_prev, a_diff = Function(Va), Function(Va) + +print "Nit CGit cost misfit reg sqrt(-G*D) ||grad|| alpha tolcg" + +while iter < maxiter and not converged: + + # assemble matrix C + C = assemble(C_equ) + + # solve the adoint problem + adjoint_A, adjoint_RHS = assemble_system(a_adj, L_adj, bc_adj) + solve(adjoint_A, 
p.vector(), adjoint_RHS) + + # assemble W_ua and R + Wua = assemble (Wua_equ) + Raa = assemble (Raa_equ) + + # evaluate the gradient + CT_p = Vector() + C.init_vector(CT_p,1) + C.transpmult(p.vector(), CT_p) + MG = CT_p + R * a.vector() + solve(M, g, MG) + + # calculate the norm of the gradient + grad2 = g.inner(MG) + gradnorm = sqrt(grad2) + + # set the CG tolerance (use Eisenstat - Walker termination criterion) + if iter == 1: + gradnorm_ini = gradnorm + tolcg = min(0.5, sqrt(gradnorm/gradnorm_ini)) + + # define the Hessian apply operator (with preconditioner) + Hess_Apply = MyLinearOperator(R, Raa, C, state_A, adjoint_A, W, Wua ) + P = R + gamma * M + solver = PETScKrylovSolver("cg", "amg") + solver.set_operators(Hess_Apply, P) + solver.parameters["relative_tolerance"] = tolcg + #solver.parameters["error_on_nonconvergence"] = False + solver.parameters["nonzero_initial_guess"] = False + + # solve the Newton system H a_delta = - MG + solver.solve(a_delta, -MG) + total_cg_iter += Hess_Apply.cgiter + + # linesearch + alpha = 1 + descent = 0 + no_backtrack = 0 + a_prev.assign(a) + while descent == 0 and no_backtrack < 10: + a.vector().axpy(alpha, a_delta ) + + # solve the state/forward problem + state_A, state_b = assemble_system(a_state, L_state, bc_state) + solve(state_A, u.vector(), state_b) + + # evaluate cost + [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R) + + # check if Armijo conditions are satisfied + if cost_new < cost_old + alpha * c * MG.inner(a_delta): + cost_old = cost_new + descent = 1 + else: + no_backtrack += 1 + alpha *= 0.5 + a.assign(a_prev) # reset a + + # calculate sqrt(-G * D) + graddir = sqrt(- MG.inner(a_delta) ) + + sp = "" + print "%2d %2s %2d %3s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %5.2f %1s %5.3e" % \ + (iter, sp, Hess_Apply.cgiter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \ + graddir, sp, gradnorm, sp, alpha, sp, tolcg) + + if plot_on: + nb.multi1_plot([a,u,p], ["a","u","p"], same_colorbar=False) + + + # check for convergence + if gradnorm < tol and iter > 1: + converged = True + print "Newton's method converged in ",iter," iterations" + print "Total number of CG iterations: ", total_cg_iter + + iter += 1 + +if not converged: + print "Newton's method did not converge in ", maxiter, " iterations" + +print "Time elapsed: ", time.clock()-start + +nb.multi1_plot([atrue, a], ["atrue", "a"]) +nb.multi1_plot([u,p], ["u","p"], same_colorbar=False) + +plt.show() + diff --git a/PoissonDeterministic-SD.html b/PoissonDeterministic-SD.html new file mode 100644 index 0000000..f352b68 --- /dev/null +++ b/PoissonDeterministic-SD.html @@ -0,0 +1,30807 @@ + + + +PoissonDeterministic-SD + + + + + + + + + + + + + + + + + + + + +

Example: Coefficient field inversion in an elliptic partial differential equation

We consider the estimation of a coefficient in an elliptic partial +differential equation as a first model problem. Depending on the +interpretation of the unknowns and the type of measurements, this +model problem arises, for instance, in inversion for groundwater flow +or heat conductivity. It can also be interpreted as finding a +membrane with a certain spatially varying stiffness. Let +$\Omega\subset\mathbb{R}^n$, $n\in\{1,2,3\}$ be an open, bounded +domain and consider the following problem:

+$$ +\min_{a} J(a):=\frac{1}{2}\int_\Omega (u-u_d)^2\, dx + \frac{\gamma}{2}\int_\Omega|\nabla a|^2\,dx, +$$

where $u$ is the solution of

+$$ +\begin{split} +\quad -\nabla\cdot(a\nabla u) &= f \text{ in }\Omega,\\ +u &= 0 \text{ on }\partial\Omega. +\end{split} +$$

Here $a\in U_{ad}:=\{a\in L^{\infty}(\Omega)\}$ is the unknown coefficient field, $u_d$ denotes (possibly noisy) data, $f\in H^{-1}(\Omega)$ is a given force, and $\gamma\ge 0$ is the regularization parameter.

+

The variational (or weak) form of the state equation:

Find $u\in H_0^1(\Omega)$ such that $(a\nabla u,\nabla v) - (f,v) = 0, \text{ for all } v\in H_0^1(\Omega),$ +where $H_0^1(\Omega)$ is the space of functions vanishing on $\partial\Omega$ with square integrable derivatives. Here, $(\cdot\,,\cdot)$ denotes the $L^2$-inner product, i.e, for scalar functions $u,v$ defined on $\Omega$ we denote $(u,v) := \int_\Omega u(x) v(x) \,dx$.

+

Optimality System:

The Lagrangian functional $\mathscr{L}:L^\infty(\Omega)\times H_0^1(\Omega)\times H_0^1(\Omega)\rightarrow \mathbb{R}$, which we use as a tool to derive the optimality system, is given by

+$$ +\mathscr{L}(a,u,p):= \frac{1}{2}(u-u_d,u-u_d) + +\frac{\gamma}{2}(\nabla a, \nabla a) + (a\nabla u,\nabla p) - (f,p). +$$

The Lagrange multiplier theory shows that, at a solution all variations of the Lagrangian functional with respect to all variables must vanish. These variations of $\mathscr{L}$ with respect to $(p,u,a)$ in directions $(\tilde{u}, \tilde{p}, \tilde{a})$ are given by

+$$ + \begin{alignat}{2} + \mathscr{L}_p(a,u,p)(\tilde{p}) &= (a\nabla u, \nabla \tilde{p}) - + (f,\tilde{p}) &&= 0,\\ + \mathscr{L}_u(a,u,p)(\tilde{u}) &= (a\nabla p, \nabla \tilde{u}) + + (u-u_d,\tilde{u}) && = 0,\\ + \mathscr{L}_a(a,u,p)(\tilde{a}) &= \gamma(\nabla a, \nabla \tilde{a}) + + (\tilde{a}\nabla u, \nabla p) &&= 0, + \end{alignat} +$$

where the variations $(\tilde{u}, \tilde{p}, \tilde{a})$ are taken from the same spaces as $(u,p,a)$.

+

The gradient of the cost functional $\mathcal{J}(a)$, with $u$ and $p$ solving the state and adjoint equations above, therefore is

+$$
 \mathcal{G}(a)(\tilde a) = \gamma(\nabla a, \nabla \tilde{a}) +
 (\tilde{a}\nabla u, \nabla p).
$$
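
Concretely, the optimization loop below assembles this gradient as the dual-space vector MG = CT*p.vector() + R*a.vector(), and then recovers the nodal gradient function g through the parameter mass matrix (a Riesz map). A minimal sketch of that conversion, assuming the matrices M, R, CT and the functions a, p as defined in the cells that follow:

+MG = CT*p.vector() + R*a.vector()   # assembled gradient (dual-space vector)
+g = Vector()
+R.init_vector(g, 0)                 # g lives in the parameter space Va
+solve(M, g, MG)                     # Riesz map: solve M g = MG for g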

Goals:

By the end of this notebook, you should be able to:

  • solve the forward and adjoint Poisson equations
  • understand the inverse method framework
  • visualise and understand the results
  • modify the problem and code

Mathematical tools used:

  • Finite element method
  • Derivation of the gradient via the adjoint method
  • Armijo line search

List of software used:

  • FEniCS, a parallel finite element library for the discretization of partial differential equations
  • PETSc, for scalable and efficient linear algebra operations and solvers
  • Matplotlib, a python package used for plotting the results
  • Numpy, a python package for linear algebra

Set up

Import dependencies

In [1]:
from dolfin import *
+
+import numpy as np
+import time
+import logging
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+import nb
+
+start = time.clock()
+
+logging.getLogger('FFC').setLevel(logging.WARNING)
+logging.getLogger('UFL').setLevel(logging.WARNING)
+set_log_active(False)
+
+np.random.seed(seed=1)

Model set up:

As in the introduction, the first thing we need to do is to set up the numerical model.

+

In this cell, we define the mesh (the variable mesh) and the finite element spaces Va and Vu corresponding to the parameter space and the state/adjoint space, respectively. In particular, we use linear finite elements for the parameter space, and quadratic elements for the state/adjoint space.

+

The true parameter atrue is the finite element interpolant of the function +$$ a_{\rm true} = \left\{ \begin{array}{l} 4 \; \forall \,(x,y) \, {\rm s.t.}\, \sqrt{ (x-.5)^2 + (y-.5)^2} \leq 0.2 \\ 8 \; {\rm otherwise}. \end{array}\right. $$

+

The forcing term f and the boundary conditions u0 for the forward problem are +$$ f = 1 \; \forall {\bf x} \in \Omega, \quad u = 0 \; \forall {\bf x} \in \partial \Omega. $$

In [2]:
# create mesh and define function spaces
+nx = 32
+ny = 32
+mesh = UnitSquareMesh(nx, ny)
+Va = FunctionSpace(mesh, 'Lagrange', 1)
+Vu = FunctionSpace(mesh, 'Lagrange', 2)
+
+# The true and inverted parameter
+atrue = interpolate(Expression('8. - 4.*(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2) < pow(0.2,2))'), Va)
+a = interpolate(Expression("4."),Va)
+
+# define function for state and adjoint
+u = Function(Vu)
+p = Function(Vu)
+
+# define Trial and Test Functions
+u_trial, p_trial, a_trial = TrialFunction(Vu), TrialFunction(Vu), TrialFunction(Va)
+u_test, p_test, a_test = TestFunction(Vu), TestFunction(Vu), TestFunction(Va)
+
+# initialize input functions
+f = Constant("1.0")
+u0 = Constant("0.0")
+
+# plot
+plt.figure(figsize=(15,5))
+nb.plot(mesh,subplot_loc=121, mytitle="Mesh", show_axis='on')
+nb.plot(atrue,subplot_loc=122, mytitle="True parameter field")
+plt.show()
+
+
+[Figure: Mesh; True parameter field]
+
In [3]:
# set up dirichlet boundary conditions
+def boundary(x,on_boundary):
+    return on_boundary
+
+bc_state = DirichletBC(Vu, u0, boundary)
+bc_adj = DirichletBC(Vu, Constant(0.), boundary)
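+
+# (homogeneous Dirichlet conditions are imposed on the adjoint because the
+#  state boundary values are prescribed data, so variations of u vanish there)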
+

The cost functional evaluation:

$$ +J(a):=\underbrace{\frac{1}{2}\int_\Omega (u-u_d)^2\, dx}_{\text misfit} + \underbrace{\frac{\gamma}{2}\int_\Omega|\nabla a|^2\,dx}_{\text reg} +$$

In the code below, $W$ and $R$ are symmetric positive definite matrices that stem from finite element discretization of the misfit and regularization component of the cost functional, respectively.

In [4]:
# Regularization parameter
+gamma = 1e-10
+
+# weak form for setting up the misfit and regularization components of the cost
+W_equ   = inner(u_trial, u_test) * dx
+R_equ   = gamma * inner(nabla_grad(a_trial), nabla_grad(a_test)) * dx
+
+W = assemble(W_equ)
+R = assemble(R_equ)
+
+# Define cost function
+def cost(u, ud, a, W, R):
+    diff = u.vector() - ud.vector()
+    reg = 0.5 * a.vector().inner(R*a.vector() ) 
+    misfit = 0.5 * diff.inner(W * diff)
+    return [reg + misfit, misfit, reg]
+

Set up synthetic observations:

To generate the synthetic observation we first solve the PDE for the state variable utrue corresponding to the true parameter atrue. +More specifically, we solve the variational problem

+

Find $u\in H_0^1(\Omega)$ such that $\underbrace{(a_{\text true} \nabla u,\nabla v)}_{\; := \; a_{\rm goal}} - \underbrace{(f,v)}_{\; := \;L_{\rm goal}} = 0, \text{ for all } v\in H_0^1(\Omega)$.

+

Then we perturb the true state variable and write the observation ud as
+$$ u_{d} = u_{\rm true} + \eta, \quad {\rm where} \; \eta \sim \mathcal{N}(0, \sigma^2).$$
+Here the standard deviation $\sigma$ is proportional to noise_level; in the code below, $\sigma = {\rm noise\_level}\cdot\|u_{\rm true}\|_{\infty}$.

In [5]:
# noise level
+noise_level = 0.01
+
+# weak form for setting up the synthetic observations
+a_goal = inner( atrue * nabla_grad(u_trial), nabla_grad(u_test)) * dx
+L_goal = f * u_test * dx
+
+# solve the forward/state problem to generate synthetic observations
+goal_A, goal_b = assemble_system(a_goal, L_goal, bc_state)
+
+utrue = Function(Vu)
+solve(goal_A, utrue.vector(), goal_b)
+
+ud = Function(Vu)
+ud.assign(utrue)
+
+# perturb state solution and create synthetic measurements ud
+# ud = u + ||u||/SNR * random.normal
+MAX = ud.vector().norm("linf")
+noise = Vector()
+goal_A.init_vector(noise,1)
+noise.set_local( noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())) )
+bc_adj.apply(noise)
+
+ud.vector().axpy(1., noise)
+
+# plot
+nb.multi1_plot([utrue, ud], ["State solution with atrue", "Synthetic observations"])
+plt.show()
+
+
+[Figure: State solution with atrue; Synthetic observations]
+

Setting up the state equations, right hand side for the adjoint and the necessary matrices:

$$ + \begin{alignat}{2} + \mathscr{L}_p(a,u,p)(\tilde{p}) &= (a\nabla u, \nabla \tilde{p}) - + (f,\tilde{p}) &&= 0,\\ + \mathscr{L}_u(a,u,p)(\tilde{u}) &= (a\nabla p, \nabla \tilde{u}) + + (u-u_d,\tilde{u}) && = 0,\\ + \mathscr{L}_a(a,u,p)(\tilde{a}) &= \gamma(\nabla a, \nabla \tilde{a}) + + (\tilde{a}\nabla u, \nabla p) &&= 0, + \end{alignat} +$$ +
In [6]:
# weak form for setting up the state equation
+a_state = inner( a * nabla_grad(u_trial), nabla_grad(u_test)) * dx
+L_state = f * u_test * dx
+
+# weak form for setting up the adjoint equation
+a_adj = inner( a * nabla_grad(p_trial), nabla_grad(p_test) ) * dx
+L_adjoint = -inner(u - ud, p_test) * dx
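+# (the adjoint right-hand side is the negative data misfit, matching the
+#  optimality condition L_u = 0 above)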
+
+
+# weak form for setting up matrices
+CT_equ   = inner(a_test * nabla_grad(u), nabla_grad(p_trial)) * dx
+M_equ   = inner(a_trial, a_test) * dx
+
+
+# assemble the parameter mass matrix M
+M = assemble(M_equ)
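+# (M maps the assembled gradient MG, a dual-space vector, to the nodal
+#  gradient function g by solving M g = MG inside the loop below)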
+
+ +
+
+
+ +
+
+
+
+
+
+

Initial guess

We solve the state equation and compute the cost functional for the initial guess of the parameter a_ini

In [7]:
# solve state equation
+A, state_b = assemble_system (a_state, L_state, bc_state)
+solve (A, u.vector(), state_b)
+
+# evaluate cost
+[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R)
+
+# plot
+plt.figure(figsize=(15,5))
+nb.plot(a,subplot_loc=121, mytitle="a_ini", vmin=atrue.vector().min(), vmax=atrue.vector().max())
+nb.plot(u,subplot_loc=122, mytitle="u(a_ini)")
+plt.show()
+
+
+[Figure: a_ini; u(a_ini)]
+
+

The steepest descent with Armijo line search:

We solve the constrained optimization problem using the steepest descent method with Armijo line search.

+

The stopping criterion is based on a relative reduction of the norm of the gradient (i.e. $\frac{\|g_{n}\|}{\|g_{0}\|} \leq \tau$).

+

The gradient is computed by solving the state and adjoint equations for the current parameter $a$, and then substituting the current state $u$, parameter $a$ and adjoint $p$ variables in the weak form expression of the gradient:
+$$ (g, \tilde{a}) = \gamma(\nabla a, \nabla \tilde{a}) +(\tilde{a}\nabla u, \nabla p).$$
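
Before running the loop, the adjoint-based gradient can be verified with a finite difference check: for a random direction $\tilde a$, the quotient $(J(a+\epsilon\tilde a) - J(a))/\epsilon$ should approach $(g,\tilde a)$ as $\epsilon$ decreases. A minimal sketch reusing the notebook variables (check_gradient and atilde are illustrative names, not part of the notebook):

+def check_gradient(a, eps_values=(1e-3, 1e-4, 1e-5)):
+    # solve state and adjoint at the current a, then assemble the gradient MG
+    state_A, state_b = assemble_system(a_state, L_state, bc_state)
+    solve(state_A, u.vector(), state_b)
+    adj_A, adjoint_RHS = assemble_system(a_adj, L_adjoint, bc_adj)
+    solve(adj_A, p.vector(), adjoint_RHS)
+    MG = assemble(CT_equ)*p.vector() + R*a.vector()
+    J0 = cost(u, ud, a, W, R)[0]
+    # random direction atilde and the predicted directional derivative
+    atilde = Function(Va)
+    atilde.vector().set_local(np.random.randn(Va.dim()))
+    dJ = MG.inner(atilde.vector())
+    for eps in eps_values:
+        a.vector().axpy(eps, atilde.vector())
+        state_A, state_b = assemble_system(a_state, L_state, bc_state)
+        solve(state_A, u.vector(), state_b)
+        J1 = cost(u, ud, a, W, R)[0]
+        print eps, abs((J1 - J0)/eps - dJ)/abs(dJ)
+        a.vector().axpy(-eps, atilde.vector())

The relative error printed should decrease roughly linearly in $\epsilon$ until floating point cancellation takes over.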

+

The Armijo line search uses backtracking to find $\alpha$ such that a sufficient reduction in the cost functional is achieved. +More specifically, we use backtracking to find $\alpha$ such that: +$$J( a - \alpha g ) \leq J(a) - \alpha c_{\rm armijo} (g,g). $$

In [8]:
# define parameters for the optimization
+tol = 1e-4
+maxiter = 1000
+plot_any = 30
+c_armijo = 1e-5
+
+# initialize iter counters
+iter = 1
+converged = False
+
+# initializations
+g = Vector()
+R.init_vector(g,0)
+
+a_prev = Function(Va)
+
+print "Nit  cost          misfit        reg         ||grad||       alpha  N backtrack"
+
+while iter <  maxiter and not converged:
+
+    # assemble matrix CT
+    CT = assemble(CT_equ)
+
+    # solve the adjoint problem
+    adj_A, adjoint_RHS = assemble_system(a_adj, L_adjoint, bc_adj)
+    solve(adj_A, p.vector(), adjoint_RHS)
+
+    # evaluate the gradient
+    MG = CT*p.vector() + R * a.vector()
+    solve(M, g, MG)
+
+    # calculate the norm of the gradient
+    grad_norm2 = g.inner(MG)
+    gradnorm = sqrt(grad_norm2)
+    
+    if iter == 1:
+        gradnorm0 = gradnorm
+
+    # linesearch
+    it_backtrack = 0
+    a_prev.assign(a)
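+    # initial step length; chosen empirically for this problem and halved
+    # below until the Armijo sufficient-decrease condition is met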
+    alpha = 8.e5
+    backtrack_converged = False
+    for it_backtrack in range(20):
+        
+        a.vector().axpy(-alpha, g )
+
+        # solve the state/forward problem
+        state_A, state_b = assemble_system(a_state, L_state, bc_state)
+        solve(state_A, u.vector(), state_b)
+
+        # evaluate cost
+        [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R)
+
+        # check if Armijo conditions are satisfied
+        if cost_new < cost_old - alpha * c_armijo * grad_norm2:
+            cost_old = cost_new
+            backtrack_converged = True
+            break
+        else:
+            alpha *= 0.5
+            a.assign(a_prev)  # reset a
+            
+    if not backtrack_converged:
+        print "Backtracking failed. A sufficient descent direction was not found"
+        converged = False
+        break
+
+    sp = ""
+    print "%3d %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %3d" % \
+        (iter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \
+        gradnorm, sp, alpha, sp, it_backtrack)
+
+    if (iter % plot_any)==0 :
+        nb.multi1_plot([a,u,p], ["a","u","p"], same_colorbar=False)
+        plt.show()
+    
+    # check for convergence
+    if gradnorm < tol*gradnorm0 and iter > 1:
+        converged = True
+        print "Steepest descent converged in ",iter,"  iterations"
+        
+    iter += 1
+    
+if not converged:
+    print "Steepest descent did not converge in ", maxiter, " iterations"
+
+print "Time elapsed: ", time.clock()-start
+
Nit  cost          misfit        reg         ||grad||       alpha  N backtrack
+  1   4.12912e-06   3.92307e-06   2.06047e-07   1.52436e-05   8.00000e+05     0
+  2   3.91185e-06   3.71829e-06   1.93568e-07   5.19644e-07   8.00000e+05     0
+  3   3.68081e-06   3.49382e-06   1.86995e-07   5.31748e-07   8.00000e+05     0
+  4   3.42693e-06   3.24444e-06   1.82488e-07   5.55925e-07   8.00000e+05     0
+  5   3.14133e-06   2.96148e-06   1.79850e-07   5.87562e-07   8.00000e+05     0
+  6   2.80841e-06   2.62878e-06   1.79630e-07   6.30743e-07   8.00000e+05     0
+  7   2.40676e-06   2.22305e-06   1.83714e-07   6.94667e-07   8.00000e+05     0
+  8   2.34319e-06   2.15417e-06   1.89016e-07   7.08385e-07   4.00000e+05     1
+  9   2.31098e-06   2.11357e-06   1.97411e-07   3.75179e-06   1.00000e+05     3
+ 10   2.13356e-06   1.93649e-06   1.97071e-07   7.18427e-07   4.00000e+05     1
+ 11   2.11625e-06   1.92502e-06   1.91230e-07   4.93037e-07   8.00000e+05     0
+ 12   2.05497e-06   1.78969e-06   2.65277e-07   1.78443e-04   3.12500e+03     8
+ 13   1.92720e-06   1.71949e-06   2.07711e-07   8.13006e-07   8.00000e+05     0
+ 14   1.70543e-06   1.50941e-06   1.96016e-07   6.80580e-07   8.00000e+05     0
+ 15   1.66465e-06   1.48760e-06   1.77047e-07   1.07661e-06   4.00000e+05     1
+ 16   1.47612e-06   1.30469e-06   1.71430e-07   5.77832e-07   8.00000e+05     0
+ 17   1.36002e-06   1.19914e-06   1.60884e-07   4.45571e-07   8.00000e+05     0
+ 18   1.24872e-06   1.08875e-06   1.59972e-07   4.21676e-07   8.00000e+05     0
+ 19   1.15024e-06   9.99942e-07   1.50295e-07   3.94140e-07   8.00000e+05     0
+ 20   1.05586e-06   9.07913e-07   1.47947e-07   3.71410e-07   8.00000e+05     0
+ 21   9.70218e-07   8.29733e-07   1.40485e-07   3.44479e-07   8.00000e+05     0
+ 22   8.90357e-07   7.54311e-07   1.36046e-07   3.31161e-07   8.00000e+05     0
+ 23   8.16142e-07   6.85712e-07   1.30430e-07   3.19565e-07   8.00000e+05     0
+ 24   7.47075e-07   6.21787e-07   1.25289e-07   3.10626e-07   8.00000e+05     0
+ 25   6.82912e-07   5.62863e-07   1.20049e-07   3.02190e-07   8.00000e+05     0
+ 26   6.23367e-07   5.08512e-07   1.14855e-07   2.94583e-07   8.00000e+05     0
+ 27   5.68215e-07   4.58481e-07   1.09734e-07   2.87751e-07   8.00000e+05     0
+ 28   5.17271e-07   4.12554e-07   1.04718e-07   2.81753e-07   8.00000e+05     0
+ 29   4.70373e-07   3.70538e-07   9.98351e-08   2.76704e-07   8.00000e+05     0
+ 30   4.27359e-07   3.32249e-07   9.51099e-08   2.72797e-07   8.00000e+05     0
+
+[Figure: a, u, p after iteration 30]
+
 31   3.88066e-07   2.97485e-07   9.05809e-08   2.70270e-07   8.00000e+05     0
+ 32   3.52335e-07   2.66043e-07   8.62920e-08   2.69387e-07   8.00000e+05     0
+ 33   3.20014e-07   2.37711e-07   8.23029e-08   2.70432e-07   8.00000e+05     0
+ 34   2.90956e-07   2.12280e-07   7.86755e-08   2.73711e-07   8.00000e+05     0
+ 35   2.65025e-07   1.89536e-07   7.54887e-08   2.79549e-07   8.00000e+05     0
+ 36   2.42096e-07   1.69271e-07   7.28250e-08   2.88285e-07   8.00000e+05     0
+ 37   2.22062e-07   1.51267e-07   7.07950e-08   3.00272e-07   8.00000e+05     0
+ 38   2.04845e-07   1.35325e-07   6.95204e-08   3.15880e-07   8.00000e+05     0
+ 39   1.90406e-07   1.21230e-07   6.91760e-08   3.35524e-07   8.00000e+05     0
+ 40   1.78769e-07   1.08800e-07   6.99697e-08   3.59695e-07   8.00000e+05     0
+ 41   1.70056e-07   9.78351e-08   7.22214e-08   3.89027e-07   8.00000e+05     0
+ 42   1.64532e-07   8.81857e-08   7.63467e-08   4.24380e-07   8.00000e+05     0
+ 43   1.62689e-07   7.96695e-08   8.30192e-08   4.66956e-07   8.00000e+05     0
+ 44   1.09608e-07   7.57331e-08   3.38748e-08   5.18458e-07   4.00000e+05     1
+ 45   9.99636e-08   6.86609e-08   3.13027e-08   1.18019e-07   8.00000e+05     0
+ 46   9.14369e-08   6.24236e-08   2.90134e-08   1.15172e-07   8.00000e+05     0
+ 47   8.39581e-08   5.69038e-08   2.70543e-08   1.14751e-07   8.00000e+05     0
+ 48   7.75046e-08   5.20159e-08   2.54887e-08   1.17878e-07   8.00000e+05     0
+ 49   7.21219e-08   4.76686e-08   2.44533e-08   1.26083e-07   8.00000e+05     0
+ 50   6.79608e-08   4.38072e-08   2.41536e-08   1.41234e-07   8.00000e+05     0
+ 51   6.53431e-08   4.03538e-08   2.49893e-08   1.65438e-07   8.00000e+05     0
+ 52   6.48721e-08   3.72823e-08   2.75898e-08   2.01039e-07   8.00000e+05     0
+ 53   5.29760e-08   3.58666e-08   1.71095e-08   2.50847e-07   4.00000e+05     1
+ 54   4.93072e-08   3.32488e-08   1.60584e-08   7.86109e-08   8.00000e+05     0
+ 55   4.61339e-08   3.08946e-08   1.52393e-08   8.10389e-08   8.00000e+05     0
+ 56   4.34895e-08   2.87804e-08   1.47090e-08   8.73400e-08   8.00000e+05     0
+ 57   4.14649e-08   2.68696e-08   1.45952e-08   9.89440e-08   8.00000e+05     0
+ 58   4.02444e-08   2.51518e-08   1.50926e-08   1.17402e-07   8.00000e+05     0
+ 59   4.01684e-08   2.35901e-08   1.65782e-08   1.44415e-07   8.00000e+05     0
+ 60   3.39598e-08   2.28733e-08   1.10865e-08   1.82015e-07   4.00000e+05     1
+
+[Figure: a, u, p after iteration 60]
+
 61   3.21095e-08   2.15268e-08   1.05827e-08   5.64340e-08   8.00000e+05     0
+ 62   3.05061e-08   2.03066e-08   1.01995e-08   5.86188e-08   8.00000e+05     0
+ 63   2.91718e-08   1.91945e-08   9.97732e-09   6.36927e-08   8.00000e+05     0
+ 64   2.81601e-08   1.81860e-08   9.97414e-09   7.26932e-08   8.00000e+05     0
+ 65   2.75758e-08   1.72622e-08   1.03137e-08   8.67461e-08   8.00000e+05     0
+ 66   2.52903e-08   1.68371e-08   8.45323e-09   1.07098e-07   4.00000e+05     1
+ 67   2.41971e-08   1.60318e-08   8.16533e-09   4.07976e-08   8.00000e+05     0
+ 68   2.32259e-08   1.52965e-08   7.92940e-09   4.11483e-08   8.00000e+05     0
+ 69   2.23797e-08   1.46217e-08   7.75796e-09   4.29822e-08   8.00000e+05     0
+ 70   2.16720e-08   1.40049e-08   7.66706e-09   4.69475e-08   8.00000e+05     0
+ 71   2.11336e-08   1.34363e-08   7.69733e-09   5.38000e-08   8.00000e+05     0
+ 72   2.08235e-08   1.29169e-08   7.90656e-09   6.43647e-08   8.00000e+05     0
+ 73   1.95633e-08   1.26726e-08   6.89072e-09   7.95665e-08   4.00000e+05     1
+ 74   1.89622e-08   1.22120e-08   6.75019e-09   3.02314e-08   8.00000e+05     0
+ 75   1.84229e-08   1.17865e-08   6.63632e-09   3.06064e-08   8.00000e+05     0
+ 76   1.79476e-08   1.13947e-08   6.55290e-09   3.20583e-08   8.00000e+05     0
+ 77   1.75446e-08   1.10313e-08   6.51335e-09   3.50594e-08   8.00000e+05     0
+ 78   1.72313e-08   1.06966e-08   6.53468e-09   4.01661e-08   8.00000e+05     0
+ 79   1.70404e-08   1.03844e-08   6.65602e-09   4.79938e-08   8.00000e+05     0
+ 80   1.70306e-08   1.00976e-08   6.93308e-09   5.92358e-08   8.00000e+05     0
+ 81   1.59819e-08   9.96077e-09   6.02113e-09   7.47370e-08   4.00000e+05     1
+ 82   1.56597e-08   9.70246e-09   5.95721e-09   2.33711e-08   8.00000e+05     0
+ 83   1.53711e-08   9.46142e-09   5.90970e-09   2.44529e-08   8.00000e+05     0
+ 84   1.51209e-08   9.23773e-09   5.88321e-09   2.66411e-08   8.00000e+05     0
+ 85   1.49189e-08   9.02799e-09   5.89091e-09   3.03508e-08   8.00000e+05     0
+ 86   1.47831e-08   8.83360e-09   5.94955e-09   3.60495e-08   8.00000e+05     0
+ 87   1.47459e-08   8.65009e-09   6.09581e-09   4.42650e-08   8.00000e+05     0
+ 88   1.41562e-08   8.56446e-09   5.59173e-09   5.56322e-08   4.00000e+05     1
+ 89   1.39477e-08   8.39885e-09   5.54886e-09   1.84078e-08   8.00000e+05     0
+ 90   1.37575e-08   8.24407e-09   5.51347e-09   1.91622e-08   8.00000e+05     0
+ 91   1.35883e-08   8.09823e-09   5.49008e-09   2.06910e-08   8.00000e+05     0
+ 92   1.34455e-08   7.96200e-09   5.48348e-09   2.33065e-08   8.00000e+05     0
+ 93   1.33390e-08   7.83295e-09   5.50604e-09   2.73696e-08   8.00000e+05     0
+ 94   1.32864e-08   7.71274e-09   5.57369e-09   3.32885e-08   8.00000e+05     0
+ 95   1.29492e-08   7.65481e-09   5.29437e-09   4.15454e-08   4.00000e+05     1
+ 96   1.28040e-08   7.54425e-09   5.25971e-09   1.49745e-08   8.00000e+05     0
+ 97   1.26691e-08   7.43963e-09   5.22951e-09   1.54623e-08   8.00000e+05     0
+ 98   1.25462e-08   7.34123e-09   5.20498e-09   1.64799e-08   8.00000e+05     0
+ 99   1.24381e-08   7.24771e-09   5.19042e-09   1.82609e-08   8.00000e+05     0
+100   1.23503e-08   7.15991e-09   5.19043e-09   2.10861e-08   8.00000e+05     0
+101   1.22924e-08   7.07595e-09   5.21646e-09   2.52764e-08   8.00000e+05     0
+102   1.22809e-08   6.99750e-09   5.28337e-09   3.12032e-08   8.00000e+05     0
+103   1.19874e-08   6.95928e-09   5.02815e-09   3.93214e-08   4.00000e+05     1
+104   1.18863e-08   6.88630e-09   5.00000e-09   1.28397e-08   8.00000e+05     0
+105   1.17922e-08   6.81674e-09   4.97549e-09   1.34801e-08   8.00000e+05     0
+106   1.17068e-08   6.75100e-09   4.95582e-09   1.46450e-08   8.00000e+05     0
+107   1.16330e-08   6.68805e-09   4.94496e-09   1.65510e-08   8.00000e+05     0
+108   1.15760e-08   6.62875e-09   4.94723e-09   1.94525e-08   8.00000e+05     0
+109   1.15447e-08   6.57154e-09   4.97312e-09   2.36416e-08   8.00000e+05     0
+110   1.13735e-08   6.54459e-09   4.82887e-09   2.94647e-08   4.00000e+05     1
+111   1.12945e-08   6.49168e-09   4.80287e-09   1.09383e-08   8.00000e+05     0
+112   1.12200e-08   6.44140e-09   4.77863e-09   1.13145e-08   8.00000e+05     0
+113   1.11508e-08   6.39319e-09   4.75759e-09   1.20418e-08   8.00000e+05     0
+114   1.10884e-08   6.34747e-09   4.74091e-09   1.32827e-08   8.00000e+05     0
+115   1.10356e-08   6.30339e-09   4.73221e-09   1.52379e-08   8.00000e+05     0
+116   1.09973e-08   6.26179e-09   4.73549e-09   1.81403e-08   8.00000e+05     0
+117   1.09817e-08   6.22134e-09   4.76034e-09   2.22600e-08   8.00000e+05     0
+118   1.08310e-08   6.20229e-09   4.62875e-09   2.79232e-08   4.00000e+05     1
+119   1.07707e-08   6.16468e-09   4.60602e-09   9.71667e-09   8.00000e+05     0
+120   1.07137e-08   6.12884e-09   4.58489e-09   1.01456e-08   8.00000e+05     0
+121   1.06609e-08   6.09428e-09   4.56667e-09   1.09206e-08   8.00000e+05     0
+122   1.06139e-08   6.06144e-09   4.55243e-09   1.21955e-08   8.00000e+05     0
+123   1.05751e-08   6.02958e-09   4.54548e-09   1.41562e-08   8.00000e+05     0
+124   1.05490e-08   5.99949e-09   4.54949e-09   1.70176e-08   8.00000e+05     0
+125   1.05432e-08   5.97001e-09   4.57319e-09   2.10316e-08   8.00000e+05     0
+126   1.04095e-08   5.95615e-09   4.45338e-09   2.65075e-08   4.00000e+05     1
+127   1.03623e-08   5.92865e-09   4.43361e-09   8.73646e-09   8.00000e+05     0
+128   1.03177e-08   5.90238e-09   4.41531e-09   9.19677e-09   8.00000e+05     0
+129   1.02766e-08   5.87693e-09   4.39966e-09   9.99658e-09   8.00000e+05     0
+130   1.02404e-08   5.85271e-09   4.38766e-09   1.12807e-08   8.00000e+05     0
+131   1.02114e-08   5.82909e-09   4.38232e-09   1.32215e-08   8.00000e+05     0
+132   1.01938e-08   5.80679e-09   4.38700e-09   1.60186e-08   8.00000e+05     0
+133   1.01147e-08   5.79572e-09   4.31896e-09   1.99082e-08   4.00000e+05     1
+134   1.00753e-08   5.77446e-09   4.30080e-09   7.65990e-09   8.00000e+05     0
+135   1.00376e-08   5.75387e-09   4.28378e-09   7.91563e-09   8.00000e+05     0
+136   1.00023e-08   5.73418e-09   4.26810e-09   8.39475e-09   8.00000e+05     0
+137   9.96988e-09   5.71502e-09   4.25486e-09   9.20547e-09   8.00000e+05     0
+138   9.94172e-09   5.69678e-09   4.24494e-09   1.04837e-08   8.00000e+05     0
+139   9.91998e-09   5.67889e-09   4.24109e-09   1.23898e-08   8.00000e+05     0
+140   9.90840e-09   5.66202e-09   4.24638e-09   1.51096e-08   8.00000e+05     0
+141   9.83844e-09   5.65359e-09   4.18486e-09   1.88654e-08   4.00000e+05     1
+142   9.80675e-09   5.63742e-09   4.16933e-09   6.93785e-09   8.00000e+05     0
+143   9.77655e-09   5.62172e-09   4.15484e-09   7.20822e-09   8.00000e+05     0
+144   9.74825e-09   5.60668e-09   4.14157e-09   7.69826e-09   8.00000e+05     0
+145   9.72251e-09   5.59199e-09   4.13051e-09   8.51101e-09   8.00000e+05     0
+146   9.70049e-09   5.57801e-09   4.12249e-09   9.77389e-09   8.00000e+05     0
+147   9.68421e-09   5.56423e-09   4.11998e-09   1.16360e-08   8.00000e+05     0
+148   9.67705e-09   5.55125e-09   4.12579e-09   1.42709e-08   8.00000e+05     0
+149   9.61498e-09   5.54472e-09   4.07026e-09   1.78886e-08   4.00000e+05     1
+150   9.58933e-09   5.53223e-09   4.05710e-09   6.30682e-09   8.00000e+05     0
+151   9.56494e-09   5.52006e-09   4.04488e-09   6.58724e-09   8.00000e+05     0
+152   9.54217e-09   5.50840e-09   4.03377e-09   7.08297e-09   8.00000e+05     0
+153   9.52164e-09   5.49697e-09   4.02467e-09   7.89179e-09   8.00000e+05     0
+154   9.50440e-09   5.48610e-09   4.01831e-09   9.13292e-09   8.00000e+05     0
+155   9.49229e-09   5.47533e-09   4.01696e-09   1.09452e-08   8.00000e+05     0
+156   9.48839e-09   5.46522e-09   4.02317e-09   1.34912e-08   8.00000e+05     0
+157   9.43319e-09   5.46009e-09   3.97310e-09   1.69693e-08   4.00000e+05     1
+158   9.41235e-09   5.45031e-09   3.96204e-09   5.74841e-09   8.00000e+05     0
+159   9.39258e-09   5.44075e-09   3.95182e-09   6.03570e-09   8.00000e+05     0
+160   9.37420e-09   5.43160e-09   3.94260e-09   6.53342e-09   8.00000e+05     0
+161   9.35779e-09   5.42259e-09   3.93519e-09   7.33405e-09   8.00000e+05     0
+162   9.34430e-09   5.41404e-09   3.93027e-09   8.54904e-09   8.00000e+05     0
+163   9.33541e-09   5.40552e-09   3.92989e-09   1.03078e-08   8.00000e+05     0
+164   9.33392e-09   5.39755e-09   3.93637e-09   1.27628e-08   8.00000e+05     0
+165   9.28473e-09   5.39347e-09   3.89126e-09   1.61019e-08   4.00000e+05     1
+166   9.26775e-09   5.38573e-09   3.88202e-09   5.25028e-09   8.00000e+05     0
+167   9.25168e-09   5.37815e-09   3.87353e-09   5.54211e-09   8.00000e+05     0
+168   9.23683e-09   5.37089e-09   3.86594e-09   6.03902e-09   8.00000e+05     0
+169   9.22370e-09   5.36372e-09   3.85998e-09   6.82830e-09   8.00000e+05     0
+170   9.21318e-09   5.35692e-09   3.85626e-09   8.01399e-09   8.00000e+05     0
+171   9.20678e-09   5.35011e-09   3.85667e-09   9.71684e-09   8.00000e+05     0
+172   9.17766e-09   5.34693e-09   3.83073e-09   1.20802e-08   4.00000e+05     1
+173   9.16317e-09   5.34051e-09   3.82266e-09   4.64321e-09   8.00000e+05     0
+174   9.14930e-09   5.33433e-09   3.81497e-09   4.80358e-09   8.00000e+05     0
+175   9.13622e-09   5.32826e-09   3.80796e-09   5.09812e-09   8.00000e+05     0
+176   9.12420e-09   5.32245e-09   3.80175e-09   5.59205e-09   8.00000e+05     0
+177   9.11370e-09   5.31669e-09   3.79701e-09   6.36754e-09   8.00000e+05     0
+178   9.10553e-09   5.31125e-09   3.79429e-09   7.52168e-09   8.00000e+05     0
+179   9.10107e-09   5.30577e-09   3.79530e-09   9.16725e-09   8.00000e+05     0
+180   9.07529e-09   5.30322e-09   3.77207e-09   1.14392e-08   4.00000e+05     1
+181   9.06341e-09   5.29805e-09   3.76537e-09   4.23710e-09   8.00000e+05     0
+182   9.05208e-09   5.29307e-09   3.75900e-09   4.40153e-09   8.00000e+05     0
+183   9.04141e-09   5.28818e-09   3.75324e-09   4.69727e-09   8.00000e+05     0
+184   9.03168e-09   5.28350e-09   3.74818e-09   5.18644e-09   8.00000e+05     0
+185   9.02330e-09   5.27884e-09   3.74445e-09   5.94620e-09   8.00000e+05     0
+186   9.01700e-09   5.27446e-09   3.74254e-09   7.06715e-09   8.00000e+05     0
+187   9.01403e-09   5.27001e-09   3.74402e-09   8.65475e-09   8.00000e+05     0
+188   8.99116e-09   5.26796e-09   3.72320e-09   1.08363e-08   4.00000e+05     1
+189   8.98142e-09   5.26377e-09   3.71765e-09   3.87124e-09   8.00000e+05     0
+190   8.97214e-09   5.25975e-09   3.71239e-09   4.03868e-09   8.00000e+05     0
+191   8.96345e-09   5.25577e-09   3.70767e-09   4.33437e-09   8.00000e+05     0
+192   8.95556e-09   5.25198e-09   3.70358e-09   4.81729e-09   8.00000e+05     0
+193   8.94888e-09   5.24820e-09   3.70069e-09   5.55981e-09   8.00000e+05     0
+194   8.94407e-09   5.24464e-09   3.69943e-09   6.64641e-09   8.00000e+05     0
+195   8.94225e-09   5.24102e-09   3.70123e-09   8.17589e-09   8.00000e+05     0
+196   8.92193e-09   5.23935e-09   3.68257e-09   1.02684e-08   4.00000e+05     1
+197   8.91393e-09   5.23595e-09   3.67799e-09   3.54097e-09   8.00000e+05     0
+198   8.90633e-09   5.23267e-09   3.67366e-09   3.71054e-09   8.00000e+05     0
+199   8.89924e-09   5.22943e-09   3.66980e-09   4.00510e-09   8.00000e+05     0
+200   8.89286e-09   5.22635e-09   3.66651e-09   4.48053e-09   8.00000e+05     0
+201   8.88755e-09   5.22326e-09   3.66430e-09   5.20457e-09   8.00000e+05     0
+202   8.88391e-09   5.22036e-09   3.66355e-09   6.25608e-09   8.00000e+05     0
+203   8.88297e-09   5.21740e-09   3.66557e-09   7.72769e-09   8.00000e+05     0
+204   8.86489e-09   5.21604e-09   3.64885e-09   9.73305e-09   4.00000e+05     1
+205   8.85832e-09   5.21326e-09   3.64506e-09   3.24237e-09   8.00000e+05     0
+206   8.85209e-09   5.21059e-09   3.64150e-09   3.41327e-09   8.00000e+05     0
+207   8.84630e-09   5.20793e-09   3.63837e-09   3.70577e-09   8.00000e+05     0
+208   8.84115e-09   5.20542e-09   3.63573e-09   4.17265e-09   8.00000e+05     0
+209   8.83694e-09   5.20288e-09   3.63406e-09   4.87728e-09   8.00000e+05     0
+210   8.83423e-09   5.20052e-09   3.63372e-09   5.89326e-09   8.00000e+05     0
+211   8.83395e-09   5.19808e-09   3.63588e-09   7.30761e-09   8.00000e+05     0
+212   8.81785e-09   5.19698e-09   3.62087e-09   9.22790e-09   4.00000e+05     1
+213   8.81245e-09   5.19469e-09   3.61776e-09   2.97202e-09   8.00000e+05     0
+214   8.80734e-09   5.19250e-09   3.61484e-09   3.14357e-09   8.00000e+05     0
+215   8.80261e-09   5.19032e-09   3.61229e-09   3.43318e-09   8.00000e+05     0
+216   8.79845e-09   5.18826e-09   3.61019e-09   3.89063e-09   8.00000e+05     0
+217   8.79514e-09   5.18617e-09   3.60896e-09   4.57513e-09   8.00000e+05     0
+218   8.79316e-09   5.18424e-09   3.60892e-09   5.55543e-09   8.00000e+05     0
+219   8.78366e-09   5.18323e-09   3.60043e-09   6.91337e-09   4.00000e+05     1
+220   8.77902e-09   5.18133e-09   3.59769e-09   2.63254e-09   8.00000e+05     0
+221   8.77457e-09   5.17945e-09   3.59512e-09   2.72698e-09   8.00000e+05     0
+222   8.77038e-09   5.17765e-09   3.59273e-09   2.89856e-09   8.00000e+05     0
+223   8.76653e-09   5.17586e-09   3.59067e-09   3.18458e-09   8.00000e+05     0
+224   8.76317e-09   5.17417e-09   3.58900e-09   3.63186e-09   8.00000e+05     0
+225   8.76057e-09   5.17244e-09   3.58813e-09   4.29570e-09   8.00000e+05     0
+226   8.75918e-09   5.17086e-09   3.58832e-09   5.24040e-09   8.00000e+05     0
+227   8.75076e-09   5.17003e-09   3.58073e-09   6.54302e-09   4.00000e+05     1
+228   8.74693e-09   5.16846e-09   3.57847e-09   2.40880e-09   8.00000e+05     0
+229   8.74328e-09   5.16691e-09   3.57636e-09   2.50462e-09   8.00000e+05     0
+230   8.73984e-09   5.16544e-09   3.57440e-09   2.67570e-09   8.00000e+05     0
+231   8.73669e-09   5.16395e-09   3.57274e-09   2.95750e-09   8.00000e+05     0
+232   8.73399e-09   5.16256e-09   3.57143e-09   3.39400e-09   8.00000e+05     0
+233   8.73197e-09   5.16114e-09   3.57083e-09   4.03683e-09   8.00000e+05     0
+234   8.73103e-09   5.15984e-09   3.57119e-09   4.94617e-09   8.00000e+05     0
+235   8.72356e-09   5.15915e-09   3.56441e-09   6.19471e-09   4.00000e+05     1
+236   8.72040e-09   5.15786e-09   3.56255e-09   2.20588e-09   8.00000e+05     0
+237   8.71739e-09   5.15658e-09   3.56081e-09   2.30267e-09   8.00000e+05     0
+238   8.71457e-09   5.15536e-09   3.55921e-09   2.47275e-09   8.00000e+05     0
+239   8.71201e-09   5.15414e-09   3.55787e-09   2.74977e-09   8.00000e+05     0
+240   8.70984e-09   5.15299e-09   3.55685e-09   3.17499e-09   8.00000e+05     0
+241   8.70828e-09   5.15181e-09   3.55647e-09   3.79662e-09   8.00000e+05     0
+242   8.70769e-09   5.15075e-09   3.55694e-09   4.67102e-09   8.00000e+05     0
+243   8.70106e-09   5.15017e-09   3.55089e-09   5.86683e-09   4.00000e+05     1
+244   8.69845e-09   5.14911e-09   3.54935e-09   2.02168e-09   8.00000e+05     0
+245   8.69597e-09   5.14805e-09   3.54792e-09   2.11906e-09   8.00000e+05     0
+246   8.69366e-09   5.14705e-09   3.54661e-09   2.28771e-09   8.00000e+05     0
+247   8.69158e-09   5.14603e-09   3.54554e-09   2.55948e-09   8.00000e+05     0
+248   8.68984e-09   5.14509e-09   3.54475e-09   2.97304e-09   8.00000e+05     0
+249   8.68865e-09   5.14411e-09   3.54454e-09   3.57338e-09   8.00000e+05     0
+250   8.68833e-09   5.14324e-09   3.54509e-09   4.41338e-09   8.00000e+05     0
+251   8.68243e-09   5.14276e-09   3.53968e-09   5.55793e-09   4.00000e+05     1
+252   8.68028e-09   5.14188e-09   3.53840e-09   1.85435e-09   8.00000e+05     0
+253   8.67824e-09   5.14100e-09   3.53724e-09   1.95198e-09   8.00000e+05     0
+254   8.67634e-09   5.14017e-09   3.53616e-09   2.11881e-09   8.00000e+05     0
+255   8.67465e-09   5.13933e-09   3.53531e-09   2.38494e-09   8.00000e+05     0
+256   8.67326e-09   5.13856e-09   3.53470e-09   2.78653e-09   8.00000e+05     0
+257   8.67236e-09   5.13774e-09   3.53462e-09   3.36562e-09   8.00000e+05     0
+258   8.67225e-09   5.13703e-09   3.53523e-09   4.17188e-09   8.00000e+05     0
+259   8.66700e-09   5.13663e-09   3.53038e-09   5.26671e-09   4.00000e+05     1
+260   8.66523e-09   5.13590e-09   3.52933e-09   1.70224e-09   8.00000e+05     0
+261   8.66354e-09   5.13518e-09   3.52837e-09   1.79982e-09   8.00000e+05     0
+262   8.66199e-09   5.13449e-09   3.52749e-09   1.96448e-09   8.00000e+05     0
+263   8.66061e-09   5.13379e-09   3.52682e-09   2.22461e-09   8.00000e+05     0
+264   8.65951e-09   5.13316e-09   3.52636e-09   2.61402e-09   8.00000e+05     0
+265   8.65885e-09   5.13248e-09   3.52637e-09   3.17200e-09   8.00000e+05     0
+266   8.65575e-09   5.13218e-09   3.52356e-09   3.94526e-09   4.00000e+05     1
+267   8.65421e-09   5.13155e-09   3.52266e-09   1.51032e-09   8.00000e+05     0
+Steepest descent converged in  267   iterations
+Time elapsed:  29.952854
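In the log above, the columns are the outer iteration count, total cost, data misfit, regularization term, gradient norm, accepted step length alpha, and the number of Armijo backtracking steps: each time the initial step length of 8.00000e+05 fails the sufficient-decrease test, a single halving to 4.00000e+05 is accepted. A minimal NumPy sketch of the same steepest-descent/Armijo logic on a toy quadratic (the function sd_armijo and the test problem are illustrative assumptions, not code from this patch):

# Toy sketch of the steepest descent / Armijo loop that produced the log
# above (pure NumPy on a quadratic; sd_armijo and the test problem are
# illustrative assumptions, not code from this repository).
import numpy as np

def sd_armijo(f, grad, x0, alpha0=8.e5, c_armijo=1e-5, tol=1e-4, maxiter=1000):
    x = x0.copy()
    gradnorm0 = np.linalg.norm(grad(x))
    for it in range(1, maxiter + 1):
        g = grad(x)
        grad_norm2 = g.dot(g)
        # stopping criterion: relative reduction of the gradient norm
        if np.sqrt(grad_norm2) < tol * gradnorm0 and it > 1:
            return x, it
        # Armijo backtracking: halve alpha until sufficient decrease holds
        alpha = alpha0
        for n_backtrack in range(20):
            x_trial = x - alpha * g
            if f(x_trial) < f(x) - alpha * c_armijo * grad_norm2:
                x = x_trial
                break
            alpha *= 0.5
        else:
            raise RuntimeError("Backtracking failed")
    return x, maxiter

# quadratic with curvature ~1/alpha0, so the full step is usually accepted
f = lambda x: 0.5e-6 * x.dot(x)
grad = lambda x: 1e-6 * x
x, nit = sd_armijo(f, grad, np.ones(5))
print(nit)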
In [9]:
+nb.multi1_plot([atrue, a], ["atrue", "a"])
+nb.multi1_plot([u,p], ["u","p"], same_colorbar=False)
+plt.show()
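nb.multi1_plot is a plotting helper from nb.py, which is added later in this same patch. For readers without that module, a rough matplotlib-only stand-in (the name multi1_plot_sketch and the explicit mesh argument are assumptions; coordinates, cells, and compute_vertex_values are standard dolfin calls) might look like:

# Rough stand-in for nb.multi1_plot using plain matplotlib (a sketch only;
# the real helper lives in nb.py, added later in this patch).
import matplotlib.pyplot as plt
import matplotlib.tri as tri

def multi1_plot_sketch(funcs, titles, mesh, same_colorbar=True):
    # build a matplotlib triangulation from the FEniCS mesh
    xy = mesh.coordinates()
    triang = tri.Triangulation(xy[:, 0], xy[:, 1], mesh.cells())
    vals = [f.compute_vertex_values(mesh) for f in funcs]
    vmin = min(v.min() for v in vals) if same_colorbar else None
    vmax = max(v.max() for v in vals) if same_colorbar else None
    plt.figure(figsize=(7.5 * len(funcs), 5))
    for i, (v, t) in enumerate(zip(vals, titles)):
        plt.subplot(1, len(funcs), i + 1)
        img = plt.tripcolor(triang, v, shading='gouraud', vmin=vmin, vmax=vmax)
        plt.colorbar(img)
        plt.title(t)
    plt.show()

# usage, mirroring the cell above:
# multi1_plot_sketch([atrue, a], ["atrue", "a"], mesh)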
+ + diff --git a/PoissonDeterministic-SD.ipynb b/PoissonDeterministic-SD.ipynb new file mode 100644 index 0000000..c816fd5 --- /dev/null +++ b/PoissonDeterministic-SD.ipynb @@ -0,0 +1,498 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Coefficient field inversion in an elliptic partial differential equation\n", + "\n", + "We consider the estimation of a coefficient in an elliptic partial\n", + "differential equation as a first model problem. Depending on the\n", + "interpretation of the unknowns and the type of measurements, this\n", + "model problem arises, for instance, in inversion for groundwater flow\n", + "or heat conductivity. It can also be interpreted as finding a\n", + "membrane with a certain spatially varying stiffness. Let\n", + "$\\Omega\\subset\\mathbb{R}^n$, $n\\in\\{1,2,3\\}$ be an open, bounded\n", + "domain and consider the following problem:\n", + "\n", + "$$\n", + "\\min_{a} J(a):=\\frac{1}{2}\\int_\\Omega (u-u_d)^2\\, dx + \\frac{\\gamma}{2}\\int_\\Omega|\\nabla a|^2\\,dx,\n", + "$$\n", + "\n", + "where $u$ is the solution of\n", + "\n", + "$$\n", + "\\begin{split}\n", + "\\quad -\\nabla\\cdot(a\\nabla u) &= f \\text{ in }\\Omega,\\\\\n", + "u &= 0 \\text{ on }\\partial\\Omega.\n", + "\\end{split}\n", + "$$\n", + "\n", + "Here $a\\in U_{ad}:=\\{a\\in L^{\\infty}(\\Omega)\\}$ the unknown coefficient field, $u_d$ denotes (possibly noisy) data, $f\\in H^{-1}(\\Omega)$ a given force, and $\\gamma\\ge 0$ the regularization parameter.\n", + "\n", + "### The variational (or weak) form of the state equation:\n", + "\n", + "Find $u\\in H_0^1(\\Omega)$ such that $(a\\nabla u,\\nabla v) - (f,v) = 0, \\text{ for all } v\\in H_0^1(\\Omega),$\n", + "where $H_0^1(\\Omega)$ is the space of functions vanishing on $\\partial\\Omega$ with square integrable derivatives. Here, $(\\cdot\\,,\\cdot)$ denotes the $L^2$-inner product, i.e, for scalar functions $u,v$ defined on $\\Omega$ we denote $(u,v) := \\int_\\Omega u(x) v(x) \\,dx$.\n", + "\n", + "### Optimality System:\n", + "\n", + "The Lagrangian functional $\\mathscr{L}:L^\\infty(\\Omega)\\times H_0^1(\\Omega)\\times H_0^1(\\Omega)\\rightarrow \\mathbb{R}$, which we use as a tool to derive the optimality system, is given by\n", + "\n", + "$$\n", + "\\mathscr{L}(a,u,p):= \\frac{1}{2}(u-u_d,u-u_d) +\n", + "\\frac{\\gamma}{2}(\\nabla a, \\nabla a) + (a\\nabla u,\\nabla p) - (f,p).\n", + "$$\n", + "\n", + "The Lagrange multiplier theory shows that, at a solution all variations of the Lagrangian functional with respect to all variables must vanish. These variations of $\\mathscr{L}$ with respect to $(p,u,a)$ in directions $(\\tilde{u}, \\tilde{p}, \\tilde{a})$ are given by\n", + "\n", + "$$\n", + " \\begin{alignat}{2}\n", + " \\mathscr{L}_p(a,u,p)(\\tilde{p}) &= (a\\nabla u, \\nabla \\tilde{p}) -\n", + " (f,\\tilde{p}) &&= 0,\\\\\n", + " \\mathscr{L}_u(a,u,p)(\\tilde{u}) &= (a\\nabla p, \\nabla \\tilde{u}) +\n", + " (u-u_d,\\tilde{u}) && = 0,\\\\\n", + " \\mathscr{L}_a(a,u,p)(\\tilde{a}) &= \\gamma(\\nabla a, \\nabla \\tilde{a}) +\n", + " (\\tilde{a}\\nabla u, \\nabla p) &&= 0,\n", + " \\end{alignat}\n", + "$$\n", + "\n", + "where the variations $(\\tilde{u}, \\tilde{p}, \\tilde{a})$ are taken from the same spaces as $(u,p,a)$. 
\n", + "\n", + "The gradient of the cost functional $\\mathcal{J}(a)$ therefore is\n", + "\n", + "$$\n", + " \\mathcal{G}(a)(\\tilde a) = \\gamma(\\nabla a, \\nabla \\tilde{a}) +\n", + " (\\tilde{a}\\nabla u, \\nabla \\tilde{p}).\n", + "$$\n", + "\n", + "### Goals:\n", + "\n", + "By the end of this notebook, you should be able to:\n", + "\n", + "- solve the forward and adjoint Poisson equations\n", + "- understand the inverse method framework\n", + "- visualise and understand the results\n", + "- modify the problem and code\n", + "\n", + "### Mathematical tools used:\n", + "\n", + "- Finite element method\n", + "- Derivation of gradiant via the adjoint method\n", + "- Armijo line search\n", + "\n", + "### List of software used:\n", + "\n", + "- FEniCS, a parallel finite element element library for the discretization of partial differential equations\n", + "- PETSc, for scalable and efficient linear algebra operations and solvers\n", + "- Matplotlib, a python package used for plotting the results\n", + "- Numpy, a python package for linear algebra" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up\n", + "\n", + "### Import dependencies" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from dolfin import *\n", + "\n", + "import numpy as np\n", + "import time\n", + "import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import nb\n", + "\n", + "start = time.clock()\n", + "\n", + "logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "set_log_active(False)\n", + "\n", + "np.random.seed(seed=1)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model set up:\n", + "\n", + "As in the introduction, the first thing we need to do is to set up the numerical model.\n", + "\n", + "In this cell, we set the mesh ``mesh``, the finite element spaces ``Va`` and ``Vu`` corresponding to the parameter space and state/adjoint space, respectively. In particular, we use linear finite elements for the parameter space, and quadratic elements for the state/adjoint space.\n", + "\n", + "The true parameter ``atrue`` is the finite element interpolant of the function\n", + "$$ a_{\\rm true} = \\left\\{ \\begin{array}{l} 4 \\; \\forall \\,(x,y) \\, {\\rm s.t.}\\, \\sqrt{ (x-.5)^2 + (y-.5)^2} \\leq 0.2 \\\\ 8 \\; {\\rm otherwise}. \\end{array}\\right. $$\n", + "\n", + "The forcing term ``f`` and the boundary conditions ``u0`` for the forward problem are\n", + "$$ f = 1 \\; \\forall {\\bf x} \\in \\Omega, \\quad u = 0 \\; \\forall {\\bf x} \\in \\partial \\Omega. $$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# create mesh and define function spaces\n", + "nx = 32\n", + "ny = 32\n", + "mesh = UnitSquareMesh(nx, ny)\n", + "Va = FunctionSpace(mesh, 'Lagrange', 1)\n", + "Vu = FunctionSpace(mesh, 'Lagrange', 2)\n", + "\n", + "# The true and inverted parameter\n", + "atrue = interpolate(Expression('8. 
- 4.*(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2) < pow(0.2,2))'), Va)\n", + "a = interpolate(Expression(\"4.\"),Va)\n", + "\n", + "# define function for state and adjoint\n", + "u = Function(Vu)\n", + "p = Function(Vu)\n", + "\n", + "# define Trial and Test Functions\n", + "u_trial, p_trial, a_trial = TrialFunction(Vu), TrialFunction(Vu), TrialFunction(Va)\n", + "u_test, p_test, a_test = TestFunction(Vu), TestFunction(Vu), TestFunction(Va)\n", + "\n", + "# initialize input functions\n", + "f = Constant(\"1.0\")\n", + "u0 = Constant(\"0.0\")\n", + "\n", + "# plot\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(mesh,subplot_loc=121, mytitle=\"Mesh\", show_axis='on')\n", + "nb.plot(atrue,subplot_loc=122, mytitle=\"True parameter field\")\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# set up dirichlet boundary conditions\n", + "def boundary(x,on_boundary):\n", + " return on_boundary\n", + "\n", + "bc_state = DirichletBC(Vu, u0, boundary)\n", + "bc_adj = DirichletBC(Vu, Constant(0.), boundary)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The cost functional evaluation:\n", + "\n", + "$$\n", + "J(a):=\\underbrace{\\frac{1}{2}\\int_\\Omega (u-u_d)^2\\, dx}_{\\text misfit} + \\underbrace{\\frac{\\gamma}{2}\\int_\\Omega|\\nabla a|^2\\,dx}_{\\text reg}\n", + "$$\n", + "\n", + "In the code below, $W$ and $R$ are symmetric positive definite matrices that stem from finite element discretization of the misfit and regularization component of the cost functional, respectively." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Regularization parameter\n", + "gamma = 1e-10\n", + "\n", + "# weak for for setting up the misfit and regularization compoment of the cost\n", + "W_equ = inner(u_trial, u_test) * dx\n", + "R_equ = gamma * inner(nabla_grad(a_trial), nabla_grad(a_test)) * dx\n", + "\n", + "W = assemble(W_equ)\n", + "R = assemble(R_equ)\n", + "\n", + "# Define cost function\n", + "def cost(u, ud, a, W, R):\n", + " diff = u.vector() - ud.vector()\n", + " reg = 0.5 * a.vector().inner(R*a.vector() ) \n", + " misfit = 0.5 * diff.inner(W * diff)\n", + " return [reg + misfit, misfit, reg]" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up synthetic observations:\n", + "\n", + "To generate the synthetic observation we first solve the PDE for the state variable ``utrue`` corresponding to the true parameter ``atrue``.\n", + "More specifically, we solve the variational problem\n", + "\n", + "Find $u\\in H_0^1(\\Omega)$ such that $\\underbrace{(a_{\\text true} \\nabla u,\\nabla v)}_{\\; := \\; a_{\\rm goal}} - \\underbrace{(f,v)}_{\\; := \\;L_{\\rm goal}} = 0, \\text{ for all } v\\in H_0^1(\\Omega)$.\n", + "\n", + "Then we perturb the true state variable and write the observation ``ud`` as\n", + "$$ u_{d} = u_{\\rm true} + \\eta, \\quad {\\rm where} \\; \\eta \\sim \\mathcal{N}(0, \\sigma^2).$$\n", + "Here the standard variation $\\sigma$ is proportional to ``noise_level``." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# noise level\n", + "noise_level = 0.01\n", + "\n", + "# weak form for setting up the synthetic observations\n", + "a_goal = inner( atrue * nabla_grad(u_trial), nabla_grad(u_test)) * dx\n", + "L_goal = f * u_test * dx\n", + "\n", + "# solve the forward/state problem to generate synthetic observations\n", + "goal_A, goal_b = assemble_system(a_goal, L_goal, bc_state)\n", + "\n", + "utrue = Function(Vu)\n", + "solve(goal_A, utrue.vector(), goal_b)\n", + "\n", + "ud = Function(Vu)\n", + "ud.assign(utrue)\n", + "\n", + "# perturb state solution and create synthetic measurements ud\n", + "# ud = u + ||u||/SNR * random.normal\n", + "MAX = ud.vector().norm(\"linf\")\n", + "noise = Vector()\n", + "goal_A.init_vector(noise,1)\n", + "noise.set_local( noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())) )\n", + "bc_adj.apply(noise)\n", + "\n", + "ud.vector().axpy(1., noise)\n", + "\n", + "# plot\n", + "nb.multi1_plot([utrue, ud], [\"State solution with atrue\", \"Synthetic observations\"])\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting up the state equations, right hand side for the adjoint and the necessary matrices:\n", + "\n", + "$$\n", + " \\begin{alignat}{2}\n", + " \\mathscr{L}_p(a,u,p)(\\tilde{p}) &= (a\\nabla u, \\nabla \\tilde{p}) -\n", + " (f,\\tilde{p}) &&= 0,\\\\\n", + " \\mathscr{L}_u(a,u,p)(\\tilde{u}) &= (a\\nabla p, \\nabla \\tilde{u}) +\n", + " (u-u_d,\\tilde{u}) && = 0,\\\\\n", + " \\mathscr{L}_a(a,u,p)(\\tilde{a}) &= \\gamma(\\nabla a, \\nabla \\tilde{a}) +\n", + " (\\tilde{a}\\nabla u, \\nabla p) &&= 0,\n", + " \\end{alignat}\n", + "$$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# weak form for setting up the state equation\n", + "a_state = inner( a * nabla_grad(u_trial), nabla_grad(u_test)) * dx\n", + "L_state = f * u_test * dx\n", + "\n", + "# weak form for setting up the adjoint equations\n", + "a_adj = inner( a * nabla_grad(p_trial), nabla_grad(p_test) ) * dx\n", + "L_adjoint = -inner(u - ud, u_test) * dx\n", + "\n", + "\n", + "# weak form for setting up matrices\n", + "CT_equ = inner(a_test * nabla_grad(u), nabla_grad(p_trial)) * dx\n", + "M_equ = inner(a_trial, a_test) * dx\n", + "\n", + "\n", + "# assemble matrices M\n", + "M = assemble(M_equ)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initial guess\n", + "We solve the state equation and compute the cost functional for the initial guess of the parameter ``a_ini``" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# solve state equation\n", + "A, state_b = assemble_system (a_state, L_state, bc_state)\n", + "solve (A, u.vector(), state_b)\n", + "\n", + "# evaluate cost\n", + "[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R)\n", + "\n", + "# plot\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(a,subplot_loc=121, mytitle=\"a_ini\", vmin=atrue.vector().min(), vmax=atrue.vector().max())\n", + "nb.plot(u,subplot_loc=122, mytitle=\"u(a_ini)\")\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The steepest descent with Armijo line search:\n", + "\n", + "We solve the constrained optimization problem using the steepest descent method with Armijo line search.\n", + "\n", + 
"The stopping criterion is based on a relative reduction of the norm of the gradient (i.e. $\\frac{\\|g_{n}\\|}{\\|g_{0}\\|} \\leq \\tau$).\n", + "\n", + "The gradient is computed by solving the state and adjoint equation for the current parameter $a$, and then substituing the current state $u$, parameter $a$ and adjoint $p$ variables in the weak form expression of the gradient:\n", + "$$ (g, \\tilde{a}) = \\gamma(\\nabla a, \\nabla \\tilde{a}) +(\\tilde{a}\\nabla u, \\nabla p).$$\n", + "\n", + "The Armijo line search uses backtracking to find $\\alpha$ such that a sufficient reduction in the cost functional is achieved.\n", + "More specifically, we use backtracking to find $\\alpha$ such that:\n", + "$$J( a - \\alpha g ) \\leq J(a) - \\alpha c_{\\rm armijo} (g,g). $$\n" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# define parameters for the optimization\n", + "tol = 1e-4\n", + "maxiter = 1000\n", + "plot_any = 30\n", + "c_armijo = 1e-5\n", + "\n", + "# initialize iter counters\n", + "iter = 1\n", + "converged = False\n", + "\n", + "# initializations\n", + "g = Vector()\n", + "R.init_vector(g,0)\n", + "\n", + "a_prev = Function(Va)\n", + "\n", + "print \"Nit cost misfit reg ||grad|| alpha N backtrack\"\n", + "\n", + "while iter < maxiter and not converged:\n", + "\n", + " # assemble matrix C\n", + " CT = assemble(CT_equ)\n", + "\n", + " # solve the adoint problem\n", + " adj_A, adjoint_RHS = assemble_system(a_adj, L_adjoint, bc_adj)\n", + " solve(adj_A, p.vector(), adjoint_RHS)\n", + "\n", + " # evaluate the gradient\n", + " MG = CT*p.vector() + R * a.vector()\n", + " solve(M, g, MG)\n", + "\n", + " # calculate the norm of the gradient\n", + " grad_norm2 = g.inner(MG)\n", + " gradnorm = sqrt(grad_norm2)\n", + " \n", + " if iter == 1:\n", + " gradnorm0 = gradnorm\n", + "\n", + " # linesearch\n", + " it_backtrack = 0\n", + " a_prev.assign(a)\n", + " alpha = 8.e5\n", + " backtrack_converged = False\n", + " for it_backtrack in range(20):\n", + " \n", + " a.vector().axpy(-alpha, g )\n", + "\n", + " # solve the state/forward problem\n", + " state_A, state_b = assemble_system(a_state, L_state, bc_state)\n", + " solve(state_A, u.vector(), state_b)\n", + "\n", + " # evaluate cost\n", + " [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R)\n", + "\n", + " # check if Armijo conditions are satisfied\n", + " if cost_new < cost_old - alpha * c_armijo * grad_norm2:\n", + " cost_old = cost_new\n", + " backtrack_converged = True\n", + " break\n", + " else:\n", + " alpha *= 0.5\n", + " a.assign(a_prev) # reset a\n", + " \n", + " if backtrack_converged == False:\n", + " print \"Backtracking failed. 
A sufficient descent direction was not found\"\n", + " converged = False\n", + " break\n", + "\n", + " sp = \"\"\n", + " print \"%3d %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %3d\" % \\\n", + " (iter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \\\n", + " gradnorm, sp, alpha, sp, it_backtrack)\n", + "\n", + " if (iter % plot_any)==0 :\n", + " nb.multi1_plot([a,u,p], [\"a\",\"u\",\"p\"], same_colorbar=False)\n", + " plt.show()\n", + " \n", + " # check for convergence\n", + " if gradnorm < tol*gradnorm0 and iter > 1:\n", + " converged = True\n", + " print \"Steepest descent converged in \",iter,\" iterations\"\n", + " \n", + " iter += 1\n", + " \n", + "if not converged:\n", + " print \"Steepest descent did not converge in \", maxiter, \" iterations\"\n", + "\n", + "print \"Time elapsed: \", time.clock()-start" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nb.multi1_plot([atrue, a], [\"atrue\", \"a\"])\n", + "nb.multi1_plot([u,p], [\"u\",\"p\"], same_colorbar=False)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/PoissonDeterministic-SD.py b/PoissonDeterministic-SD.py new file mode 100644 index 0000000..03d7750 --- /dev/null +++ b/PoissonDeterministic-SD.py @@ -0,0 +1,246 @@ +# Coefficient field inversion in an elliptic partial differential equation +# +# Consider the following problem: +# +# min_a J(a):=1/2 int_Omega (u-ud)^2 dx +gamma/2 int_Omega | grad a|^2 dx +# +# where u is the solution of +# +# -div (a grad u) = f in Omega, +# u = 0 on partial Omega. +# +# Here a the unknown coefficient field, ud denotes (possibly noisy) data, $f\in H^{-1}(Omega)$ a given force, and $ gamma >= 0$ the regularization parameter. + +# 1. Import dependencies + +from dolfin import * + +import numpy as np +import time +import logging + +import matplotlib.pyplot as plt +import nb + +start = time.clock() + +logging.getLogger('FFC').setLevel(logging.WARNING) +logging.getLogger('UFL').setLevel(logging.WARNING) +set_log_active(False) + +np.random.seed(seed=1) + + + +# 2. Model set up: + +# create mesh and define function spaces +nx = 32 +ny = 32 +mesh = UnitSquareMesh(nx, ny) +Va = FunctionSpace(mesh, 'Lagrange', 1) +Vu = FunctionSpace(mesh, 'Lagrange', 2) + +# The true and inverted parameter +atrue = interpolate(Expression('8. - 4.*(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2) < pow(0.2,2))'), Va) +a = interpolate(Expression("4."),Va) + +# define function for state and adjoint +u = Function(Vu) +p = Function(Vu) + +# define Trial and Test Functions +u_trial, p_trial, a_trial = TrialFunction(Vu), TrialFunction(Vu), TrialFunction(Va) +u_test, p_test, a_test = TestFunction(Vu), TestFunction(Vu), TestFunction(Va) + +# initialize input functions +f = Constant("1.0") +u0 = Constant("0.0") + +# plot +plt.figure(figsize=(15,5)) +nb.plot(mesh,subplot_loc=121, mytitle="Mesh", show_axis='on') +nb.plot(atrue,subplot_loc=122, mytitle="True parameter field") + +# set up dirichlet boundary conditions +def boundary(x,on_boundary): + return on_boundary + +bc_state = DirichletBC(Vu, u0, boundary) +bc_adj = DirichletBC(Vu, Constant(0.), boundary) + +# 3. 
The cost functional evaluation: + +# Regularization parameter +gamma = 1e-10 + +# weak for for setting up the misfit and regularization compoment of the cost +W_equ = inner(u_trial, u_test) * dx +R_equ = gamma * inner(nabla_grad(a_trial), nabla_grad(a_test)) * dx + +W = assemble(W_equ) +R = assemble(R_equ) + +# Define cost function +def cost(u, ud, a, W, R): + diff = u.vector() - ud.vector() + reg = 0.5 * a.vector().inner(R*a.vector() ) + misfit = 0.5 * diff.inner(W * diff) + return [reg + misfit, misfit, reg] + +# 4. Set up synthetic observations: + +# noise level +noise_level = 0.01 + +# weak form for setting up the synthetic observations +a_goal = inner( atrue * nabla_grad(u_trial), nabla_grad(u_test)) * dx +L_goal = f * u_test * dx + +# solve the forward/state problem to generate synthetic observations +goal_A, goal_b = assemble_system(a_goal, L_goal, bc_state) + +utrue = Function(Vu) +solve(goal_A, utrue.vector(), goal_b) + +ud = Function(Vu) +ud.assign(utrue) + +# perturb state solution and create synthetic measurements ud +# ud = u + ||u||/SNR * random.normal +MAX = ud.vector().norm("linf") +noise = Vector() +goal_A.init_vector(noise,1) +noise.set_local( noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())) ) +bc_adj.apply(noise) + +ud.vector().axpy(1., noise) + +# plot +nb.multi1_plot([utrue, ud], ["State solution with atrue", "Synthetic observations"]) + +# 5. Setting up the state equations, right hand side for the adjoint and the necessary matrices: + +# weak form for setting up the state equation +a_state = inner( a * nabla_grad(u_trial), nabla_grad(u_test)) * dx +L_state = f * u_test * dx + +# weak form for setting up the adjoint equations +a_adj = inner( a * nabla_grad(p_trial), nabla_grad(p_test) ) * dx +L_adjoint = -inner(u - ud, u_test) * dx + + +# weak form for setting up matrices +CT_equ = inner(a_test * nabla_grad(u), nabla_grad(p_trial)) * dx +M_equ = inner(a_trial, a_test) * dx + + +# assemble matrices M +M = assemble(M_equ) + +# + +# 6. Initial guess + +# solve state equation +A, state_b = assemble_system (a_state, L_state, bc_state) +solve (A, u.vector(), state_b) + +# evaluate cost +[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R) + +# plot +plt.figure(figsize=(15,5)) +nb.plot(a,subplot_loc=121, mytitle="a_ini", vmin=atrue.vector().min(), vmax=atrue.vector().max()) +nb.plot(u,subplot_loc=122, mytitle="u(a_ini)") + +# 7. 
The steepest descent with Armijo line search + +# define parameters for the optimization +tol = 1e-4 +maxiter = 1000 +c_armijo = 1e-5 + +# initialize iter counters +iter = 1 +converged = False + +# initializations +g = Vector() +R.init_vector(g,0) + +a_prev = Function(Va) + +print "Nit cost misfit reg ||grad|| alpha N backtrack" + +while iter < maxiter and not converged: + + # assemble matrix C + CT = assemble(CT_equ) + + # solve the adoint problem + adj_A, adjoint_RHS = assemble_system(a_adj, L_adjoint, bc_adj) + solve(adj_A, p.vector(), adjoint_RHS) + + # evaluate the gradient + MG = CT*p.vector() + R * a.vector() + solve(M, g, MG) + + # calculate the norm of the gradient + grad_norm2 = g.inner(MG) + gradnorm = sqrt(grad_norm2) + + if iter == 1: + gradnorm0 = gradnorm + + # linesearch + it_backtrack = 0 + a_prev.assign(a) + alpha = 8.e5 + backtrack_converged = False + for it_backtrack in range(20): + + a.vector().axpy(-alpha, g ) + + # solve the state/forward problem + state_A, state_b = assemble_system(a_state, L_state, bc_state) + solve(state_A, u.vector(), state_b) + + # evaluate cost + [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R) + + # check if Armijo conditions are satisfied + if cost_new < cost_old - alpha * c_armijo * grad_norm2: + cost_old = cost_new + backtrack_converged = True + break + else: + alpha *= 0.5 + a.assign(a_prev) # reset a + + if backtrack_converged == False: + print "Backtracking failed. A sufficient descent direction was not found" + converged = False + break + + sp = "" + print "%3d %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %3d" % \ + (iter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \ + gradnorm, sp, alpha, sp, it_backtrack) + + # check for convergence + if gradnorm < tol*gradnorm0 and iter > 1: + converged = True + print "Steepest descent converged in ",iter," iterations" + + iter += 1 + +if not converged: + print "Steepest descent did not converge in ", maxiter, " iterations" + +print "Time elapsed: ", time.clock()-start + +nb.multi1_plot([atrue, a], ["atrue", "a"]) +nb.multi1_plot([u,p], ["u","p"], same_colorbar=False) +plt.show() + diff --git a/PoissonDeterministic.ipynb b/PoissonDeterministic.ipynb new file mode 100644 index 0000000..e2b0482 --- /dev/null +++ b/PoissonDeterministic.ipynb @@ -0,0 +1,640 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Coefficient field inversion in an elliptic partial differential equation\n", + "\n", + "We consider the estimation of a coefficient in an elliptic partial\n", + "differential equation as a first model problem. Depending on the\n", + "interpretation of the unknowns and the type of measurements, this\n", + "model problem arises, for instance, in inversion for groundwater flow\n", + "or heat conductivity. It can also be interpreted as finding a\n", + "membrane with a certain spatially varying stiffness. 
Let\n", + "$\\Omega\\subset\\mathbb{R}^n$, $n\\in\\{1,2,3\\}$ be an open, bounded\n", + "domain and consider the following problem:\n", + "\n", + "$$\n", + "\\min_{a} J(a):=\\frac{1}{2}\\int_\\Omega (u-u_d)^2\\, dx + \\frac{\\gamma}{2}\\int_\\Omega|\\nabla a|^2\\,dx,\n", + "$$\n", + "\n", + "where $u$ is the solution of\n", + "\n", + "$$\n", + "\\begin{split}\n", + "\\quad -\\nabla\\cdot(\\exp(a)\\nabla u) &= f \\text{ in }\\Omega,\\\\\n", + "u &= 0 \\text{ on }\\partial\\Omega.\n", + "\\end{split}\n", + "$$\n", + "\n", + "Here $a\\in U_{ad}:=\\{a\\in L^{\\infty}(\\Omega)\\}$ the unknown coefficient field, $u_d$ denotes (possibly noisy) data, $f\\in H^{-1}(\\Omega)$ a given force, and $\\gamma\\ge 0$ the regularization parameter.\n", + "\n", + "### The variational (or weak) form of the state equation:\n", + "\n", + "Find $u\\in H_0^1(\\Omega)$ such that $(\\exp(a)\\nabla u,\\nabla v) - (f,v) = 0, \\text{ for all } v\\in H_0^1(\\Omega),$\n", + "where $H_0^1(\\Omega)$ is the space of functions vanishing on $\\partial\\Omega$ with square integrable derivatives. Here, $(\\cdot\\,,\\cdot)$ denotes the $L^2$-inner product, i.e, for scalar functions $u,v$ defined on $\\Omega$ we denote $(u,v) := \\int_\\Omega u(x) v(x) \\,dx$.\n", + "\n", + "### Optimality System:\n", + "\n", + "The Lagrangian functional $\\mathscr{L}:L^\\infty(\\Omega)\\times H_0^1(\\Omega)\\times H_0^1(\\Omega)\\rightarrow \\mathbb{R}$, which we use as a tool to derive the optimality system, is given by\n", + "\n", + "$$\n", + "\\mathscr{L}(a,u,p):= \\frac{1}{2}(u-u_d,u-u_d) +\n", + "\\frac{\\gamma}{2}(\\nabla a, \\nabla a) + (\\exp(a)\\nabla u,\\nabla p) - (f,p).\n", + "$$\n", + "\n", + "The Lagrange multiplier theory shows that, at a solution all variations of the Lagrangian functional with respect to all variables must vanish. These variations of $\\mathscr{L}$ with respect to $(p,u,a)$ in directions $(\\tilde{u}, \\tilde{p}, \\tilde{a})$ are given by\n", + "\n", + "$$\n", + " \\begin{alignat}{2}\n", + " \\mathscr{L}_p(a,u,p)(\\tilde{p}) &= (\\exp(a)\\nabla u, \\nabla \\tilde{p}) -\n", + " (f,\\tilde{p}) &&= 0,\\\\\n", + " \\mathscr{L}_u(a,u,p)(\\tilde{u}) &= (\\exp(a)\\nabla p, \\nabla \\tilde{u}) +\n", + " (u-u_d,\\tilde{u}) && = 0,\\\\\n", + " \\mathscr{L}_a(a,u,p)(\\tilde{a}) &= \\gamma(\\nabla a, \\nabla \\tilde{a}) +\n", + " (\\tilde{a}\\exp(a)\\nabla u, \\nabla p) &&= 0,\n", + " \\end{alignat}\n", + "$$\n", + "\n", + "where the variations $(\\tilde{u}, \\tilde{p}, \\tilde{a})$ are taken from the same spaces as $(u,p,a)$. \n", + "\n", + "The gradient of the cost functional $\\mathcal{J}(a)$ therefore is\n", + "\n", + "$$\n", + " \\mathcal{G}(a)(\\tilde a) = \\gamma(\\nabla a, \\nabla \\tilde{a}) +\n", + " (\\tilde{a}\\exp(a)\\nabla u, \\nabla \\tilde{p}).\n", + "$$\n", + "\n", + "### Inexact Newton-CG:\n", + "\n", + "Newton's method requires second-order variational derivatives of the Lagrangian . Written in abstract form, it computes an update direction $(\\hat a_k, \\hat u_k,\\hat p_k)$ from the following Newton step for the Lagrangian functional:\n", + "\n", + "$$\n", + "\\mathscr{L}''(a_k, u_k, p_k)\\left[(\\tilde\n", + " a, \\tilde u, \\tilde p),(\\hat a_k, \\hat u_k, \\hat p_k)\\right] =\n", + "-\\mathscr{L}'(a_k,u_k,p_k)(\\tilde a, \\tilde u, \\tilde p),\n", + "$$\n", + "\n", + "for all variations $(\\tilde a, \\tilde u, \\tilde p)$, where $\\mathscr{L}'$ and $\\mathscr{L}''$ denote the first and\n", + "second variations of the Lagrangian. 
For the elliptic parameter inversion problem, this Newton step (written in variatonal form) is as follows: Find $(\\hat u_k, \\hat a_k,\\hat p_k)$ as the solution of the linear system\n", + "\n", + "$$\n", + " \\begin{array}{llll}\n", + " (\\hat{u}_k, \\tilde u) &+ (\\hat{a}_k \\exp(a_k)\\nabla p_k, \\nabla\n", + " \\tilde u) &+ (\\exp(a_k) \\nabla \\tilde u,\n", + " \\nabla \\hat p_k) &= (u_d - u_k, \\tilde u)- (\\exp(a_k) \\nabla\n", + " p_k, \\nabla \\tilde u)\\\\\n", + " (\\tilde a \\exp(a_k) \\nabla \\hat u_k, \\nabla p_k) &+ \\gamma\n", + " (\\nabla \\hat a_k, \\nabla \\tilde a) + (\\tilde a \\hat a_k \\exp(a_k)\\nabla u, \\nabla p) &+ (\\tilde a\n", + " \\exp(a_k) \\nabla u_k, \\nabla \\hat p_k) &= - \\gamma (\\nabla a_k, \\nabla\\tilde a) - (\\tilde\n", + " a \\exp(a_k) \\nabla u_k, \\nabla p_k)\\\\\n", + " (\\exp(a_k) \\nabla \\hat u_k, \\nabla \\tilde p) &+ (\\hat a_k \\exp(a_k) \\nabla u_k, \\nabla\n", + " \\tilde p) & &= - (\\exp(a_k) \\nabla u_k,\n", + " \\nabla \\tilde p) + (f, \\tilde p),\n", + " \\end{array}\n", + "$$\n", + "\n", + "for all $(\\tilde u, \\tilde a, \\tilde p)$.\n", + "\n", + "### Discrete Newton system:\n", + "$\n", + "\\def\\tu{\\tilde u}\n", + "\\def\\btu{\\bf \\tilde u}\n", + "\\def\\ta{\\tilde a}\n", + "\\def\\bta{\\bf \\tilde a}\n", + "\\def\\tp{\\tilde p}\n", + "\\def\\btp{\\bf \\tilde p}\n", + "\\def\\hu{\\hat u}\n", + "\\def\\bhu{\\bf \\hat u}\n", + "\\def\\ha{\\hat a}\n", + "\\def\\bha{\\bf \\hat a}\n", + "\\def\\hp{\\hat p}\n", + "\\def\\bhp{\\bf \\hat p}\n", + "$\n", + "The discretized Newton step: denote the vectors corresponding to the discretization of the functions $\\ha_k,\\hu_k, \\hp_k$ by $\\bf \\bha_k, \\bhu_k$ and $\\bhp_k$. Then, the discretization of the above system is given by the following symmetric linear system:\n", + "\n", + "$$\n", + " \\begin{bmatrix}\n", + " \\bf W_{\\scriptsize\\mbox{uu}} & \\bf W_{\\scriptsize\\mbox{ua}} & \\bf A^T \\\\\n", + " \\bf W_{\\scriptsize\\mbox{au}} & \\bf R + \\bf R_{\\scriptsize\\mbox{aa}}& \\bf C^T \\\\\n", + " \\bf A & \\bf C & 0\n", + "\\end{bmatrix}\n", + "\\left[\n", + " \\begin{array}{c}\n", + " \\bhu_k \\\\\n", + " \\bha_k \\\\\n", + " \\bhp_k\n", + " \\end{array} \\right] =\n", + "-\\left[\n", + " \\begin{array}{ccc}\n", + " \\bf{g}_u\\\\\n", + " \\bf{g}_a\\\\\n", + " \\bf{g}_p\n", + "\\end{array}\n", + " \\right],\n", + "$$\n", + "\n", + "where $\\bf W_{\\scriptsize \\mbox{uu}}$, $\\bf W_{\\scriptsize\\mbox{ua}}$, $\\bf W_{\\scriptsize\\mbox{au}}$, and $\\bf R$ are the components of the Hessian matrix of the Lagrangian, $\\bf A$ and $\\bf C$ are the Jacobian of the state equation with respect to the state and the control variables, respectively and $\\bf g_u$, $\\bf g_a$, and $\\bf g_p$ are the discrete gradients of the Lagrangian with respect to $\\bf u $, $\\bf a$ and $\\bf p$, respectively.\n", + "\n", + "### Reduced Hessian apply:\n", + "\n", + "To eliminate the incremental state and adjoint variables, $\\bhu_k$ and $\\bhp_k$, from the first and last equations we use\n", + "\n", + "$$\n", + "\\begin{align}\n", + "\\bhu_k &= -\\bf A^{-1} \\bf C \\, \\bha_k,\\\\\n", + "\\bhp_k &= -\\bf A^{-T} (\\bf W_{\\scriptsize\\mbox{uu}} \\bhu_k +\n", + "\\bf W_{\\scriptsize\\mbox{ua}}\\,\\bha_k).\n", + "\\end{align}\n", + "$$\n", + "\n", + "This results in the following reduced linear system for the Newton step\n", + "\n", + "$$\n", + " \\bf H \\, \\bha_k = -\\bf{g}_a,\n", + "$$\n", + "\n", + "with the reduced Hessian $\\bf H$ applied to a vector $\\bha$ given by\n", + "\n", + "$$\n", + " \\bf H 
\\bha = \\underbrace{(\\bf R + \\bf R_{\\scriptsize\\mbox{aa}})}_{\\text{Hessian of the regularization}} \\bha +\n", + " \\underbrace{(\\bf C^{T}\\bf A^{-T} (\\bf W_{\\scriptsize\\mbox{uu}}\n", + " \\bf A^{-1} \\bf C - \\bf W_{\\scriptsize\\mbox{ua}}) -\n", + " \\bf W_{\\scriptsize\\mbox{au}} \\bf A^{-1}\n", + " \\bf C)}_{\\text{Hessian of the data misfit}}\\;\\bha.\n", + "$$\n", + "\n", + "### Goals:\n", + "\n", + "By the end of this notebook, you should be able to:\n", + "\n", + "- solve the forward and adjoint Poisson equations\n", + "- understand the inverse method framework\n", + "- visualise and understand the results\n", + "- modify the problem and code\n", + "\n", + "### Mathematical tools used:\n", + "\n", + "- Finite element method\n", + "- Derivation of gradiant and Hessian via the adjoint method\n", + "- inexact Newton-CG\n", + "- Armijo line search\n", + "\n", + "### List of software used:\n", + "\n", + "- FEniCS, a parallel finite element element library for the discretization of partial differential equations\n", + "- PETSc, for scalable and efficient linear algebra operations and solvers\n", + "- Matplotlib, a python package used for plotting the results\n", + "- Numpy, a python package for linear algebra" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up\n", + "\n", + "### Import dependencies" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from dolfin import *\n", + "\n", + "import numpy as np\n", + "import time\n", + "import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import nb\n", + "\n", + "start = time.clock()\n", + "\n", + "logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "set_log_active(False)\n", + "\n", + "np.random.seed(seed=1)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The cost function evaluation:\n", + "\n", + "$$\n", + "J(a):=\\underbrace{\\frac{1}{2}\\int_\\Omega (u-u_d)^2\\, dx}_{\\text misfit} + \\underbrace{\\frac{\\gamma}{2}\\int_\\Omega|\\nabla a|^2\\,dx}_{\\text reg}\n", + "$$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Define cost function\n", + "def cost(u, ud, a, W, R):\n", + " diff = u.vector() - ud.vector()\n", + " reg = 0.5 * a.vector().inner(R*a.vector() ) \n", + " misfit = 0.5 * diff.inner(W * diff)\n", + " return [reg + misfit, misfit, reg]" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The reduced Hessian apply to a vector v:\n", + "\n", + "$$\n", + "\\begin{align}\n", + "\\bhu &= -\\bf A^{-1} \\bf C \\, & \\text{linearized forward}\\\\\n", + "\\bhp &= -\\bf A^{-T} (\\bf W_{\\scriptsize\\mbox{uu}} \\bhu +\n", + "\\bf W_{\\scriptsize\\mbox{ua}}\\,\\bha) & \\text{adjoint}\\\\\n", + "\\bf H \\bf v &= (\\bf R + \\bf R_{\\scriptsize\\mbox{aa}})\\bf v + \\bf C^T \\bhp + \\bf W_{\\scriptsize\\mbox{au}} \\bhu.\n", + "\\end{align}\n", + "$$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# define (Gauss-Newton) Hessian apply H * v\n", + "def Hess_GN (v, R, C, A, W):\n", + " solve (A, du, - (C * v))\n", + " solve (A, dp, - (W * du))\n", + " CT_dp = Vector()\n", + " C.init_vector(CT_dp, 1)\n", + " C.transpmult(dp, CT_dp)\n", + " H_V = R * v + CT_dp\n", + " return H_V\n", + "\n", + "# define (Newton) Hessian apply H * v\n", + "def Hess_Newton 
(v, R, Raa, C, A, W, Wua):\n", + " RHS = -(C * v)\n", + " bc2.apply(RHS)\n", + " solve (A, du, RHS)\n", + " RHS = -(W * du) - Wua * v\n", + " bc2.apply(RHS)\n", + " solve (A, dp, RHS)\n", + " CT_dp = Vector()\n", + " C.init_vector(CT_dp, 1)\n", + " C.transpmult(dp, CT_dp)\n", + " Wua_du = Vector()\n", + " Wua.init_vector(Wua_du, 1)\n", + " Wua.transpmult(du, Wua_du)\n", + " H_V = R*v + Raa*v + CT_dp + Wua_du\n", + " return H_V\n", + "\n", + "# Creat Class MyLinearOperator to perform Hessian function\n", + "class MyLinearOperator(LinearOperator):\n", + " cgiter = 0\n", + " def __init__(self, R, Raa, C, A, W, Wua):\n", + " LinearOperator.__init__(self, a_delta, a_delta)\n", + " self.R = R\n", + " self.Raa = Raa\n", + " self.C = C\n", + " self.A = A\n", + " self.W = W\n", + " self.Wua = Wua\n", + "\n", + " # Hessian performed on x, output as generic vector y\n", + " def mult(self, x, y):\n", + " self.cgiter += 1\n", + " y.zero()\n", + " if iter <= 500:\n", + " y.axpy(1., Hess_GN (x, self.R, self.C, self.A, self.W) )\n", + " else:\n", + " y.axpy(1., Hess_Newton (x, self.R, self.Raa, self.C, self.A, self.W, self.Wua) )" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model set up:\n", + "\n", + "As in the introduction, the first thing we need to do is set up the numerical model. In this cell, we set the mesh, the finite element functions $u, p, g$ corresponding to state, adjoint and coefficient/gradient variables, and the corresponding test functions and the parameters for the optimization." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# create mesh and define function spaces\n", + "nx = 64\n", + "ny = 64\n", + "mesh = UnitSquareMesh(nx, ny)\n", + "V = FunctionSpace(mesh, 'Lagrange', 1)\n", + "V2 = FunctionSpace(mesh, 'Lagrange', 2)\n", + "\n", + "# define Trial and Test Functions\n", + "u, p, g, a_delta = TrialFunction(V2), TrialFunction(V2), TrialFunction(V), TrialFunction(V)\n", + "u_test, p_test, ud_test, g_test = TestFunction(V2), TestFunction(V2), TestFunction(V2), TestFunction(V)\n", + "p = Function(V2)\n", + "\n", + "# initialize input functions\n", + "atrue = Expression('log(2 + 7*(pow(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2),0.5) > 0.2))')\n", + "f = Constant(\"1.0\")\n", + "u0 = Constant(\"0.0\")\n", + "a = interpolate(Expression(\"log(2.0)\"),V)\n", + "\n", + "# plot\n", + "atruef = interpolate(atrue, V)\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(mesh,subplot_loc=121, mytitle=\"Mesh\", show_axis='on')\n", + "nb.plot(atruef,subplot_loc=122, mytitle=\"True parameter field\")\n", + "plt.show()\n", + "\n", + "# noise level\n", + "noise_level = 0.05\n", + "\n", + "# define parameters for the optimization\n", + "tol = 1e-8\n", + "gamma = 1e-8\n", + "c = 1e-4\n", + "maxiter = 100\n", + "#eps = 1e-4\n", + "plot_on = 0\n", + "\n", + "# initialize iter counters\n", + "iter = 1\n", + "total_cg_iter = 0\n", + "solution = 0" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# set up dirichlet boundary conditions\n", + "def u0_boundary(x,on_boundary):\n", + " return on_boundary\n", + "bc2 = DirichletBC(V2, u0, u0_boundary)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up synthetic observations:\n", + "\n", + "- Propose a coefficient field $a_{\\text true}$ shown above\n", + "- The weak form of the 
pde: \n", + " Find $u\\in H_0^1(\\Omega)$ such that $\\underbrace{(\\exp(a_{\\text true})\\nabla u,\\nabla v)}_{\\; := \\; a_{pde}} - \\underbrace{(f,v)}_{\\; := \\;L_{pde}} = 0, \\text{ for all } v\\in H_0^1(\\Omega)$.\n", + "\n", + "- Perturb the solution: $u = u + \\eta$, where $\\eta \\sim \\mathcal{N}(0, \\sigma)$" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# weak form for setting up the synthetic observations\n", + "ud = TrialFunction(V2)\n", + "a_goal = inner(exp(atrue) * nabla_grad(ud), nabla_grad(ud_test)) * dx\n", + "L_goal = f * ud_test * dx\n", + "\n", + "# solve the forward/state problem to generate synthetic observations\n", + "goal_A, goal_b = assemble_system(a_goal, L_goal, bc2)\n", + "ud = Function(V2)\n", + "solve(goal_A, ud.vector(), goal_b)\n", + "\n", + "utrue = Function(V2)\n", + "utrue.assign(ud)\n", + "\n", + "# perturb state solution and create synthetic measurements ud\n", + "# ud = u + ||u||/SNR * random.normal\n", + "MAX = ud.vector().norm(\"linf\")\n", + "noise = Vector()\n", + "goal_A.init_vector(noise,1)\n", + "noise.set_local( noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())) )\n", + "ud.vector().axpy(1., noise)\n", + "bc2.apply(ud.vector())\n", + "\n", + "# plot\n", + "nb.multi1_plot([utrue, ud], [\"State solution with atrue\", \"Synthetic observations\"])\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting up the state equations, right hand side for the adjoint and the neccessary matrices:\n" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# weak form for setting up the state equation\n", + "a_state = inner(exp(a) * nabla_grad(u), nabla_grad(u_test)) * dx\n", + "L_state = f * u_test * dx\n", + "W_equ = inner(u, u_test) * dx\n", + "\n", + "# # weak form for setting up the right hand side of the adjoint\n", + "u = Function(V2)\n", + "L_adjoint = -inner(u - ud, u_test) * dx\n", + "\n", + "# weak form for setting up matrices\n", + "Wua_equ = inner(exp(a) * a_delta * nabla_grad(p_test), nabla_grad(p)) * dx\n", + "C_equ = inner(exp(a) * a_delta * nabla_grad(u), nabla_grad(u_test)) * dx\n", + "M_equ = inner(g, g_test) * dx\n", + "R_equ = gamma * inner(nabla_grad(g), nabla_grad(g_test)) * dx\n", + "Raa_equ = inner(exp(a) * a_delta * g_test * nabla_grad(u), nabla_grad(p)) * dx\n", + "\n", + "# assemble matrices M, W, and R\n", + "M = assemble(M_equ)\n", + "W = assemble(W_equ)\n", + "R = assemble(R_equ)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# solve state equation\n", + "A, state_b = assemble_system (a_state, L_state, bc2)\n", + "solve (A, u.vector(), state_b)\n", + "\n", + "# evaluate cost\n", + "[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R)\n", + "\n", + "# plot\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(a,subplot_loc=121, mytitle=\"a_ini\", vmin=atruef.vector().min(), vmax=atruef.vector().max())\n", + "nb.plot(u,subplot_loc=122, mytitle=\"u(a_ini)\")\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The inexact Newton-CG optimization with Armijo line search:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# initializations\n", + "g, a_delta = Vector(), Vector()\n", + "R.init_vector(a_delta,0)\n", + "R.init_vector(g,0)\n", + "\n", + "du, dp 
= Vector(), Vector()\n", + "W.init_vector(du,1)\n", + "W.init_vector(dp,0)\n", + "\n", + "a_prev, a_diff = Function(V), Function(V)\n", + "\n", + "print \"Nit CGit cost misfit reg sqrt(-G*D) ||grad|| alpha tolcg\"\n", + "\n", + "while iter < maxiter and solution == 0:\n", + "\n", + " # assemble matrix C\n", + " C = assemble(C_equ)\n", + "\n", + " # solve the adoint problem\n", + " A, adjoint_RHS = assemble_system(a_state, L_adjoint, bc2)\n", + " solve(A, p.vector(), adjoint_RHS)\n", + "\n", + " # assemble W_ua and R\n", + " Wua = assemble (Wua_equ)\n", + " Raa = assemble (Raa_equ)\n", + "\n", + " # evaluate the gradient\n", + " CT_p = Vector()\n", + " C.init_vector(CT_p,1)\n", + " C.transpmult(p.vector(), CT_p)\n", + " MG = CT_p + R * a.vector()\n", + " solve(M, g, MG)\n", + "\n", + " # calculate the norm of the gradient\n", + " grad2 = g.inner(MG)\n", + " gradnorm = sqrt(grad2)\n", + "\n", + " # set the CG tolerance (use Eisenstat\u2013Walker termination criterion)\n", + " if iter == 1:\n", + " gradnorm_ini = gradnorm\n", + " tolcg = min(0.5, sqrt(gradnorm/gradnorm_ini))\n", + "\n", + " # define the Hessian apply operator (with preconditioner)\n", + " Hess_Apply = MyLinearOperator(R, Raa, C, A, W, Wua )\n", + " P = R + gamma * M\n", + " solver = PETScKrylovSolver(\"cg\", \"amg\")\n", + " solver.set_operators(Hess_Apply, P)\n", + " solver.parameters[\"relative_tolerance\"] = tolcg\n", + "\n", + " # solve the Newton system H a_delta = - MG\n", + " solver.solve(a_delta, -MG)\n", + " total_cg_iter += Hess_Apply.cgiter\n", + "\n", + " # linesearch\n", + " alpha = 1\n", + " descent = 0\n", + " no_backtrack = 0\n", + " a_prev.assign(a)\n", + " while descent == 0 and no_backtrack < 10:\n", + " a.vector().axpy(alpha, a_delta )\n", + "\n", + " # solve the state/forward problem\n", + " state_A, state_b = assemble_system(a_state, L_state, bc2)\n", + " solve(state_A, u.vector(), state_b)\n", + "\n", + " # evaluate cost\n", + " [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R)\n", + "\n", + " # check if Armijo conditions are satisfied\n", + " if cost_new < cost_old - alpha * c * grad2:\n", + " cost_old = cost_new\n", + " descent = 1\n", + " else:\n", + " no_backtrack += 1\n", + " alpha *= 0.5\n", + " a.assign(a_prev) # reset a\n", + "\n", + " # calculate sqrt(-G * D)\n", + " graddir = sqrt(- MG.inner(a_delta) )\n", + "\n", + " sp = \"\"\n", + " print \"%2d %2s %2d %3s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %5.2f %1s %5.3e\" % \\\n", + " (iter, sp, Hess_Apply.cgiter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \\\n", + " graddir, sp, gradnorm, sp, alpha, sp, tolcg)\n", + "\n", + " if plot_on == 1:\n", + " nb.multi1_plot([a,u,p], [\"a\",\"u\",\"p\"], same_colorbar=False)\n", + " plt.show()\n", + " \n", + " # check for convergence\n", + " if sqrt(grad2) < tol and iter > 1:\n", + " solution = 1\n", + " print \"Newton's method converged in \",iter,\" iterations\"\n", + " print \"Total number of CG iterations: \", total_cg_iter\n", + " \n", + " iter += 1\n", + " \n", + "if solution == 0:\n", + " print \"Newton's method did not converge in \", maxiter, \" iterations\"\n", + "\n", + "print \"Time elapsed: \", time.clock()-start" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nb.multi1_plot([atruef, a], [\"atrue\", \"a\"])\n", + "nb.multi1_plot([u,p], [\"u\",\"p\"], same_colorbar=False)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No 
newline at end of file diff --git a/SubsurfaceBayesian.html b/SubsurfaceBayesian.html new file mode 100644 index 0000000..9d3d0bf --- /dev/null +++ b/SubsurfaceBayesian.html @@ -0,0 +1,28166 @@ + + + +SubsurfaceBayesian + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+$$\def\data{\bf d_\rm{obs}} +\def\vec{\bf} +\def\m{\bf m} +\def\map{\bf m_{\text{MAP}}} +\def\postcov{\bf \Gamma_{\text{post}}} +\def\prcov{\bf \Gamma_{\text{prior}}} +\def\matrix{\bf} +\def\Hmisfit{\bf H_{\text{misfit}}} +\def\HT{\tilde{\bf H}_{\text{misfit}}} +\def\diag{diag} +\def\Vr{\matrix V_r} +\def\Wr{\matrix W_r} +\def\Ir{\matrix I_r} +\def\Dr{\matrix D_r} +\def\H{\matrix H} +$$

+

Example: Bayesian quantification of parameter uncertainty:

Estimating the (Gaussian) posterior pdf of the coefficient parameter field in an elliptic PDE

In this example we tackle the problem of quantifying the +uncertainty in the solution of an inverse problem governed by an +elliptic PDE via the Bayesian inference framework. +Hence, we state the inverse problem as a +problem of statistical inference over the space of uncertain +parameters, which are to be inferred from data and a physical +model. The resulting solution to the statistical inverse problem +is a posterior distribution that assigns to any candidate set of +parameter fields our belief (expressed as a probability) that a +member of this candidate set is the ``true'' parameter field that +gave rise to the observed data.

+

For simplicity, in what follows we give finite-dimensional expressions (i.e., after +discretization of the parameter space) for the Bayesian +formulation of the inverse problem.

+

Bayes' Theorem:

The posterior probability distribution combines the prior pdf +$\pi_{\text{prior}}(\m)$ over the parameter space, which encodes +any knowledge or assumptions about the parameter space that we may +wish to impose before the data are considered, with a likelihood pdf +$\pi_{\text{like}}(\vec{d}_{\text{obs}} \; | \; \m)$, which explicitly +represents the probability that a given set of parameters $\m$ +might give rise to the observed data $\vec{d}_{\text{obs}} \in +\mathbb{R}^m$, namely:

+

$ +\begin{align} +\pi_{\text{post}}(\m | \data) \propto +\pi_{\text{prior}}(\m) \pi_{\text{like}}(\data | \m). +\end{align} +$
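As a toy illustration of this formula (a scalar parameter with identity forward map, not the PDE-constrained setting of this notebook), the unnormalized posterior can be tabulated on a grid:

import numpy as np

m = np.linspace(-3., 3., 601)                      # grid of parameter values
prior = np.exp(-0.5 * m**2)                        # N(0,1) prior, unnormalized
d_obs, sigma = 1.2, 0.5                            # one datum, noise std (illustrative)
like = np.exp(-0.5 * ((m - d_obs) / sigma)**2)     # likelihood with f(m) = m
post = prior * like                                # Bayes: prior times likelihood
post /= np.trapz(post, m)                          # normalize numerically
print("posterior mean:", np.trapz(m * post, m))    # approx 0.96 for these values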

+

Note that the infinite-dimensional analog of Bayes' formula cannot be stated using pdfs; it requires Radon-Nikodym derivatives instead.

+

Gaussian prior and noise:

The prior:

We consider a Gaussian prior with mean $\vec m_{\text prior}$ and covariance $\bf \Gamma_{\text{prior}}$ given by the discretization of $\mathcal{A}^{-2} = (-\gamma \Delta + \delta I)^{-2}$, the squared inverse of a differential operator, where $\gamma$, $\delta > 0$ control the correlation length and the variance of the prior. This choice ensures that the prior covariance is a trace-class operator, guaranteeing bounded pointwise variance and a well-posed infinite-dimensional Bayesian inverse problem.
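To make this concrete, here is a minimal 1D sketch (an illustrative finite-difference analogue, not the hIPPYlib prior used below) that discretizes $\mathcal{A} = -\gamma \Delta + \delta I$ and samples from $\mathcal{N}(0, \mathcal{A}^{-2})$:

import numpy as np

n = 100; h = 1.0 / (n - 1)
gamma, delta = 0.1, 0.5
# 1D Laplacian with (illustrative) homogeneous Dirichlet end treatment
L = (2.0 * np.eye(n) - np.eye(n, k=1) - np.eye(n, k=-1)) / h**2
A = gamma * L + delta * np.eye(n)

Gamma_prior = np.linalg.inv(A @ A)                 # covariance = A^{-2}
sample = np.linalg.cholesky(Gamma_prior) @ np.random.randn(n)
print("pointwise variance range:",
      Gamma_prior.diagonal().min(), Gamma_prior.diagonal().max())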

+

The likelihood:

$ +\data = \bf{f}(\m) + \bf{e }, \;\;\; \bf{e} \sim \mathcal{N}(\bf{0}, \bf \Gamma_{\text{noise}} ) +$

+

$ +\pi_{\text like}(\data \; | \; \m) = \exp \left( - \tfrac{1}{2} (\bf{f}(\m) - \data)^T \bf \Gamma_{\text{noise}}^{-1} (\bf{f}(\m) - \data)\right) +$

+

Here $\bf f$ is the parameter-to-observable map, which takes a parameter vector $\m$ and maps it into the space of the observation vector $\data$.
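A minimal sketch of evaluating this likelihood (up to its normalization constant) for a hypothetical linear map; F and Gamma_noise below are random stand-ins, not the Poisson model used later in this notebook:

import numpy as np

n, q = 20, 5                                  # parameter / data dimensions
F = np.random.randn(q, n)                     # stand-in parameter-to-observable map
Gamma_noise = 0.01 * np.eye(q)

def neg_log_like(m, d_obs):
    r = F @ m - d_obs                         # misfit f(m) - d_obs
    return 0.5 * r @ np.linalg.solve(Gamma_noise, r)

m_true = np.random.randn(n)
d_obs = F @ m_true + 0.1 * np.random.randn(q)
print("negative log-likelihood at m_true:", neg_log_like(m_true, d_obs))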

+

The posterior:

$ +\pi_{\text{post}}(\m \; | \; \data) \propto \exp \left( - \tfrac{1}{2} \parallel \bf{f}(\m) - \data \parallel^{2}_{\bf \Gamma_{\text{noise}}^{-1}} \! - \tfrac{1}{2}\parallel \m - \m_{\text prior} \parallel^{2}_{\bf \Gamma_{\text{prior}}^{-1}} \right) +$

+

The Gaussian approximation of the posterior: $\mathcal{N}(\vec{\map},\bf \Gamma_{\text{post}})$

The mean of this posterior distribution, $\vec{\map}$, is the +parameter vector maximizing the posterior, and +is known as the maximum a posteriori (MAP) point. It can be found +by minimizing the negative log of the posterior, which amounts to +solving a deterministic inverse problem with appropriately weighted norms,

+

$
+\map := \underset{\m}{\arg \min} \; \mathcal{J}(\m) \;:=\;
+\frac{1}{2} \| \bf f(\m) - \data \|^2_{\bf \Gamma_{\text{noise}}^{-1}} +
+\frac{1}{2} \| \m -\m_{\text prior} \|^2_{\bf \Gamma_{\text{prior}}^{-1}}.
+$

+

The posterior covariance matrix is then given by the inverse of +the Hessian matrix of $\mathcal{J}$ at $\map$, namely

+

$ +\bf \Gamma_{\text{post}} = \left(\Hmisfit(\map) + \bf \Gamma_{\text{prior}}^{-1} \right)^{-1} += \left(\prcov \Hmisfit + \matrix{I}\right)^{-1}\prcov +$

+

The prior-preconditioned Hessian of the data misfit:

$ + \HT := \prcov \Hmisfit +$

+

The generalized eigenvalue problem:

$ + \Hmisfit \matrix{W} = \prcov^{-1} \matrix{W} \matrix{\Lambda}, +$

+

where $\matrix{\Lambda} = diag(\lambda_i) \in \mathbb{R}^{n\times n}$ +contains the generalized eigenvalues and the columns of $\matrix W\in +\mathbb R^{n\times n}$ the generalized eigenvectors such that +$\matrix{W}^T \prcov^{-1} \matrix{W} = \matrix{I}$. Defining +$\matrix V := \prcov^{-1}\matrix W$

+

$ +\prcov \Hmisfit = \matrix{W} \matrix{\Lambda} \matrix{V}^T. +$
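The identities above can be checked on a small dense problem; the sketch below uses SciPy's generalized symmetric eigensolver with random SPD stand-ins for $\Hmisfit$ and $\prcov^{-1}$ (the notebook itself uses hIPPYlib's matrix-free randomized algorithms instead):

import numpy as np
from scipy.linalg import eigh

n = 30
X = np.random.randn(n, n); Hm = X @ X.T                  # stand-in H_misfit (SPD)
Y = np.random.randn(n, n); R = Y @ Y.T + n * np.eye(n)   # stand-in prior precision

lam, W = eigh(Hm, R)                                     # H_misfit W = R W Lambda
V = R @ W                                                # V := Gamma_prior^{-1} W
print(np.allclose(W.T @ R @ W, np.eye(n)))               # W is R-orthonormal
print(np.allclose(np.linalg.solve(R, Hm),                # Gamma_prior H_misfit
                  W @ np.diag(lam) @ V.T))               #   = W Lambda V^T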

+

Randomized SVD algorithms to construct the approximate spectral decomposition:

When the generalized eigenvalues $\{\lambda_i\}$ decay rapidly, we can +extract a low-rank approximation of $\HT$ by retaining only the $r$ +largest eigenvalues and corresponding eigenvectors,

+

$ +\HT \approx \matrix{W}_r \matrix{\Lambda}_r \matrix{V}_r^T. +$

+

Here, $\matrix{W}_r \in \mathbb{R}^{n\times r}$ contains only the $r$ +eigenvectors of $\HT$ that correspond to the $r$ largest eigenvalues, +which are assembled into the diagonal matrix $\matrix{\Lambda}_r = \diag +(\lambda_i) \in \mathbb{R}^{r \times r}$, and +$\matrix{V}_r=\prcov^{-1} \matrix{W}_r$.

+

Invert with the Sherman-Morrison-Woodbury formula:

$$ +\begin{align} + \notag \left(\HT+ \matrix{I}\right)^{-1} + = \matrix{I}-\matrix{W}_r \matrix{D}_r \matrix{V}_r^T + + \mathcal{O}\left(\sum_{i=r+1}^{n} \frac{\lambda_i}{\lambda_i + + 1}\right), +\end{align} +$$

where $\matrix{D}_r :=\diag(\lambda_i/(\lambda_i+1)) \in +\mathbb{R}^{r\times r}$. The last term in this expression captures the +error due to truncation in terms of the discarded eigenvalues; this +provides a criterion for truncating the spectrum, namely that $r$ is +chosen such that $\lambda_r$ is small relative to 1.
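Continuing in the same dense-sketch spirit (random stand-ins again, nothing from the actual model), the truncation error of the formula can be observed directly: at full rank the identity is exact, and for $r < n$ the error is controlled by the discarded $\lambda_i/(\lambda_i+1)$ terms:

import numpy as np
from scipy.linalg import eigh

n = 30
X = np.random.randn(n, n); Hm = X @ X.T
Y = np.random.randn(n, n); R = Y @ Y.T + n * np.eye(n)

lam, W = eigh(Hm, R)
lam, W = lam[::-1], W[:, ::-1]                  # sort eigenvalues descending
V = R @ W
Ht = np.linalg.solve(R, Hm)                     # prior-preconditioned misfit Hessian
exact = np.linalg.inv(Ht + np.eye(n))

for r in [5, 15, n]:
    D = np.diag(lam[:r] / (lam[:r] + 1.0))
    approx = np.eye(n) - W[:, :r] @ D @ V[:, :r].T
    print(r, np.abs(exact - approx).max(), (lam[r:] / (lam[r:] + 1.0)).sum())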

+

The approximate posterior covariance:

$$ +\postcov \approx (\matrix{I} - \matrix{W}_r \matrix{D}_r +\matrix{V}_r^T) \prcov = +\prcov +- \matrix{W}_r \matrix{D}_r \matrix{W}_r^T +$$

Apply the inverse and square-root inverse Hessian to a vector (as needed for drawing samples from a Gaussian distribution with covariance $\H^{-1}$)

$$ + \H^{-1} \bf v \approx ( \matrix{I}-\Wr \Dr \Vr^T) + \prcov \bf v = \big\{ \bf W_r \big[ (\matrix{\Lambda}_r + + \bf I_r)^{-1} - \bf I_r \big] \Vr^T + \bf I \big\} \prcov \bf v +$$$$ + \H^{-1/2} \bf v \approx \big\{ \Wr \big[ (\matrix{\Lambda}_r + + \Ir)^{-1/2} - \Ir \big] \Vr^T + \bf I \big\} \prcov^{1/2}\bf v +$$
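A sketch of how $\H^{-1}$ is applied using only the retained factors; this mirrors what hIPPYlib's GaussianLRPosterior does matrix-free, but here with dense random stand-ins and $r = n$ so the check is exact:

import numpy as np
from scipy.linalg import eigh

n = 30
X = np.random.randn(n, n); Hm = X @ X.T
Y = np.random.randn(n, n); Rp = Y @ Y.T + n * np.eye(n)   # prior precision
Gp = np.linalg.inv(Rp)                                    # Gamma_prior

lam, W = eigh(Hm, Rp); V = Rp @ W                         # full rank: r = n

def apply_Hinv(v):
    # { W_r [ (Lam_r + I)^{-1} - I_r ] V_r^T + I } Gamma_prior v
    pv = Gp @ v
    return pv + W @ ((1.0 / (lam + 1.0) - 1.0) * (V.T @ pv))

v = np.random.randn(n)
print(np.allclose(apply_Hinv(v), np.linalg.solve(Hm + Rp, v)))  # H = Hm + Rp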

This tutorial shows:

    +
  • convergence of the inexact Newton-CG algorithm
  • +
  • low-rank-based approximation of the posterior covariance (built on a low-rank +approximation of the Hessian of the data misfit)
  • +
  • how to construct the low-rank approximation of the Hessian of the data misfit
  • +
  • how to apply the inverse and square-root inverse Hessian to a vector efficiently
  • +
  • samples from the Gaussian approximation of the posterior
  • +
+

Goals:

By the end of this notebook, you should be able to:

+
    +
  • understand the Bayesian inverse framework
  • +
  • visualise and understand the results
  • +
  • modify the problem and code
  • +
+

Mathematical tools used:

    +
  • Finite element method
  • +
  • Derivation of the gradient and Hessian via the adjoint method
  • +
  • inexact Newton-CG
  • +
  • Armijo line search
  • +
  • Bayes' formula
  • +
+

List of software used:

    +
  • FEniCS, a parallel finite element library for the discretization of partial differential equations
  • +
  • PETSc, for scalable and efficient linear algebra operations and solvers
  • +
  • Matplotlib, a Python package used for plotting the results
  • +
  • NumPy, a Python package for linear algebra; while extensive, it is mostly used here for elementary array operations
  • +
+ +
+
+
+
+
+
In [1]:
+
+
+
import dolfin as dl
+import sys
+sys.path.append( "../" )
+from hippylib import *
+import numpy as np
+import matplotlib.pyplot as plt
+%matplotlib inline
+sys.path.append( "../applications/poisson/" )
+from model_subsurf import Poisson, true_model
+
+import nb
+
+import logging
+logging.getLogger('FFC').setLevel(logging.WARNING)
+logging.getLogger('UFL').setLevel(logging.WARNING)
+dl.set_log_active(False)
+
+np.random.seed(seed=1)
+
+#uncomment this to visualize a list of all the methods available 
+#help(Poisson)
+
+ +
+
+
+ +
+
+
+
+
+
+

Set up the mesh and finite element spaces

We construct a two-dimensional mesh of the unit square with nx by ny elements. +We define a P2 finite element space for the state and adjoint variables and a P1 space for the parameter.

+ +
+
+
+
+
+
In [2]:
+
+
+
ndim = 2
+nx = 64
+ny = 64
+mesh = dl.UnitSquareMesh(nx, ny)
+Vh2 = dl.FunctionSpace(mesh, 'Lagrange', 2)
+Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1)
+Vh = [Vh2, Vh1, Vh2]
+print "Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}".format(Vh[STATE].dim(), Vh[PARAMETER].dim(), Vh[ADJOINT].dim())
+
+ +
+
+
+ +
+
+ + +
+
+
Number of dofs: STATE=16641, PARAMETER=4225, ADJOINT=16641
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Set up the location of observation, Prior Information, and model

To set up the observation operator, we generate ntargets random locations at which to evaluate the state.

+

To obtain the synthetic true parameter $a_{\rm true}$ we generate a realization of a Gaussian random field with zero mean and covariance matrix $\mathcal{C} = \widetilde{\mathcal{A}}^{-1}$, where $\widetilde{\mathcal{A}}$ is a differential operator of the form +$$ \widetilde{\mathcal{A}} = \gamma {\rm div}\, \Theta\, {\rm grad} + \delta I. $$ +Here $\Theta$ is an s.p.d. anisotropic tensor of the form +$$ \Theta = +\begin{bmatrix} +\theta_1 \sin(\alpha)^2 & (\theta_1-\theta_2) \sin(\alpha) \cos(\alpha) \\ +(\theta_1-\theta_2) \sin(\alpha) \cos(\alpha) & \theta_2 \cos(\alpha)^2 +\end{bmatrix}. $$

+

For the prior model, we assume that we can measure the log-permeability coefficient at $N$ locations, and we denote by $a^1_{\rm true}$, $\ldots$, $a^N_{\rm true}$ these measurements. +We also introduce the mollifier functions +$$ \delta_i(x) = \exp\left( -\frac{\gamma^2}{\delta^2} \| x - x_i \|^2_{\Theta^{-1}}\right), \quad i = 1, \ldots, N,$$ +and we let +$$ \mathcal{A} = \widetilde{\mathcal{A}} + p \sum_{i=1}^N \delta_i I = \widetilde{\mathcal{A}} + p \mathcal{M},$$ +where $p$ is a penalization constant ($10$ for this problem) and $ \mathcal{M} = \sum_{i=1}^N \delta_i I$.

+

We then compute $a_{\rm pr}$, the mean of the prior measure, as a regularized +least-squares fit of these point observations by solving +$$ +a_{\rm pr} = \arg\min_{a} \; \frac{1}{2}\langle a, \widetilde{\mathcal{A}} a\rangle + \frac{p}{2}\langle a_{\rm true} - a, \mathcal{M}(a_{\rm true}- a) \rangle. +$$
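Setting the first variation of this quadratic objective to zero gives the linear system $(\widetilde{\mathcal{A}} + p\,\mathcal{M})\, a_{\rm pr} = p\, \mathcal{M} a_{\rm true}$; a small dense sketch with placeholder operators (not the actual discretization):

import numpy as np

n, p = 50, 10.0
X = np.random.randn(n, n)
Atilde = X @ X.T + np.eye(n)                  # stand-in for widetilde{A} (SPD)
w = np.zeros(n); w[[5, 25, 45]] = 1.0         # mollifiers idealized as point masses
M = np.diag(w)

a_true = np.random.randn(n)
a_pr = np.linalg.solve(Atilde + p * M, p * M @ a_true)
# larger p pulls a_pr closer to a_true at the measurement locations
print(a_pr[[5, 25, 45]] - a_true[[5, 25, 45]])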

+

Finally, the prior distribution is $\mathcal{N}(a_{\rm pr}, \mathcal{C}_{\rm prior})$, with $\mathcal{C}_{\rm prior} = \mathcal{A}^{-2}$.

+ +
+
+
+
+
+
In [3]:
+
+
+
ntargets = 300
+targets = np.random.uniform(0.1,0.9, [ntargets, ndim] )
+print "Number of observation points: {0}".format(ntargets)
+    
+gamma = .1
+delta = .5
+    
+anis_diff = dl.Expression(code_AnisTensor2D)
+anis_diff.theta0 = 2.
+anis_diff.theta1 = .5
+anis_diff.alpha = math.pi/4
+atrue = true_model(Vh[PARAMETER], gamma, delta,anis_diff)
+        
+locations = np.array([[0.1, 0.1], [0.1, 0.9], [.5,.5], [.9, .1], [.9, .9]])
+pen = 1e1
+prior = MollifiedBiLaplacianPrior(Vh[PARAMETER], gamma, delta, locations, atrue, anis_diff, pen)
+      
+print "Prior regularization: (delta_x - gamma*Laplacian)^order: delta={0}, gamma={1}, order={2}".format(delta, gamma,2)    
+            
+objs = [dl.Function(Vh[PARAMETER],atrue), dl.Function(Vh[PARAMETER],prior.mean)]
+mytitles = ["True Parameter", "Prior mean"]
+nb.multi1_plot(objs, mytitles)
+plt.show()
+
+model = Poisson(mesh, Vh, targets, prior)
+
+ +
+
+
+ +
+
+ + +
+
+
Number of observation points: 300
+Prior regularization: (delta_x - gamma*Laplacian)^order: delta=0.5, gamma=0.1, order=2
+
+
+
+ +
[Figure: true parameter field and prior mean]
+ +
+
+
+
+
+
+

Generate synthetic observations

To generate the synthetic observations, we first solve the forward problem using the true parameter $a_{\rm true}$. Synthetic observations are obtained by perturbing the state variable at the observation points with random Gaussian noise. +rel_noise is the relative noise level, i.e. the ratio between the standard deviation of the noise and the maximum of the state.

+ +
+
+
+
+
+
In [4]:
+
+
+
rel_noise = 0.01
+
+utrue = model.generate_vector(STATE)
+model.solveFwd(utrue, [utrue, atrue])
+model.B.mult(utrue, model.u_o)
+MAX = model.u_o.norm("linf")
+noise_std_dev = rel_noise * MAX
+randn_perturb(model.u_o, noise_std_dev)
+model.noise_variance = noise_std_dev*noise_std_dev
+
+vmax = max( utrue.max(), model.u_o.max() )
+vmin = min( utrue.min(), model.u_o.min() )
+
+plt.figure(figsize=(15,5))
+nb.plot(dl.Function(Vh[STATE], utrue), mytitle="True State", subplot_loc=121, vmin=vmin, vmax=vmax)
+nb.plot_pts(targets, model.u_o, mytitle="Observations", subplot_loc=122, vmin=vmin, vmax=vmax)
+plt.show()
+
+ +
+
+
+ +
+
+ + +
[Figure: true state and observations]
+ +
+
+
+
+
+
+

Test the gradient and the Hessian of the model

We test the gradient and the Hessian of the model using finite differences.

+ +
+
+
+
+
+
In [5]:
+
+
+
a0 = dl.interpolate(dl.Expression("sin(x[0])"), Vh[PARAMETER])
+modelVerify(model, a0.vector(), 1e-12)
+
+ +
+
+
+ +
+
+ + +
+
+
(yy, H xx) - (xx, H yy) =  3.52782428491e-12
+
+
+
+ +
[Figure: finite difference check of the gradient and Hessian]
+ +
+
+
+
+
+
+

Compute the MAP point

We use the globalized inexact Newton-CG method to compute the MAP point.

+ +
+
+
+
+
+
In [6]:
+
+
+
a0 = prior.mean.copy()
+solver = ReducedSpaceNewtonCG(model)
+solver.parameters["rel_tolerance"] = 1e-9
+solver.parameters["abs_tolerance"] = 1e-12
+solver.parameters["max_iter"]      = 25
+solver.parameters["inner_rel_tolerance"] = 1e-15
+solver.parameters["c_armijo"] = 1e-4
+solver.parameters["GN_iter"] = 5
+    
+x = solver.solve(a0)
+    
+if solver.converged:
+    print "\nConverged in ", solver.it, " iterations."
+else:
+    print "\nNot Converged"
+
+print "Termination reason: ", solver.termination_reasons[solver.reason]
+print "Final gradient norm: ", solver.final_grad_norm
+print "Final cost: ", solver.final_cost
+
+plt.figure(figsize=(15,5))
+nb.plot(dl.Function(Vh[STATE], x[STATE]), subplot_loc=121,mytitle="State")
+nb.plot(dl.Function(Vh[PARAMETER], x[PARAMETER]), subplot_loc=122,mytitle="Parameter")
+plt.show()
+
+ +
+
+
+ +
+
+ + +
+
+
+It  cg_it cost            misfit          reg             (g,da)          ||g||L2        alpha          tolcg         
+  1   1    1.090354e+03    1.090088e+03    2.659651e-01   -1.517885e+04   9.641736e+04   1.000000e+00   5.000000e-01
+  2   3    3.139022e+02    3.126435e+02    1.258673e+00   -1.591018e+03   1.008425e+04   1.000000e+00   3.234031e-01
+  3   4    2.247016e+02    2.229120e+02    1.789636e+00   -1.791982e+02   4.270926e+03   1.000000e+00   2.104667e-01
+  4  10    1.537977e+02    1.455845e+02    8.213185e+00   -1.435677e+02   2.072291e+03   1.000000e+00   1.466046e-01
+  5  12    1.445560e+02    1.316661e+02    1.288984e+01   -1.852588e+01   9.599493e+02   1.000000e+00   9.978070e-02
+  6  16    1.432204e+02    1.286198e+02    1.460057e+01   -2.662936e+00   4.264475e+02   1.000000e+00   6.650513e-02
+  7  22    1.431352e+02    1.278694e+02    1.526575e+01   -1.703500e-01   1.072508e+02   1.000000e+00   3.335206e-02
+  8  27    1.431345e+02    1.278138e+02    1.532065e+01   -1.386221e-03   9.613463e+00   1.000000e+00   9.985328e-03
+  9  39    1.431345e+02    1.278145e+02    1.531996e+01   -2.247284e-06   3.880829e-01   1.000000e+00   2.006248e-03
+ 10  65    1.431345e+02    1.278145e+02    1.531996e+01   -7.988145e-12   6.383858e-04   1.000000e+00   8.136994e-05
+
+Converged in  10  iterations.
+Termination reason:  Norm of the gradient less than tolerance
+Final gradient norm:  6.26732784796e-08
+Final cost:  143.134493664
+
+
+
+ +
[Figure: state and parameter at the MAP point]
+ +
+
+
+
+
+
+

Compute the low rank Gaussian approximation of the posterior

We use the double pass randomized algorithm to compute a low-rank decomposition of the Hessian of the data misfit. +In particular, we solve

+$$ H_{\rm misfit} u = \lambda R u. $$

The figures show the $k$ largest generalized eigenvalues, and a few of the corresponding eigenvectors, of the Hessian misfit. +The effective rank of the Hessian misfit is the number of eigenvalues above the red line ($y=1$). +The effective rank is independent of the mesh size.

+ +
+
+
+
+
+
In [7]:
+
+
+
model.setPointForHessianEvaluations(x)
+Hmisfit = ReducedHessian(model, solver.parameters["inner_rel_tolerance"], gauss_newton_approx=False, misfit_only=True)
+k = 50
+p = 20
+print "Single/Double Pass Algorithm. Requested eigenvectors: {0}; Oversampling {1}.".format(k,p)
+Omega = np.random.randn(x[PARAMETER].array().shape[0], k+p)
+#d, U = singlePassG(Hmisfit, prior.R, prior.Rsolver, Omega, k)
+d, U = doublePassG(Hmisfit, prior.R, prior.Rsolver, Omega, k)
+
+posterior = GaussianLRPosterior(prior, d, U)
+posterior.mean = x[PARAMETER]
+
+#d2, U2 = singlePass(Hmisfit, Omega, k)
+
+plt.plot(range(0,k), d, 'b*', range(0,k+1), np.ones(k+1), '-r')
+plt.yscale('log')
+plt.xlabel('number')
+plt.ylabel('eigenvalue')
+
+nb.plot_eigenvectors(Vh[PARAMETER], U, mytitle="Eigenvector", which=[0,1,2,5,10,15])
+
+ +
+
+
+ +
+
+ + +
+
+
Single/Double Pass Algorithm. Requested eigenvectors: 50; Oversampling 20.
+
+
+
+ +
[Figures: generalized eigenvalue decay and selected eigenvectors]
+ +
+
+
+
+
+
+

Prior and posterior pointwise variance fields

+
+
+
+
+
+
In [8]:
+
+
+
compute_trace = False
+if compute_trace:
+    post_tr, prior_tr, corr_tr = posterior.trace(method="Estimator", tol=5e-2, min_iter=20, max_iter=200)
+    print "Posterior trace {0:5e}; Prior trace {1:5e}; Correction trace {2:5e}".format(post_tr, prior_tr, corr_tr)
+post_pw_variance, pr_pw_variance, corr_pw_variance = posterior.pointwise_variance("Exact")
+
+objs = [dl.Function(Vh[PARAMETER], pr_pw_variance),
+        dl.Function(Vh[PARAMETER], post_pw_variance)]
+mytitles = ["Prior variance", "Posterior variance"]
+nb.multi1_plot(objs, mytitles, logscale=True)
+plt.show()
+
+ +
+
+
+ +
+
+ + +
[Figure: prior and posterior pointwise variance fields]
+ +
+
+
+
+
+
+

Generate samples from Prior and Posterior

+
+
+
+
+
+
In [9]:
+
+
+
nsamples = 5
+noise = dl.Vector()
+posterior.init_vector(noise,"noise")
+noise_size = noise.array().shape[0]
+s_prior = dl.Function(Vh[PARAMETER], name="sample_prior")
+s_post = dl.Function(Vh[PARAMETER], name="sample_post")
+
+range_pr = 2*math.sqrt( pr_pw_variance.max() )
+ps_max   = 2*math.sqrt( post_pw_variance.max() ) + posterior.mean.max()
+ps_min   = -2*math.sqrt( post_pw_variance.max() ) + posterior.mean.min()
+
+for i in range(nsamples):
+    noise.set_local( np.random.randn( noise_size ) )
+    posterior.sample(noise, s_prior.vector(), s_post.vector())
+    plt.figure(figsize=(15,5))
+    nb.plot(s_prior, subplot_loc=121,mytitle="Prior sample", vmin=-range_pr, vmax=range_pr)
+    nb.plot(s_post, subplot_loc=122,mytitle="Posterior sample", vmin=ps_min, vmax=ps_max)
+    plt.show()
+
+ +
+
+
+ +
+
+ + +
[Figures: five pairs of prior and posterior samples]
+ +
+
+
+ + diff --git a/SubsurfaceBayesian.ipynb b/SubsurfaceBayesian.ipynb new file mode 100644 index 0000000..a714c3b --- /dev/null +++ b/SubsurfaceBayesian.ipynb @@ -0,0 +1,559 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$\\def\\data{\\bf d_\\rm{obs}}\n", + "\\def\\vec{\\bf}\n", + "\\def\\m{\\bf m}\n", + "\\def\\map{\\bf m_{\\text{MAP}}}\n", + "\\def\\postcov{\\bf \\Gamma_{\\text{post}}}\n", + "\\def\\prcov{\\bf \\Gamma_{\\text{prior}}}\n", + "\\def\\matrix{\\bf}\n", + "\\def\\Hmisfit{\\bf H_{\\text{misfit}}}\n", + "\\def\\HT{\\tilde{\\bf H}_{\\text{misfit}}}\n", + "\\def\\diag{diag}\n", + "\\def\\Vr{\\matrix V_r}\n", + "\\def\\Wr{\\matrix W_r}\n", + "\\def\\Ir{\\matrix I_r}\n", + "\\def\\Dr{\\matrix D_r}\n", + "\\def\\H{\\matrix H}\n", + "$$ \n", + "# Example: Bayesian quantification of parameter uncertainty:\n", + "## Estimating the (Gaussian) posterior pdf of the coefficient parameter field in an elliptic PDE\n", + "\n", + "In this example we tackle the problem of quantifying the\n", + "uncertainty in the solution of an inverse problem governed by an\n", + "elliptic PDE via the Bayesian inference framework. \n", + "Hence, we state the inverse problem as a\n", + "problem of statistical inference over the space of uncertain\n", + "parameters, which are to be inferred from data and a physical\n", + "model. The resulting solution to the statistical inverse problem\n", + "is a posterior distribution that assigns to any candidate set of\n", + "parameter fields our belief (expressed as a probability) that a\n", + "member of this candidate set is the ``true'' parameter field that\n", + "gave rise to the observed data.\n", + "\n", + "For simplicity, in what follows we give finite-dimensional expressions (i.e., after\n", + "discretization of the parameter space) for the Bayesian\n", + "formulation of the inverse problem.\n", + "\n", + "### Bayes' Theorem:\n", + "\n", + "The posterior probability distribution combines the prior pdf\n", + "$\\pi_{\\text{prior}}(\\m)$ over the parameter space, which encodes\n", + "any knowledge or assumptions about the parameter space that we may\n", + "wish to impose before the data are considered, with a likelihood pdf\n", + "$\\pi_{\\text{like}}(\\vec{d}_{\\text{obs}} \\; | \\; \\m)$, which explicitly\n", + "represents the probability that a given set of parameters $\\m$\n", + "might give rise to the observed data $\\vec{d}_{\\text{obs}} \\in\n", + "\\mathbb{R}^m$, namely:\n", + "\n", + "$\n", + "\\begin{align}\n", + "\\pi_{\\text{post}}(\\m | \\data) \\propto\n", + "\\pi_{\\text{prior}}(\\m) \\pi_{\\text{like}}(\\data | \\m).\n", + "\\end{align}\n", + "$\n", + "\n", + "Note that the infinite-dimensional analog of Bayes' formula cannot be stated formulated using pdfs but requires Radon-Nikodym derivatives\n", + "\n", + "### Gaussian prior and noise:\n", + "\n", + "#### The prior:\n", + "\n", + "We consider a Gaussian prior with mean $\\vec m_{\\text prior}$ and covariance $\\bf \\Gamma_{\\text{prior}}$. The covariance is given by the discretization of the inverse of differential operator $\\mathcal{A}^{-2} = (-\\gamma \\Delta + \\delta I)^{-2}$, where $\\gamma$, $\\delta > 0$ control the correlation length and the variance of the prior operator. 
This choice of prior ensures that it is a trace-class operator, guaranteeing bounded pointwise variance and a well-posed infinite-dimensional Bayesian inverse problem\n", + "\n", + "#### The likelihood:\n", + "\n", + "$\n", + "\\data = \\bf{f}(\\m) + \\bf{e }, \\;\\;\\; \\bf{e} \\sim \\mathcal{N}(\\bf{0}, \\bf \\Gamma_{\\text{noise}} )\n", + "$\n", + "\n", + "$\n", + "\\pi_{\\text like}(\\data \\; | \\; \\m) = \\exp \\left( - \\tfrac{1}{2} (\\bf{f}(\\m) - \\data)^T \\bf \\Gamma_{\\text{noise}}^{-1} (\\bf{f}(\\m) - \\data)\\right)\n", + "$\n", + "\n", + "Here $\\bf f$ is the parameter-to-observable map that takes a parameter vector $\\m$ and maps\n", + "it to the space observation vector $\\data$.\n", + "\n", + "#### The posterior:\n", + "\n", + "$\n", + "\\pi_{\\text{post}}(\\m \\; | \\; \\data) \\propto \\exp \\left( - \\tfrac{1}{2} \\parallel \\bf{f}(\\m) - \\data \\parallel^{2}_{\\bf \\Gamma_{\\text{noise}}^{-1}} \\! - \\tfrac{1}{2}\\parallel \\m - \\m_{\\text prior} \\parallel^{2}_{\\bf \\Gamma_{\\text{prior}}^{-1}} \\right)\n", + "$\n", + "\n", + "### The Gaussian approximation of the posterior: $\\mathcal{N}(\\vec{\\map},\\bf \\Gamma_{\\text{post}})$\n", + "\n", + "The mean of this posterior distribution, $\\vec{\\map}$, is the\n", + "parameter vector maximizing the posterior, and\n", + "is known as the maximum a posteriori (MAP) point. It can be found\n", + "by minimizing the negative log of the posterior, which amounts to\n", + "solving a deterministic inverse problem) with appropriately weighted norms,\n", + "\n", + "$\n", + "\\map := \\underset{\\m}{\\arg \\min} \\; \\mathcal{J}(\\m) \\;:=\\;\n", + "\\Big( \n", + "-\\frac{1}{2} \\| \\bf f(\\m) - \\data \\|^2_{\\bf \\Gamma_{\\text{noise}}^{-1}} \n", + "-\\frac{1}{2} \\| \\m -\\m_{\\text prior} \\|^2_{\\bf \\Gamma_{\\text{prior}}^{-1}} \n", + "\\Big).\n", + "$\n", + "\n", + "The posterior covariance matrix is then given by the inverse of\n", + "the Hessian matrix of $\\mathcal{J}$ at $\\map$, namely\n", + "\n", + "$\n", + "\\bf \\Gamma_{\\text{post}} = \\left(\\Hmisfit(\\map) + \\bf \\Gamma_{\\text{prior}}^{-1} \\right)^{-1} \n", + "= \\left(\\prcov \\Hmisfit + \\matrix{I}\\right)^{-1}\\prcov\n", + "$\n", + "\n", + "#### The prior-preconditioned Hessian of the data misfit:\n", + "\n", + "$\n", + " \\HT := \\prcov \\Hmisfit\n", + "$\n", + "\n", + "#### The generalized eigenvalue problem:\n", + "\n", + "$\n", + " \\Hmisfit \\matrix{W} = \\prcov^{-1} \\matrix{W} \\matrix{\\Lambda},\n", + "$\n", + "\n", + "where $\\matrix{\\Lambda} = diag(\\lambda_i) \\in \\mathbb{R}^{n\\times n}$\n", + "contains the generalized eigenvalues and the columns of $\\matrix W\\in\n", + "\\mathbb R^{n\\times n}$ the generalized eigenvectors such that \n", + "$\\matrix{W}^T \\prcov^{-1} \\matrix{W} = \\matrix{I}$. 
Defining \n", + "$\\matrix V := \\prcov^{-1}\\matrix W$\n", + "\n", + "$\n", + "\\prcov \\Hmisfit = \\matrix{W} \\matrix{\\Lambda} \\matrix{V}^T.\n", + "$\n", + "\n", + "#### Randomized SVD algorithms to construct the approximate spectral decomposition: \n", + "\n", + "When the generalized eigenvalues $\\{\\lambda_i\\}$ decay rapidly, we can\n", + "extract a low-rank approximation of $\\HT$ by retaining only the $r$\n", + "largest eigenvalues and corresponding eigenvectors,\n", + "\n", + "$\n", + "\\HT \\approx \\matrix{W}_r \\matrix{\\Lambda}_r \\matrix{V}_r^T.\n", + "$\n", + "\n", + "Here, $\\matrix{W}_r \\in \\mathbb{R}^{n\\times r}$ contains only the $r$\n", + "eigenvectors of $\\HT$ that correspond to the $r$ largest eigenvalues,\n", + "which are assembled into the diagonal matrix $\\matrix{\\Lambda}_r = \\diag\n", + "(\\lambda_i) \\in \\mathbb{R}^{r \\times r}$, and\n", + "$\\matrix{V}_r=\\prcov^{-1} \\matrix{W}_r$.\n", + "\n", + "#### Invert with the Sherman-Morrison-Woodbury formula:\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\notag \\left(\\HT+ \\matrix{I}\\right)^{-1}\n", + " = \\matrix{I}-\\matrix{W}_r \\matrix{D}_r \\matrix{V}_r^T +\n", + " \\mathcal{O}\\left(\\sum_{i=r+1}^{n} \\frac{\\lambda_i}{\\lambda_i +\n", + " 1}\\right),\n", + "\\end{align}\n", + "$$\n", + "\n", + "where $\\matrix{D}_r :=\\diag(\\lambda_i/(\\lambda_i+1)) \\in\n", + "\\mathbb{R}^{r\\times r}$. The last term in this expression captures the\n", + "error due to truncation in terms of the discarded eigenvalues; this\n", + "provides a criterion for truncating the spectrum, namely that $r$ is\n", + "chosen such that $\\lambda_r$ is small relative to 1. \n", + "\n", + "#### The approximate posterior covariance:\n", + "\n", + "$$\n", + "\\postcov \\approx (\\matrix{I} - \\matrix{W}_r \\matrix{D}_r\n", + "\\matrix{V}_r^T) \\prcov = \n", + "\\prcov\n", + "- \\matrix{W}_r \\matrix{D}_r \\matrix{W}_r^T\n", + "$$\n", + "\n", + "#### Apply the inverse and square-root inverse Hessian to a vector (as needed for drawing samples from a Gaussian distribution with covariance $\\H^{-1}$)\n", + "\n", + "$$\n", + " \\H^{-1} \\bf v \\approx ( \\matrix{I}-\\Wr \\Dr \\Vr^T)\n", + " \\prcov \\bf v = \\big\\{ \\bf W_r \\big[ (\\matrix{\\Lambda}_r +\n", + " \\bf I_r)^{-1} - \\bf I_r \\big] \\Vr^T + \\bf I \\big\\} \\prcov \\bf v\n", + "$$\n", + " \n", + "$$\n", + " \\H^{-1/2} \\bf v \\approx \\big\\{ \\Wr \\big[ (\\matrix{\\Lambda}_r +\n", + " \\Ir)^{-1/2} - \\Ir \\big] \\Vr^T + \\bf I \\big\\} \\prcov^{1/2}\\bf v\n", + "$$\n", + "\n", + "### This tutorial shows:\n", + "\n", + "- convergence of the inexact Newton-CG algorithm\n", + "- low-rank-based approximation of the posterior covariance (built on a low-rank\n", + "approximation of the Hessian of the data misfit) \n", + "- how to construct the low-rank approximation of the Hessian of the data misfit\n", + "- how to apply the inverse and square-root inverse Hessian to a vector efficiently\n", + "- samples from the Gaussian approximation of the posterior\n", + "\n", + "### Goals:\n", + "\n", + "By the end of this notebook, you should be able to:\n", + "\n", + "- understand the Bayesian inverse framework\n", + "- visualise and understand the results\n", + "- modify the problem and code\n", + "\n", + "### Mathematical tools used:\n", + "\n", + "- Finite element method\n", + "- Derivation of gradiant and Hessian via the adjoint method\n", + "- inexact Newton-CG\n", + "- Armijo line search\n", + "- Bayes' formula\n", + "\n", + "### List of software used:\n", + "\n", + "- FEniCS, a 
parallel finite element element library for the discretization of partial differential equations\n", + "- PETSc, for scalable and efficient linear algebra operations and solvers\n", + "- Matplotlib, A great python package that I used for plotting many of the results\n", + "- Numpy, A python package for linear algebra. While extensive, this is mostly used to compute means and sums in this notebook." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import dolfin as dl\n", + "import sys\n", + "sys.path.append( \"../\" )\n", + "from hippylib import *\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "sys.path.append( \"../applications/poisson/\" )\n", + "from model_subsurf import Poisson, true_model\n", + "\n", + "import nb\n", + "\n", + "import logging\n", + "logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "dl.set_log_active(False)\n", + "\n", + "np.random.seed(seed=1)\n", + "\n", + "#uncomment this to visualize a list of all the methods available \n", + "#help(Poisson)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Set up the mesh and finite element spaces\n", + "\n", + "We compute a two dimensional mesh of a unit square with nx by ny elements.\n", + "We define a P2 finite element space for the *state* and *adjoint* variable and P1 for the *parameter*." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ndim = 2\n", + "nx = 64\n", + "ny = 64\n", + "mesh = dl.UnitSquareMesh(nx, ny)\n", + "Vh2 = dl.FunctionSpace(mesh, 'Lagrange', 2)\n", + "Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1)\n", + "Vh = [Vh2, Vh1, Vh2]\n", + "print \"Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}\".format(Vh[STATE].dim(), Vh[PARAMETER].dim(), Vh[ADJOINT].dim())" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Set up the location of observation, Prior Information, and model\n", + "\n", + "To setup the observation operator, we generate *ntargets* random locations where to evaluate the value of the state.\n", + "\n", + "To obtain the synthetic true paramter $a_{\\rm true}$ we generate a realization of a Gaussian random field with zero average and covariance matrix $\\mathcal{C} = \\widetilde{\\mathcal{A}}^{-1}$, where $\\widetilde{\\mathcal{A}}$ is a differential operator of the form\n", + "$$ \\widetilde{\\mathcal{A}} = \\gamma {\\rm div}\\, \\Theta\\, {\\rm grad} + \\delta I $$\n", + "Here $\\Theta$ is an s.p.d. 
anisotropic tensor of the form\n", + "$$ \\Theta =\n", + "\\begin{bmatrix}\n", + "\\theta_1 \\sin(\\alpha)^2 & (\\theta_1-\\theta_2) \\sin(\\alpha) \\cos{\\alpha} \\\\\n", + "(\\theta_1-\\theta_2) \\sin(\\alpha) \\cos{\\alpha} & \\theta_2 \\cos(\\alpha)^2.\n", + "\\end{bmatrix} $$\n", + "\n", + "For the prior model, we assume that we can measure the log-permeability coefficient at $N$ locations, and we denote with $a^1_{\\rm true}$, $\\ldots$, $a^N_{\\rm true}$ such measures.\n", + "We also introduce the mollifier functions\n", + "$$ \\delta_i(x) = \\exp\\left( -\\frac{\\gamma^2}{\\delta^2} \\| x - x_i \\|^2_{\\Theta^{-1}}\\right), \\quad i = 1, \\ldots, N,$$\n", + "and we let\n", + "$$ \\mathcal{A} = \\widetilde{\\mathcal{A}} + p \\sum_{i=1}^N \\delta_i I = \\widetilde{\\mathcal{A}} + p \\mathcal{M},$$\n", + "where $p$ is a penalization costant (10 for this problem) and $ \\mathcal{M} = \\sum_{i=1}^N \\delta_i I$.\n", + "\n", + "We then compute $a_{\\rm pr}$, the mean of the prior measure, as a regularized\n", + "least-squares fit of these point observations by solving\n", + "$$\n", + "a_{\\rm pr} = arg\\min_{m} \\frac{1}{2}\\langle a, \\widetilde{\\mathcal{A}} a\\rangle + \\frac{p}{2}\\langle a_{\\rm true} - a, \\mathcal{M}(a_{\\rm true}- a) \\rangle.\n", + "$$\n", + "\n", + "Finally the prior distribution is $\\mathcal{N}(a_{\\rm pr}, \\mathcal{C}_{\\rm prior})$, with $\\mathcal{C}_{\\rm prior} = \\mathcal{A}^2$." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ntargets = 300\n", + "targets = np.random.uniform(0.1,0.9, [ntargets, ndim] )\n", + "print \"Number of observation points: {0}\".format(ntargets)\n", + " \n", + "gamma = .1\n", + "delta = .5\n", + " \n", + "anis_diff = dl.Expression(code_AnisTensor2D)\n", + "anis_diff.theta0 = 2.\n", + "anis_diff.theta1 = .5\n", + "anis_diff.alpha = math.pi/4\n", + "atrue = true_model(Vh[PARAMETER], gamma, delta,anis_diff)\n", + " \n", + "locations = np.array([[0.1, 0.1], [0.1, 0.9], [.5,.5], [.9, .1], [.9, .9]])\n", + "pen = 1e1\n", + "prior = MollifiedBiLaplacianPrior(Vh[PARAMETER], gamma, delta, locations, atrue, anis_diff, pen)\n", + " \n", + "print \"Prior regularization: (delta_x - gamma*Laplacian)^order: delta={0}, gamma={1}, order={2}\".format(delta, gamma,2) \n", + " \n", + "objs = [dl.Function(Vh[PARAMETER],atrue), dl.Function(Vh[PARAMETER],prior.mean)]\n", + "mytitles = [\"True Parameter\", \"Prior mean\"]\n", + "nb.multi1_plot(objs, mytitles)\n", + "plt.show()\n", + "\n", + "model = Poisson(mesh, Vh, targets, prior)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate synthetic observations\n", + "\n", + "To generate the synthetic observation, we first solve the forward problem using the true parameter $a_{\\rm true}$. Synthetic observations are obtained by perturbing the state variable at the observation points with a random gaussian noise.\n", + "*rel_noise* is the signal to noise ratio." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "rel_noise = 0.01\n", + "\n", + "utrue = model.generate_vector(STATE)\n", + "model.solveFwd(utrue, [utrue, atrue])\n", + "model.B.mult(utrue, model.u_o)\n", + "MAX = model.u_o.norm(\"linf\")\n", + "noise_std_dev = rel_noise * MAX\n", + "randn_perturb(model.u_o, noise_std_dev)\n", + "model.noise_variance = noise_std_dev*noise_std_dev\n", + "\n", + "vmax = max( utrue.max(), model.u_o.max() )\n", + "vmin = min( utrue.min(), model.u_o.min() )\n", + "\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(dl.Function(Vh[STATE], utrue), mytitle=\"True State\", subplot_loc=121, vmin=vmin, vmax=vmax)\n", + "nb.plot_pts(targets, model.u_o, mytitle=\"Observations\", subplot_loc=122, vmin=vmin, vmax=vmax)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Test the gradient and the Hessian of the model\n", + "\n", + "We test the gradient and the Hessian of the model using finite differences." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "a0 = dl.interpolate(dl.Expression(\"sin(x[0])\"), Vh[PARAMETER])\n", + "modelVerify(model, a0.vector(), 1e-4, 1e-6)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Compute the MAP point\n", + "\n", + "We used the globalized Newtown-CG method to compute the MAP point." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "a0 = prior.mean.copy()\n", + "solver = ReducedSpaceNewtonCG(model)\n", + "solver.parameters[\"rel_tolerance\"] = 1e-9\n", + "solver.parameters[\"abs_tolerance\"] = 1e-12\n", + "solver.parameters[\"max_iter\"] = 25\n", + "solver.parameters[\"inner_rel_tolerance\"] = 1e-15\n", + "solver.parameters[\"c_armijo\"] = 1e-4\n", + "solver.parameters[\"GN_iter\"] = 5\n", + " \n", + "x = solver.solve(a0)\n", + " \n", + "if solver.converged:\n", + " print \"\\nConverged in \", solver.it, \" iterations.\"\n", + "else:\n", + " print \"\\nNot Converged\"\n", + "\n", + "print \"Termination reason: \", solver.termination_reasons[solver.reason]\n", + "print \"Final gradient norm: \", solver.final_grad_norm\n", + "print \"Final cost: \", solver.final_cost\n", + "\n", + "plt.figure(figsize=(15,5))\n", + "nb.plot(dl.Function(Vh[STATE], x[STATE]), subplot_loc=121,mytitle=\"State\")\n", + "nb.plot(dl.Function(Vh[PARAMETER], x[PARAMETER]), subplot_loc=122,mytitle=\"Parameter\")\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Compute the low rank Gaussian approximation of the posterior\n", + "We used the *single pass* algorithm to compute a low-rank decomposition of the Hessian Misfit.\n", + "In particular, we solve\n", + "\n", + "$$ H_{\\rm misfit} u = \\lambda R u. $$\n", + "\n", + "The Figure shows the largest *k* generalized eigenvectors of the Hessian misfit.\n", + "The effective rank of the Hessian misfit is the number of eigenvalues above the red line (y=1).\n", + "The effective rank is independent of the mesh size." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "model.setPointForHessianEvaluations(x)\n", + "Hmisfit = ReducedHessian(model, solver.parameters[\"inner_rel_tolerance\"], gauss_newton_approx=False, misfit_only=True)\n", + "k = 50\n", + "p = 20\n", + "print \"Single/Double Pass Algorithm. 
Requested eigenvectors: {0}; Oversampling {1}.\".format(k,p)\n", + "Omega = np.random.randn(x[PARAMETER].array().shape[0], k+p)\n", + "#d, U = singlePassG(Hmisfit, prior.R, prior.Rsolver, Omega, k)\n", + "d, U = doublePassG(Hmisfit, prior.R, prior.Rsolver, Omega, k)\n", + "\n", + "posterior = GaussianLRPosterior(prior, d, U)\n", + "posterior.mean = x[PARAMETER]\n", + "\n", + "#d2, U2 = singlePass(Hmisfit, Omega, k)\n", + "\n", + "plt.plot(range(0,k), d, 'b*', range(0,k+1), np.ones(k+1), '-r')\n", + "plt.yscale('log')\n", + "plt.xlabel('number')\n", + "plt.ylabel('eigenvalue')\n", + "\n", + "nb.plot_eigenvectors(Vh[PARAMETER], U, mytitle=\"Eigenvector\", which=[0,1,2,5,10,15])" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prior and posterior pointwise variance fields" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "compute_trace = False\n", + "if compute_trace:\n", + " post_tr, prior_tr, corr_tr = posterior.trace(method=\"Estimator\", tol=5e-2, min_iter=20, max_iter=200)\n", + " print \"Posterior trace {0:5e}; Prior trace {1:5e}; Correction trace {2:5e}\".format(post_tr, prior_tr, corr_tr)\n", + "post_pw_variance, pr_pw_variance, corr_pw_variance = posterior.pointwise_variance(\"Exact\")\n", + "\n", + "objs = [dl.Function(Vh[PARAMETER], pr_pw_variance),\n", + " dl.Function(Vh[PARAMETER], post_pw_variance)]\n", + "mytitles = [\"Prior variance\", \"Posterior variance\"]\n", + "nb.multi1_plot(objs, mytitles, logscale=True)\n", + "plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Generate samples from Prior and Posterior" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nsamples = 5\n", + "noise = dl.Vector()\n", + "posterior.init_vector(noise,\"noise\")\n", + "noise_size = noise.array().shape[0]\n", + "s_prior = dl.Function(Vh[PARAMETER], name=\"sample_prior\")\n", + "s_post = dl.Function(Vh[PARAMETER], name=\"sample_post\")\n", + "\n", + "range_pr = 2*math.sqrt( pr_pw_variance.max() )\n", + "ps_max = 2*math.sqrt( post_pw_variance.max() ) + posterior.mean.max()\n", + "ps_min = -2*math.sqrt( post_pw_variance.max() ) + posterior.mean.min()\n", + "\n", + "for i in range(nsamples):\n", + " noise.set_local( np.random.randn( noise_size ) )\n", + " posterior.sample(noise, s_prior.vector(), s_post.vector())\n", + " plt.figure(figsize=(15,5))\n", + " nb.plot(s_prior, subplot_loc=121,mytitle=\"Prior sample\", vmin=-range_pr, vmax=range_pr)\n", + " nb.plot(s_post, subplot_loc=122,mytitle=\"Posterior sample\", vmin=ps_min, vmax=ps_max)\n", + " plt.show()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/UnconstrainedMinimization.html b/UnconstrainedMinimization.html new file mode 100644 index 0000000..897b0eb --- /dev/null +++ b/UnconstrainedMinimization.html @@ -0,0 +1,11180 @@ + + + +UnconstrainedMinimization + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

Example: Nonlinear energy functional minimization

In this example we solve the following nonlinear minimization problem

+

Find $u^* \in H^1_0(\Omega)$ such that +$$ u^* = \rm{arg}\min_{u \in H^1_0(\Omega)} \Pi(u). $$

+

Here the energy functional $\Pi(u)$ has the form +$$ \Pi(u) = \frac{1}{2} \int_\Omega k(u) \nabla u \cdot \nabla u dx - \int_\Omega f\,u dx,$$ +where +$$k(u) = k_1 + k_2 u^2. $$

+

Necessary condition (Euler-Lagrange condition)

Let $\delta_u \Pi(u, \hat{u})$ denote the first variation of $\Pi(u)$ in the direction $\hat{u}$, i.e. +$$\delta_u \Pi(u, \hat{u}) := \left. \frac{d}{d \varepsilon} \Pi(u + \varepsilon \hat{u})\right|_{\varepsilon=0} = \lim_{\varepsilon \rightarrow 0} \frac{\Pi(u + \varepsilon \hat{u}) - \Pi(u)}{\varepsilon}.$$

+

The necessary condition is that the first variation of $\Pi(u)$ equals 0 for all directions $\hat{u}$: +$$ \delta_u \Pi = 0 \Longleftrightarrow \lim_{\varepsilon \rightarrow 0} \frac{\Pi(u + \varepsilon \hat{u}) - \Pi(u)}{\varepsilon} = 0 \quad \forall \hat{u} \in H_0^1(\Omega).$$

+

Weak form:

To obtain the weak form of the above necessary condition, we first expand the term $\Pi(u + \varepsilon \hat{u})$ as +$$\Pi(u + \varepsilon \hat{u}) = \frac{1}{2} \int_\Omega [k_1 + k_2(u + \varepsilon\hat{u})^2](\nabla u + \varepsilon \nabla \hat{u})\cdot(\nabla u + \varepsilon \nabla \hat{u}) dx - \int_\Omega f\,(u+\varepsilon\hat{u}) dx.$$

+

After some simplification, we obtain +$$\frac{\Pi(u + \varepsilon \hat{u}) - \Pi(u)}{\varepsilon} = \int_\Omega \left[k_2 u \hat{u} \nabla u \cdot \nabla u + (k_1 + k_2u^2)\nabla \hat{u}\cdot \nabla u\right] dx - \int_\Omega f \hat{u} dx + \mathcal{O}(\varepsilon).$$

+

By neglecting the $\mathcal{O}(\varepsilon)$ terms, we write the weak form of the necessary condition as

+

Find $u\in H_0^1(\Omega)$ such that +$$ \int_\Omega \left[k_2 u \hat{u} \nabla u \cdot \nabla u + (k_1 + k_2u^2)\nabla \hat{u}\cdot \nabla u\right] dx = \int_\Omega f \hat{u} dx \quad \forall \hat{u} \in H_0^1.$$

+

Strong form:

To obtain the strong form, we invoke Green's first identity and write +$$ \int_\Omega \left[k_2 u \nabla u \cdot \nabla u - \nabla \cdot [(k_1 + k_2u^2) \nabla u] \right] \hat{u} dx + \int_{\partial \Omega} [(k_1 + k_2u^2) \nabla u]\cdot n \hat{u} ds = \int_\Omega f \hat{u} dx \quad \forall \hat{u} \in H_0^1.$$

+

Since $\hat{u}$ is arbitrary in $\Omega$ and $\hat{u} = 0$ on $\partial \Omega$, the strong form of the nonlinear boundary value problem reads +$$ - \nabla \cdot [(k_1 + k_2u^2) \nabla u] + k_2 u \nabla u \cdot \nabla u = f \quad {\rm in} \; \Omega; $$ +$$ u = 0 \quad {\rm on} \; \partial\Omega.$$

+

Infinite-dimensional Newton's Method

Consider the expansion of the first variation $\delta_u \Pi(u, \hat{u})$ about $u$ in a direction $\tilde{u}$ +$$\delta_u \Pi(u+\tilde{u}, \hat{u}) \approx \delta_u \Pi(u, \hat{u}) + \delta_u^2\Pi(u, \hat{u}, \tilde{u}),$$ +where +$$ \delta_u^2\Pi(u, \hat{u}, \tilde{u}) = \left. \frac{d}{d\varepsilon} \delta_u \Pi(u + \varepsilon \tilde{u}, \hat{u}) \right|_{\varepsilon=0}.$$

+

The infinite-dimensional Newton's method reads

+

Given the current solution $u_k$, find $\tilde{u} \in H^1_0$ such that +$$ \delta_u^2 \Pi(u_k, \hat{u}, \tilde{u}) = -\delta_u \Pi(u_k, \hat{u}) \quad \forall \, \hat{u} \in H_0^1.$$ +Update the solution using the Newton direction $\tilde{u}$ +$$ u_{k+1} = u_k + \tilde{u}.$$
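In one space dimension this scheme collapses to the familiar scalar Newton iteration on $\Pi'(u)$; a minimal sketch with an illustrative energy $\pi(u) = \tfrac{1}{4}u^4 - u$ (not the functional above):

def dpi(u):  return u**3 - 1.0        # first variation of pi(u) = u^4/4 - u
def ddpi(u): return 3.0 * u**2        # second variation

u = 2.0
for k in range(8):
    du = -dpi(u) / ddpi(u)            # solve ddpi * du = -dpi
    u = u + du                        # update with alpha = 1
    print(k, u, dpi(u))               # converges quadratically to u = 1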

+

Hessian

To derive the weak form of the Hessian, we first expand the term $\delta_u \Pi(u +\varepsilon \tilde{u},\hat{u})$ as +$$\delta_u \Pi(u+\varepsilon\tilde{u}, \hat{u}) = \int_\Omega \left[k_2 (u+\varepsilon\tilde{u}) \hat{u} \nabla (u+\varepsilon\tilde{u}) \cdot \nabla (u+\varepsilon\tilde{u}) + (k_1 + k_2(u+\varepsilon\tilde{u})^2)\nabla \hat{u}\cdot \nabla (u+\varepsilon\tilde{u}) \right] dx - \int_\Omega f \hat{u} dx \quad \forall \hat{u} \in H_0^1.$$

+

Then, after some simplification, we obtain +$$\delta^2 \Pi(u, \tilde{u}, \hat{u}) := \frac{d}{d\varepsilon} \delta_u \Pi(u+\varepsilon\tilde{u}, \hat{u}) = +\int_\Omega \left[k_2\tilde{u}\hat{u}\nabla u \cdot \nabla u + 2k_2 u \hat{u} \nabla \tilde{u} \cdot \nabla u + 2k_2 u \tilde{u} \nabla \hat{u} \cdot \nabla u + (k_1 + k_2u^2) \nabla \hat{u} \cdot \nabla \tilde{u} \right] dx. $$

+

Weak form of Newton step:

Given $u \in H_0^1$, find $\tilde{u} \in H^1_0$ such that +$$\int_\Omega \left[k_2\tilde{u}\hat{u}\nabla u \cdot \nabla u + 2k_2 u \hat{u} \nabla \tilde{u} \cdot \nabla u + 2k_2 u \tilde{u} \nabla \hat{u} \cdot \nabla u + (k_1 + k_2u^2) \nabla \hat{u} \cdot \nabla \tilde{u} \right] dx = - \int_\Omega \left[k_2 u \hat{u} \nabla u \cdot \nabla u + (k_1 + k_2u^2)\nabla \hat{u}\cdot \nabla u -f \hat{u} \right] dx \quad \forall \, \hat{u} \in H_0^1. $$ +The solution is then updated using the Newton direction $\tilde{u}$ +$$ u^{\rm new} = u + \alpha \tilde{u}.$$ +Here $\alpha$ denotes a relaxation parameter (back-tracking/line-search) used to achieve global convergence of the Newton method.
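A hedged sketch of the back-tracking (Armijo) choice of $\alpha$, written for the scalar toy problem above; the constant c = 1e-4 and the halving factor are conventional choices, not values prescribed by the text:

def backtrack(pi, gd, u, du, c=1e-4, max_back=10):
    # gd = delta_u Pi(u, du), the directional derivative (negative for descent)
    alpha, pi0 = 1.0, pi(u)
    for _ in range(max_back):
        if pi(u + alpha * du) < pi0 + c * alpha * gd:   # sufficient decrease
            break
        alpha *= 0.5                                    # halve and retry
    return alpha

pi  = lambda u: 0.25 * u**4 - u
dpi = lambda u: u**3 - 1.0
u = 2.0
du = -dpi(u) / (3.0 * u**2)                             # Newton direction
print("alpha =", backtrack(pi, dpi(u) * du, u, du))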

+

Strong form of the Newton step

$$ - \nabla \cdot \left[ (k_1 + k_2 u^2) \nabla \tilde{u}\right] + 2k_2u\nabla\tilde{u}\cdot\nabla u - \nabla\cdot(2k_2 u \tilde{u} \nabla u) + k_2 \tilde{u} \nabla u \cdot \nabla u = \nabla \cdot\left[(k_1 + k_2 u^2)\nabla u\right] - k_2 u \nabla u\cdot \nabla u + f \quad {\rm in} \, \Omega;$$$$ \tilde{u} = 0 \quad {\rm on} \, \partial \Omega. $$
+
+
+
+
+
+
+
+

1. Load modules

To start we load the following modules:

+
    +
  • dolfin: the python/C++ interface to FEniCS

    +
  • +
  • math: the python module for mathematical functions

    +
  • +
  • numpy: a python package for linear algebra

    +
  • +
  • matplotlib: a python package used for plotting the results

    +
  • +
+ +
+
+
+
+
+
In [1]:
+
+
+
from dolfin import *
+
+import math
+import numpy as np
+import logging
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+import nb
+
+logging.getLogger('FFC').setLevel(logging.WARNING)
+logging.getLogger('UFL').setLevel(logging.WARNING)
+set_log_active(False)
+
+ +
+
+
+ +
+
+
+
+
+
+

2. Define the mesh and finite element spaces

We construct a triangulation (mesh) $\mathcal{T}_h$ of the computational domain $\Omega := [0, 1]^2$ with nx elements in the x-axis direction and ny elements in the y-axis direction.

+

On the mesh $\mathcal{T}_h$, we then define the finite element space $V_h \subset H^1(\Omega)$ consisting of globally continuous piecewise linear functions and we create a function $u \in V_h$.

+

By denoting by $\left[{\phi_i(x)}\right]_{i=1}^{{\rm dim}(V_h)}$ the finite element basis for the space $V_h$ we have +$$ u = \sum_{i=1}^{{\rm dim}(V_h)} {\rm u}_i \phi_i(x), $$ +where ${\rm u}_i$ represents the coefficients in the finite element expansion of $u$.

+

Finally we define two special types of functions: the TestFunction $\hat{u}$ and the TrialFunction $\tilde{u}$. These special types of functions are used by FEniCS to generate the finite element vectors and matrices which stem from the first and second variations of the energy functional $\Pi$.

+ +
+
+
+
+
+
In [2]:
+
+
+
nx = 32
+ny = 32
+mesh = UnitSquareMesh(nx,ny)
+Vh = FunctionSpace(mesh, "CG", 1)
+
+uh = Function(Vh)
+u_hat = TestFunction(Vh)
+u_tilde = TrialFunction(Vh)
+
+nb.plot(mesh)
+print "dim(Vh) = ", Vh.dim()
+
+ +
+
+
+ +
+
+ + +
+
+
dim(Vh) =  1089
+
+
+
+ +
[Figure: the finite element mesh]
+ +
+
+
+
+
+
+

3. Define the energy functional

We now define the energy functional +$$ \Pi(u) = \frac{1}{2} \int_\Omega (k_1 + k_2u^2) \nabla u \cdot \nabla u dx - \int_\Omega f\,u dx.$$

+

The parameters $k_1$, $k_2$ and the forcing term $f$ are defined in FEniCS using the keyword Constant. To define coefficients that are space dependent one should use the keyword Expression.

+

The Dirichlet boundary condition +$$ u = 0 \quad {\rm on} \; \partial\Omega$$ +is imposed using the DirichletBC class.

+

To construct this object we need to provide

+
    +
  • the finite element space Vh

    +
  • +
  • the value u_0 of the solution at the Dirichlet boundary. u_0 can either be a Constant or an Expression object.

    +
  • +
  • the object Boundary that defines on which part of $\partial \Omega$ we want to impose such condition.

    +
  • +
+ +
+
+
+
+
+
In [3]:
+
+
+
f = Constant(1.)
+k1 = Constant(0.05)
+k2 = Constant(1.)
+
+Pi = Constant(.5)*(k1 + k2*uh*uh)*inner(nabla_grad(uh), nabla_grad(uh))*dx - f*uh*dx
+
+class Boundary(SubDomain):
+    def inside(self, x, on_boundary):
+        return on_boundary
+
+u_0 = Constant(0.)    
+bc = DirichletBC(Vh,u_0, Boundary() )
+
+ +
+
+
+ +
+
+
+
+
+
+

4. First variation

The weak form of the first variation reads

+$$\delta_u \Pi(u, \hat{u}) = \int_\Omega \left[k_2 u \hat{u} \nabla u \cdot \nabla u + (k_1 + k_2u^2)\nabla \hat{u}\cdot \nabla u\right] dx - \int_\Omega f \hat{u} dx \quad \forall \hat{u} \in H_0^1.$$

We use a finite difference check to verify that our derivation is correct. +More specifically, we consider a function +$$ u_0 = x(x-1)y(y-1) \in H^1_0(\Omega) $$ +and we verify that for a random direction $\hat{u} \in H^1_0(\Omega)$ we have +$$ r := \left| \frac{\Pi(u_0 + \varepsilon \hat{u}) - \Pi(u_0)}{\varepsilon} - \delta_u \Pi(u, \hat{u})\right| = \mathcal{O}(\varepsilon).$$

+

In the figure below we show in a loglog scale the value of $r$ as a function of $\varepsilon$. We observe that $r$ decays linearly for a wide range of values of $\varepsilon$, however we notice an increase in the error for extremely small values of $\varepsilon$ due to numerical stability and finite precision arithmetic.

+

NOTE: To compute the first variation we can also use the automatic differentiation of variational forms capabilities of FEniCS and write

+

grad = derivative(Pi, uh, u_hat)

+ +
+
+
+
+
+
In [4]:
+
+
+
grad = (k2*uh*u_hat)*inner(nabla_grad(uh), nabla_grad(uh))*dx + \
+       (k1 + k2*uh*uh)*inner(nabla_grad(uh), nabla_grad(u_hat))*dx - f*u_hat*dx
+
+u0 = interpolate(Expression("x[0]*(x[0]-1)*x[1]*(x[1]-1)"), Vh)
+
+n_eps = 32
+eps = 1e-2*np.power(2., -np.arange(n_eps))
+err_grad = np.zeros(n_eps)
+
+uh.assign(u0)
+pi0 = assemble(Pi)
+grad0 = assemble(grad)
+
+dir = Function(Vh)
+dir.vector().set_local(np.random.randn(Vh.dim()))
+bc.apply(dir.vector())
+dir_grad0 = grad0.inner(dir.vector())
+
+for i in range(n_eps):
+    uh.assign(u0)
+    uh.vector().axpy(eps[i], dir.vector()) #uh = uh + eps[i]*dir
+    piplus = assemble(Pi)
+    err_grad[i] = abs( (piplus - pi0)/eps[i] - dir_grad0 )
+
+plt.figure()    
+plt.loglog(eps, err_grad, "-ob")
+plt.loglog(eps, (.5*err_grad[0]/eps[0])*eps, "-.k")
+plt.title("Finite difference check of the first variation (gradient)")
+plt.xlabel("eps")
+plt.ylabel("Error grad")
+plt.legend(["Error Grad", "First Order"], "upper left")
+
+ +
+
+
+ +
+
+ + +
Out[4]:
+ + +
+
<matplotlib.legend.Legend at 0x116e2b810>
+
+ +
+ +
[Figure: finite difference check of the first variation]
+ +
+
+
+
+
+
+

5. Second variation

The weak form of the second variation reads

+$$\delta_u^2 \Pi(u, \tilde{u}, \hat{u}) := \frac{d}{d\varepsilon} \delta_u \Pi(u+\varepsilon\tilde{u}, \hat{u}) = +\int_\Omega \left[k_2\tilde{u}\hat{u}\nabla u \cdot \nabla u + 2k_2 u \hat{u} \nabla \tilde{u} \cdot \nabla u + 2k_2 u \tilde{u} \nabla \hat{u} \cdot \nabla u + (k_1 + k_2u^2) \nabla \hat{u} \cdot \nabla \tilde{u} \right] dx. $$

As before, we verify that for a random direction $\tilde{u} \in H^1_0(\Omega)$ we have +$$ r := \left\| \frac{\delta_u\Pi(u_0 + \varepsilon \tilde{u}, \hat{u}) - \delta_u \Pi(u_0, \hat{u})}{\varepsilon} - \delta_u^2 \Pi(u, \tilde{u}, \hat{u})\right\| = \mathcal{O}(\varepsilon).$$

+

In the figure below we show in a loglog scale the value of $r$ as a function of $\varepsilon$. As before, we observe that $r$ decays linearly for a wide range of values of $\varepsilon$, however we notice an increase in the error for extremely small values of $\varepsilon$ due to numerical stability and finite precision arithmetic.


NOTE: To compute the second variation we can also use automatic differentiation and write


H = derivative(grad, uh, u_tilde)

In [5]:
H = k2*u_tilde*u_hat*inner(nabla_grad(uh), nabla_grad(uh))*dx + \
+     Constant(2.)*(k2*uh*u_hat)*inner(nabla_grad(u_tilde), nabla_grad(uh))*dx + \
+     Constant(2.)*k2*u_tilde*uh*inner(nabla_grad(uh), nabla_grad(u_hat))*dx + \
+     (k1 + k2*uh*uh)*inner(nabla_grad(u_tilde), nabla_grad(u_hat))*dx
+
+uh.assign(u0)
+H_0 = assemble(H)
+err_H = np.zeros(n_eps)
+for i in range(n_eps):
+    uh.assign(u0)
+    uh.vector().axpy(eps[i], dir.vector())
+    grad_plus = assemble(grad)
+    diff_grad = (grad_plus - grad0)
+    diff_grad *= 1/eps[i]
+    H_0dir = H_0 * dir.vector()
+    err_H[i] = (diff_grad - H_0dir).norm("l2")
+    
+plt.figure()    
+plt.loglog(eps, err_H, "-ob")
+plt.loglog(eps, (.5*err_H[0]/eps[0])*eps, "-.k")
+plt.title("Finite difference check of the second variation (Hessian)")
+plt.xlabel("eps")
+plt.ylabel("Error Hessian")
+plt.legend(["Error Hessian", "First Order"], "upper left")
+
Out[5]:
<matplotlib.legend.Legend at 0x1174559d0>
[Output figure: loglog plot of the finite difference check for the second variation (Hessian)]

6. The infinite dimensional Newton Method

The infinite dimensional Newton step reads


Given $u_n \in H_0^1$, find $\tilde{u} \in H^1_0$ such that
$$ \delta_u^2 \Pi(u_n, \hat{u}, \tilde{u}) = - \delta_u \Pi(u_n, \hat{u}) \quad \forall \hat{u} \in H_0^1.$$
Update the solution $u_{n+1}$ using the Newton direction $\tilde{u}$
$$ u_{n+1} = u_n + \alpha \tilde{u}.$$


Here, for simplicity, we choose $\alpha$ equal to 1. In general, to guarantee global convergence of the Newton method, the parameter $\alpha$ should be chosen appropriately (e.g. by back-tracking or line search).
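For illustration, a minimal sketch of an Armijo back-tracking strategy, written in terms of the variables Pi, uh, du, and gn used in the Newton loop below, could read as follows. This is an illustrative sketch, not part of the notebook's implementation; c_armijo is a hypothetical sufficient-decrease constant.

# Minimal Armijo back-tracking sketch (assumed; not used below, where alpha = 1)
alpha = 1.
c_armijo = 1e-4                          # hypothetical sufficient-decrease constant
pi_old = assemble(Pi)                    # energy at the current iterate uh
gdu = gn.inner(du.vector())              # (g, du) > 0 since du solves Hn*du = gn
while alpha > 1e-10:
    uh.vector().axpy(-alpha, du.vector())    # trial step: uh - alpha*du
    if assemble(Pi) < pi_old - c_armijo*alpha*gdu:
        break                                # sufficient decrease: accept the step
    uh.vector().axpy(alpha, du.vector())     # reject: undo the trial step
    alpha *= 0.5                             # halve the step and retry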


The linear systems that yield the Newton directions are solved using the conjugate gradient (CG) method with an algebraic multigrid preconditioner and a fixed tolerance. In practice, one should solve the Newton system inexactly, terminating the CG iterations early via the Eisenstat–Walker criterion (to prevent oversolving) and the Steihaug criterion (to avoid directions of negative curvature).
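As an illustration only (not implemented in this notebook), a minimal sketch of the Eisenstat–Walker "choice 2" forcing term inside the Newton loop below could look like this; gn_norm and gn_norm_old are hypothetical names for the current and previous gradient norms, and the solver calls mirror the PETScKrylovSolver usage found elsewhere in this repository.

# Eisenstat-Walker ("choice 2") forcing term sketch: solve loosely far from
# the solution, more tightly as the gradient norm decreases.
eta_max = 0.5
gamma_ew = 0.9
if i == 0:
    eta = eta_max
else:
    eta = min(eta_max, gamma_ew*(gn_norm/gn_norm_old)**2)
solver = PETScKrylovSolver("cg", "petsc_amg")
solver.set_operator(Hn)
solver.parameters["relative_tolerance"] = eta   # early termination of CG
myit = solver.solve(du.vector(), gn)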


In the output below, for each iteration we report the number of CG iterations, the value of the energy functional, the inner product between the gradient and the Newton direction $\delta_u \Pi(u_n, \tilde{u})$, and the norm of the gradient.


In the example, the stopping criterion is the relative norm of the gradient, $\frac{\|\delta_u \Pi(u_n, \cdot)\|}{\|\delta_u \Pi(u_0, \cdot)\|} \leq \tau$. However, a robust implementation of the stopping criterion should also monitor the Newton decrement $\delta_u \Pi(u_n, \tilde{u})$.
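A sketch of such a combined test, reusing rtol and g0 from the code below together with a hypothetical absolute tolerance atol_dec on the Newton decrement, might read:

# Combined stopping test sketch (atol_dec is a hypothetical tolerance)
if gn.norm("l2") < rtol*g0.norm("l2") or abs(gn.inner(du.vector())) < atol_dec:
    break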

In [6]:
uh.assign(interpolate(Constant(0.), Vh))
+
+rtol = 1e-9
+max_iter = 10
+
+pi0 = assemble(Pi)
+g0 = assemble(grad, bcs=bc)
+tol = g0.norm("l2")*rtol
+
+du = Function(Vh)
+
+lin_it = 0
+print "{0:3} {1:3} {2:15} {3:15} {4:15}".format(
+      "It", "cg_it", "Energy", "(g,du)", "||g||l2")
+
+for i in range(max_iter):
+    [Hn, gn] = assemble_system(H, grad, bc)
+    if gn.norm("l2") < tol:
+        print "\nConverged in ", i, "Newton iterations and ", lin_it, "linear iterations."
+        break
+    myit = solve(Hn, du.vector(), gn, "cg", "petsc_amg")
+    lin_it = lin_it + myit
+    uh.vector().axpy(-1., du.vector())
+    pi = assemble(Pi)
+    print "{0:3d} {1:3d} {2:15e} {3:15e} {4:15e}".format(
+      i, myit, pi, -gn.inner(du.vector()), gn.norm("l2"))
+    
+    plt.figure()
+    nb.plot(uh, mytitle="Iteration {0:1d}".format(i))
It  cg_it Energy          (g,du)          ||g||l2        
+  0   5    2.131677e+00   -7.006604e-01    3.027344e-02
+  1   5    1.970930e-01   -3.236480e+00    4.776448e-01
+  2   5   -1.353237e-01   -5.650323e-01    1.383324e-01
+  3   5   -1.773194e-01   -7.431324e-02    3.724053e-02
+  4   5   -1.796716e-01   -4.455249e-03    7.765361e-03
+  5   5   -1.796910e-01   -3.850108e-05    7.391805e-04
+  6   5   -1.796910e-01   -4.634213e-09    9.310065e-06
+  7   5   -1.796910e-01   -8.693346e-17    1.500245e-09
+
+Converged in  8 Newton iterations and  40 linear iterations.
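Note the quadratic convergence characteristic of Newton's method: once the iterate is close to the solution, the number of correct digits roughly doubles at each step, as seen in the gradient norm dropping from 7.4e-04 to 9.3e-06 to 1.5e-09 in the last three iterations.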
[Output figures: the iterate uh plotted at each Newton iteration]

7. The built-in non-linear solver in FEniCS

As an alternative, we can resort to the built-in non-linear solver in FEniCS.


To this aim, we use the necessary optimality condition to cast the minimization problem as a non-linear variational problem. More specifically, we set the first variation $\delta_u \Pi(u,\hat{u})$ of the energy functional to zero.


The inputs to the solve function are

  • the weak form of the residual equation (i.e. the first variation $\delta_u \Pi(u,\hat{u})$ of the energy functional);

  • the initial guess (INPUT)/solution (OUTPUT) of the non-linear problem;

  • the Dirichlet boundary conditions;

  • the Jacobian of the residual equation (i.e. the second variation $\delta_u^2 \Pi(u,\hat{u}, \tilde{u})$ of the energy functional). If the Jacobian form is not provided, FEniCS will compute it by automatic differentiation of the residual weak form;

  • additional parameters for the linear and non-linear solver.
In [7]:
uh.assign(interpolate(Constant(0.), Vh))
+parameters={"symmetric": True, "newton_solver": {"relative_tolerance": 1e-9, "report": True, \
+                                                 "linear_solver": "cg", "preconditioner": "petsc_amg"}}
+solve(grad == 0, uh, bc, J=H, solver_parameters=parameters)
+print "Norm of the gradient at convergence", assemble(grad, bcs=bc).norm("l2")
+print "Value of the energy functional at convergence", assemble(Pi)
+nb.plot(uh)
Norm of the gradient at convergence 1.84489890678e-15
+Value of the energy functional at convergence -0.179690966184
Out[7]:
<matplotlib.collections.TriMesh at 0x117f6f890>
[Output figure: the converged solution uh]
+ + diff --git a/UnconstrainedMinimization.ipynb b/UnconstrainedMinimization.ipynb new file mode 100644 index 0000000..171cce0 --- /dev/null +++ b/UnconstrainedMinimization.ipynb @@ -0,0 +1,431 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Nonlinear energy functional minimization\n", + "\n", + "In this example we solve the following nonlinear minimization problem\n", + "\n", + "*Find * $u^* \\in H^1_0(\\Omega)$ *such that*\n", + "$$ u^* = \\rm{arg}\\min_{u \\in H^1_0(\\Omega)} \\Pi(u). $$\n", + "\n", + "Here the energy functional $\\Pi(u)$ has the form\n", + "$$ \\Pi(u) = \\frac{1}{2} \\int_\\Omega k(u) \\nabla u \\cdot \\nabla u dx - \\int_\\Omega f\\,u dx,$$\n", + "where\n", + "$$k(u) = k_1 + k_2 u^2. $$\n", + "\n", + "##Necessary condition (Euler-Lagrange condition)\n", + "\n", + "Let $\\delta_u \\Pi(u, \\hat{u})$ denote the first variation of $\\Pi(u)$ in the *direction* $\\hat{u}$, i.e.\n", + "$$\\delta_u \\Pi(u, \\hat{u}) := \\left. \\frac{d}{d \\varepsilon} \\Pi(u + \\varepsilon \\hat{u})\\right|_{\\varepsilon=0} = \\lim_{\\varepsilon \\rightarrow 0} \\frac{\\Pi(u + \\varepsilon \\hat{u}) - \\Pi(u)}{\\varepsilon}.$$\n", + "\n", + "The necessary condition is that the first variation of $\\Pi(u)$ equals to 0 for all directions $\\hat{u}$:\n", + "$$ \\delta_u \\Pi = 0 \\Longleftrightarrow \\lim_{\\varepsilon \\rightarrow 0} \\frac{\\Pi(u + \\varepsilon \\hat{u}) - \\Pi(u)}{\\varepsilon} = 0 \\quad \\forall \\hat{u} \\in H_0^1(\\Omega).$$.\n", + "\n", + "###Weak form:\n", + "To obtain the weak form of the above necessary condition, we first expand the term $\\Pi(u + \\varepsilon \\hat{u})$ as\n", + "$$\\Pi(u + \\varepsilon \\hat{u}) = \\frac{1}{2} \\int_\\Omega [k_1 + k_2(u + \\varepsilon\\hat{u})^2](\\nabla u + \\varepsilon \\nabla \\hat{u})\\cdot(\\nabla u + \\varepsilon \\nabla \\hat{u}) dx - \\int_\\Omega f\\,(u+\\varepsilon\\hat{u}) dx.$$\n", + "\n", + "After some simplification, we obtain\n", + "$$\\frac{\\Pi(u + \\varepsilon \\hat{u}) - \\Pi(u)}{\\epsilon} = \\int_\\Omega \\left[k_2 u \\hat{u} \\nabla u \\cdot \\nabla u + (k_1 + k_2u^2)\\nabla \\hat{u}\\cdot \\nabla u\\right] dx - \\int_\\Omega f \\hat{u} dx + \\mathcal{O}(\\epsilon).$$\n", + "\n", + "By neglecting the $\\mathcal{O}(\\epsilon)$ terms, we write the weak form of the necessary conditions as\n", + "\n", + "*Find *$u\\in H_0^1(\\Omega)$ *such that*\n", + "$$ \\int_\\Omega \\left[k_2 u \\hat{u} \\nabla u \\cdot \\nabla u + (k_1 + k_2u^2)\\nabla \\hat{u}\\cdot \\nabla u\\right] dx = \\int_\\Omega f \\hat{u} dx \\quad \\forall \\hat{u} \\in H_0^1.$$\n", + "\n", + "###Strong form:\n", + "To obtain the strong form, we invoke Green's first identity and write\n", + "$$ \\int_\\Omega \\left[k_2 u \\nabla u \\cdot \\nabla u - \\nabla \\cdot [(k_1 + k_2u^2) \\nabla u] \\right] \\hat{u} dx + \\int_{\\partial \\Omega} [(k_1 + k_2u^2) \\nabla u]\\cdot n \\hat{u} ds = \\int_\\Omega f \\hat{u} dx \\quad \\forall \\hat{u} \\in H_0^1.$$\n", + "\n", + "Since $\\hat{u}$ is arbitrary in $\\Omega$ and $\\hat{u} = 0$ on $\\partial \\Omega$, the strong form of the non-linear boundary problem reads\n", + "$$ - \\nabla \\cdot [(k_1 + k_2u^2) \\nabla u + k_2 u \\nabla u \\cdot \\nabla u = f \\quad {\\rm in} \\; \\Omega; $$\n", + "$$ u = 0 \\quad {\\rm on} \\; \\partial\\Omega.$$\n", + "\n", + "##Infinite-dimensional Newton's Method\n", + "\n", + "Consider the expansion of the first variation 
$\\delta_u \\Pi(u, \\hat{u})$ about $u$ in a *direction* $\\tilde{u}$\n", + "$$\\delta_u \\Pi(u+\\tilde{u}, \\hat{u}) \\approx \\delta_u \\Pi(u, \\hat{u}) + \\delta_u^2\\Pi(u, \\hat{u}, \\tilde{u}),$$\n", + "where\n", + "$$ \\delta_u^2\\Pi(u, \\hat{u}, \\tilde{u}) = \\left. \\frac{d}{d\\varepsilon} \\delta_u \\Pi(u + \\varepsilon \\tilde{u}, \\hat{u}) \\right|_{\\varepsilon=0}.$$\n", + "\n", + "The infinite-dimensional Newton's method reads\n", + "\n", + "*Given the current solution *$u_k$, *find* $\\tilde{u} \\in H^1_0$ *such that*\n", + "$$ \\delta_u^2 \\Pi(u_k, \\hat{u}, \\tilde{u}) = -\\delta_u \\Pi(u_k, \\hat{u}) \\quad \\forall \\, \\hat{u} \\in H_0^1.$$\n", + "*Update the solution using the Newton direction* $\\tilde{u}$\n", + "$$ u_{k+1} = u_k + \\tilde{u}.$$\n", + "\n", + "### Hessian\n", + "To derive the weak form of the Hessian, we first expand the term $\\delta_u \\Pi(u +\\varepsilon \\tilde{u},\\hat{u})$ as\n", + "$$\\delta_u \\Pi(u+\\varepsilon\\tilde{u}, \\hat{u}) = \\int_\\Omega \\left[k_2 (u+\\varepsilon\\tilde{u}) \\hat{u} \\nabla (u+\\varepsilon\\tilde{u}) \\cdot \\nabla (u+\\varepsilon\\tilde{u}) + (k_1 + k_2(u+\\varepsilon\\tilde{u})^2)\\nabla \\hat{u}\\cdot \\nabla (u+\\varepsilon\\tilde{u}) \\right] dx - \\int_\\Omega f \\hat{u} dx \\quad \\forall \\hat{u} \\in H_0^1.$$\n", + "\n", + "Then, after some simplification, we obtain\n", + "$$\\delta^2 \\Pi(u, \\tilde{u}, \\hat{u}) := \\frac{d}{d\\varepsilon} \\delta_u \\Pi(u+\\varepsilon\\tilde{u}, \\hat{u}) = \n", + "\\int_\\Omega \\left[k_2\\tilde{u}\\hat{u}\\nabla u \\cdot \\nabla u + 2k_2 u \\hat{u} \\nabla \\tilde{u} \\cdot \\nabla u + 2k_2 u \\tilde{u} \\nabla \\hat{u} \\cdot \\nabla u + (k_1 + k_2u^2) \\nabla \\hat{u} \\cdot \\nabla \\tilde{u} \\right] dx. $$\n", + "\n", + "###Weak form of Newton step:\n", + "*Given *$u \\in H_0^1$, *find * $\\tilde{u} \\in H^1_0$ *such that*\n", + "$$\\int_\\Omega \\left[k_2\\tilde{u}\\hat{u}\\nabla u \\cdot \\nabla u + 2k_2 u \\hat{u} \\nabla \\tilde{u} \\cdot \\nabla u + 2k_2 u \\tilde{u} \\nabla \\hat{u} \\cdot \\nabla u + (k_1 + k_2u^2) \\nabla \\hat{u} \\cdot \\nabla \\tilde{u} \\right] dx = - \\int_\\Omega \\left[k_2 u \\hat{u} \\nabla u \\cdot \\nabla u + (k_1 + k_2u^2)\\nabla \\hat{u}\\cdot \\nabla u -f \\hat{u} \\right] dx \\quad \\forall \\, \\hat{u} \\in H_0^1. $$\n", + "The solution is then updated using the Newton direction $\\tilde{u}$\n", + "$$ u^{\\rm new} = u + \\alpha \\tilde{u}.$$\n", + "Here $\\alpha$ denotes a relaxation parameter (back-tracking/line-search) used to achieve global convergence of the Newton method.\n", + "\n", + "###Strong form of the Newton step\n", + "$$ - \\nabla \\cdot \\left[ (k_1 + k_2 u^2) \\nabla \\tilde{u}\\right] + 2k_2u\\nabla\\tilde{u}\\cdot\\nabla u - \\nabla\\cdot(2k_2 u \\tilde{u} \\nabla u) + k_2 \\tilde{u} \\nabla u \\nabla u = \\nabla \\cdot\\left[(k_1 + k_2 u^2)\\nabla \\right]u - k_2 u \\nabla u\\cdot \\nabla u + f \\quad {\\rm in} \\, \\Omega.$$\n", + "$$ \\tilde{u} = 0 \\quad {\\rm on} \\, \\partial \\Omega. $$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 
Load modules\n", + "\n", + "To start we load the following modules:\n", + "\n", + "- dolfin: the python/C++ interface to FEniCS\n", + "\n", + "- [math](https://docs.python.org/2/library/math.html): the python module for mathematical functions\n", + "\n", + "- [numpy](http://www.numpy.org/): a python package for linear algebra\n", + "\n", + "- [matplotlib](http://matplotlib.org/): a python package used for plotting the results\n" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from dolfin import *\n", + "\n", + "import math\n", + "import numpy as np\n", + "import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import nb\n", + "\n", + "logging.getLogger('FFC').setLevel(logging.WARNING)\n", + "logging.getLogger('UFL').setLevel(logging.WARNING)\n", + "set_log_active(False)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Define the mesh and finite element spaces\n", + "\n", + "We construct a triangulation (mesh) $\\mathcal{T}_h$ of the computational domain $\\Omega := [0, 1]^2$ with `nx` elements in the *x*-axis direction and `ny` elements in the *y*-axis direction.\n", + "\n", + "On the mesh $\\mathcal{T}_h$, we then define the finite element space $V_h \\subset H^1(\\Omega)$ consisting of globally continuous piecewise linear functions and we create a function $u \\in V_h$.\n", + "\n", + "By denoting by $\\left[{\\phi_i(x)}\\right]_{i=1}^{{\\rm dim}(V_h)}$ the finite element basis for the space $V_h$ we have\n", + "$$ u = \\sum_{i=1}^{{\\rm dim}(V_h)} {\\rm u}_i \\phi_i(x), $$\n", + "where ${\\rm u}_i$ represents the coefficients in the finite element expansion of $u$.\n", + "\n", + "Finally we define two special types of functions: the `TestFunction` $\\hat{u}$ and the `TrialFunction` $\\tilde{u}$. These special types of functions are used by `FEniCS` to generate the finite element vectors and matrices which stem from the first and second variations of the energy functional $\\Pi$." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nx = 32\n", + "ny = 32\n", + "mesh = UnitSquareMesh(nx,ny)\n", + "Vh = FunctionSpace(mesh, \"CG\", 1)\n", + "\n", + "uh = Function(Vh)\n", + "u_hat = TestFunction(Vh)\n", + "u_tilde = TrialFunction(Vh)\n", + "\n", + "nb.plot(mesh)\n", + "print \"dim(Vh) = \", Vh.dim()" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Define the energy functional\n", + "\n", + "We now define the energy functional\n", + "$$ \\Pi(u) = \\frac{1}{2} \\int_\\Omega (k_1 + k_2u^2) \\nabla u \\cdot \\nabla u dx - \\int_\\Omega f\\,u dx.$$\n", + "\n", + "The parameters $k_1$, $k_2$ and the forcing term $f$ are defined in FEniCS using the keyword [`Constant`](http://fenicsproject.org/documentation/dolfin/1.6.0/python/programmers-reference/functions/constant/Constant.html). 
To define coefficients that are space dependent one should use the keyword [`Expression`](http://fenicsproject.org/documentation/dolfin/1.6.0/python/programmers-reference/functions/expression/Expression.html).\n", + "\n", + "The Dirichlet boundary condition\n", + "$$ u = 0 \\quad {\\rm on} \\; \\partial\\Omega$$\n", + "is imposed using the [`DirichletBC`](http://fenicsproject.org/documentation/dolfin/1.6.0/python/programmers-reference/fem/bcs/DirichletBC.html) class.\n", + "\n", + "To construct this object we need to provide\n", + "\n", + "- the finite element space `Vh`\n", + "\n", + "- the value `u_0` of the solution at the Dirichlet boundary. `u_0` can either be a `Constant` or an `Expression` object.\n", + "\n", + "- the object `Boundary` that defines on which part of $\\partial \\Omega$ we want to impose such condition." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "f = Constant(1.)\n", + "k1 = Constant(0.05)\n", + "k2 = Constant(1.)\n", + "\n", + "Pi = Constant(.5)*(k1 + k2*uh*uh)*inner(nabla_grad(uh), nabla_grad(uh))*dx - f*uh*dx\n", + "\n", + "class Boundary(SubDomain):\n", + " def inside(self, x, on_boundary):\n", + " return on_boundary\n", + "\n", + "u_0 = Constant(0.) \n", + "bc = DirichletBC(Vh,u_0, Boundary() )" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. First variation\n", + "\n", + "The weak form of the first variation reads\n", + "\n", + "$$\\delta_u \\Pi(u, \\hat{u}) = \\int_\\Omega \\left[k_2 u \\hat{u} \\nabla u \\cdot \\nabla u + (k_1 + k_2u^2)\\nabla \\hat{u}\\cdot \\nabla u\\right] dx - \\int_\\Omega f \\hat{u} dx \\quad \\forall \\hat{u} \\in H_0^1.$$\n", + "\n", + "We use a **finite difference check** to verify that our derivation is correct.\n", + "More specifically, we consider a function\n", + "$$ u_0 = x(x-1)y(y-1) \\in H^1_0(\\Omega) $$\n", + "and we verify that for a random direction $\\hat{u} \\in H^1_0(\\Omega)$ we have\n", + "$$ r := \\left| \\frac{\\Pi(u_0 + \\varepsilon \\hat{u}) - \\Pi(u_0)}{\\varepsilon} - \\delta_u \\Pi(u, \\hat{u})\\right| = \\mathcal{O}(\\varepsilon).$$\n", + "\n", + "In the figure below we show in a loglog scale the value of $r$ as a function of $\\varepsilon$. 
We observe that $r$ decays linearly for a wide range of values of $\\varepsilon$, however we notice an increase in the error for extremely small values of $\\varepsilon$ due to numerical stability and finite precision arithmetic.\n", + "\n", + "**NOTE:** To compute the first variation we can also use the [automatic differentiation](http://fenicsproject.org/documentation/dolfin/1.6.0/python/programmers-reference/fem/formmanipulations/derivative.html) of variational forms capabilities of FEniCS and write\n", + "\n", + "`grad = derivative(Pi, u, u_hat)`" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "grad = (k2*uh*u_hat)*inner(nabla_grad(uh), nabla_grad(uh))*dx + \\\n", + " (k1 + k2*uh*uh)*inner(nabla_grad(uh), nabla_grad(u_hat))*dx - f*u_hat*dx\n", + "\n", + "u0 = interpolate(Expression(\"x[0]*(x[0]-1)*x[1]*(x[1]-1)\"), Vh)\n", + "\n", + "n_eps = 32\n", + "eps = 1e-2*np.power(2., -np.arange(n_eps))\n", + "err_grad = np.zeros(n_eps)\n", + "\n", + "uh.assign(u0)\n", + "pi0 = assemble(Pi)\n", + "grad0 = assemble(grad)\n", + "\n", + "dir = Function(Vh)\n", + "dir.vector().set_local(np.random.randn(Vh.dim()))\n", + "bc.apply(dir.vector())\n", + "dir_grad0 = grad0.inner(dir.vector())\n", + "\n", + "for i in range(n_eps):\n", + " uh.assign(u0)\n", + " uh.vector().axpy(eps[i], dir.vector()) #uh = uh + eps[i]*dir\n", + " piplus = assemble(Pi)\n", + " err_grad[i] = abs( (piplus - pi0)/eps[i] - dir_grad0 )\n", + "\n", + "plt.figure() \n", + "plt.loglog(eps, err_grad, \"-ob\")\n", + "plt.loglog(eps, (.5*err_grad[0]/eps[0])*eps, \"-.k\")\n", + "plt.title(\"Finite difference check of the first variation (gradient)\")\n", + "plt.xlabel(\"eps\")\n", + "plt.ylabel(\"Error grad\")\n", + "plt.legend([\"Error Grad\", \"First Order\"], \"upper left\")\n" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Second variation\n", + "\n", + "The weak form of the second variation reads\n", + "\n", + "$$\\delta_u^2 \\Pi(u, \\tilde{u}, \\hat{u}) := \\frac{d}{d\\varepsilon} \\delta_u \\Pi(u+\\varepsilon\\tilde{u}, \\hat{u}) = \n", + "\\int_\\Omega \\left[k_2\\tilde{u}\\hat{u}\\nabla u \\cdot \\nabla u + 2k_2 u \\hat{u} \\nabla \\tilde{u} \\cdot \\nabla u + 2k_2 u \\tilde{u} \\nabla \\hat{u} \\cdot \\nabla u + (k_1 + k_2u^2) \\nabla \\hat{u} \\cdot \\nabla \\tilde{u} \\right] dx. $$\n", + "\n", + "As before, we verify that for a random direction $\\hat{u} \\in H^1_0(\\Omega)$ we have\n", + "$$ r := \\left\\| \\frac{\\delta_u\\Pi(u_0 + \\varepsilon \\tilde{u}, \\hat{u}) - \\delta_u \\Pi(u_0, \\hat{u})}{\\varepsilon} - \\delta_u^2 \\Pi(u, \\tilde{u}, \\hat{u})\\right\\| = \\mathcal{O}(\\varepsilon).$$\n", + "\n", + "In the figure below we show in a loglog scale the value of $r$ as a function of $\\varepsilon$. 
As before, we observe that $r$ decays linearly for a wide range of values of $\\varepsilon$, however we notice an increase in the error for extremely small values of $\\varepsilon$ due to numerical stability and finite precision arithmetic.\n", + "\n", + "**NOTE:** To compute the second variation we can also use automatic differentiation and write\n", + "\n", + "`H = derivative(grad, u, u_tilde)`" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "H = k2*u_tilde*u_hat*inner(nabla_grad(uh), nabla_grad(uh))*dx + \\\n", + " Constant(2.)*(k2*uh*u_hat)*inner(nabla_grad(u_tilde), nabla_grad(uh))*dx + \\\n", + " Constant(2.)*k2*u_tilde*uh*inner(nabla_grad(uh), nabla_grad(u_hat))*dx + \\\n", + " (k1 + k2*uh*uh)*inner(nabla_grad(u_tilde), nabla_grad(u_hat))*dx\n", + "\n", + "uh.assign(u0)\n", + "H_0 = assemble(H)\n", + "err_H = np.zeros(n_eps)\n", + "for i in range(n_eps):\n", + " uh.assign(u0)\n", + " uh.vector().axpy(eps[i], dir.vector())\n", + " grad_plus = assemble(grad)\n", + " diff_grad = (grad_plus - grad0)\n", + " diff_grad *= 1/eps[i]\n", + " H_0dir = H_0 * dir.vector()\n", + " err_H[i] = (diff_grad - H_0dir).norm(\"l2\")\n", + " \n", + "plt.figure() \n", + "plt.loglog(eps, err_H, \"-ob\")\n", + "plt.loglog(eps, (.5*err_H[0]/eps[0])*eps, \"-.k\")\n", + "plt.title(\"Finite difference check of the second variation (Hessian)\")\n", + "plt.xlabel(\"eps\")\n", + "plt.ylabel(\"Error Hessian\")\n", + "plt.legend([\"Error Hessian\", \"First Order\"], \"upper left\")" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. The infinite dimensional Newton Method\n", + "\n", + "The infinite dimensional Newton step reads\n", + "\n", + "*Given *$u_n \\in H_0^1$, *find * $\\tilde{u} \\in H^1_0$ *such that*\n", + "$$ \\delta_u^2 \\Pi(u_n, \\hat{u}, \\tilde{u}) = - \\delta_u \\Pi(u_n, \\hat{u}) \\quad $$\n", + "Update the solution $u_{n+1}$ using the Newton direction $\\tilde{u}$\n", + "$$ u_{n+1} = u + \\alpha \\tilde{u}.$$\n", + "\n", + "Here, for simplicity, we choose $\\alpha$ equal to 1. In general, to guarantee global convergence of the Newton method the parameter $\\alpha$ should be appropriately chosen (e.g. *back-tracking* or *line search*).\n", + "\n", + "The linear systems to compute the Newton directions are solved using the conjugate gradient (CG) with algebraic multigrid preconditioner with a fixed tolerance. In practice, one should solve the Newton system inexactly by early termination of CG \n", + "iterations via Eisenstat\u2013Walker (to prevent oversolving) and Steihaug (to avoid negative curvature) criteria.\n", + "\n", + "In the output below, for each iteration we report the number of CG iterations, the value of the energy functional, the norm of the gradient, and the inner product between the gradient and the Newton direction $\\delta_u \\Pi(u_0, \\tilde{u})$.\n", + "\n", + "In the example, the stopping criterion is relative norm of the gradient $\\frac{\\delta_u \\Pi(u_n, \\hat{u})}{\\delta_u \\Pi(u_0, \\hat{u})} \\leq \\tau$. However robust implementation of the stopping criterion should monitor also the quantity $\\delta_u \\Pi(u_0, \\tilde{u})$." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "uh.assign(interpolate(Constant(0.), Vh))\n", + "\n", + "rtol = 1e-9\n", + "max_iter = 10\n", + "\n", + "pi0 = assemble(Pi)\n", + "g0 = assemble(grad, bcs=bc)\n", + "tol = g0.norm(\"l2\")*rtol\n", + "\n", + "du = Function(Vh)\n", + "\n", + "lin_it = 0\n", + "print \"{0:3} {1:3} {2:15} {3:15} {4:15}\".format(\n", + " \"It\", \"cg_it\", \"Energy\", \"(g,du)\", \"||g||l2\")\n", + "\n", + "for i in range(max_iter):\n", + " [Hn, gn] = assemble_system(H, grad, bc)\n", + " if gn.norm(\"l2\") < tol:\n", + " print \"\\nConverged in \", i, \"Newton iterations and \", lin_it, \"linear iterations.\"\n", + " break\n", + " myit = solve(Hn, du.vector(), gn, \"cg\", \"petsc_amg\")\n", + " lin_it = lin_it + myit\n", + " uh.vector().axpy(-1., du.vector())\n", + " pi = assemble(Pi)\n", + " print \"{0:3d} {1:3d} {2:15e} {3:15e} {4:15e}\".format(\n", + " i, myit, pi, -gn.inner(du.vector()), gn.norm(\"l2\"))\n", + " \n", + " plt.figure()\n", + " nb.plot(uh, mytitle=\"Iteration {0:1d}\".format(i))" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. The built-in non-linear solver in FEniCS\n", + "\n", + "As an alternative, we can resort to the [built-in non-linear solver](http://fenicsproject.org/documentation/dolfin/1.6.0/python/programmers-reference/fem/solving/solve.html) in FEniCS.\n", + "\n", + "To this aim, we use the necessary optimality condition and we cast the minimization problem in a non-linear variational problem.\n", + "More specifically, we set first variation $\\delta_u \\Pi(u,\\hat{u})$ of the energy functional to zero.\n", + "\n", + "The input to the `solve` functions are\n", + "\n", + "- the weak form of the residual equation (i.e. the first variation $\\delta_u \\Pi(u,\\hat{u})$ of the energy functional);\n", + "\n", + "- the initial guess (INPUT)/solution (OUTPUT) of the non-linear problem;\n", + "\n", + "- the Dirichlet boundary conditions;\n", + "\n", + "- the Jacobian of the residual equation (i.e. the second variation $\\delta_u^2 \\Pi(u,\\hat{u}, \\tilde{u})$ of the energy functional). If the Jacobian form is not provided, FEniCS will compute it by automatic differentiation of the residual weak form;\n", + "\n", + "- additional parameters for the linear and non-linear solver." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "uh.assign(interpolate(Constant(0.), Vh))\n", + "parameters={\"symmetric\": True, \"newton_solver\": {\"relative_tolerance\": 1e-9, \"report\": True, \\\n", + " \"linear_solver\": \"cg\", \"preconditioner\": \"petsc_amg\"}}\n", + "solve(grad == 0, uh, bc, J=H, solver_parameters=parameters)\n", + "print \"Norm of the gradient at converge\", assemble(grad, bcs=bc).norm(\"l2\")\n", + "print \"Value of the energy functional at convergence\", assemble(Pi)\n", + "nb.plot(uh)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/UnconstrainedMinimization.py b/UnconstrainedMinimization.py new file mode 100644 index 0000000..31f36a4 --- /dev/null +++ b/UnconstrainedMinimization.py @@ -0,0 +1,165 @@ +# Example: Nonlinear energy functional minimization +# +# In this example we solve the following nonlinear minimization problem +# +# Find u^* \in H^1_0(\Omega) such that +# u^* = argmin_{u \in H^1_0(\Omega)} Pi(u) +# +# Here the energy functional Pi(u) has the form +# Pi(u) = \frac{1}{2} \int_\Omega k(u) \nabla u \cdot \nabla u dx - \int_\Omega f\,u dx, +# where +# k(u) = k_1 + k_2 u^2. + +# 1. Load modules + +from dolfin import * + +import math +import numpy as np +import logging + +import matplotlib.pyplot as plt +import nb + +logging.getLogger('FFC').setLevel(logging.WARNING) +logging.getLogger('UFL').setLevel(logging.WARNING) +set_log_active(False) + +# 2. Define the mesh and finite element spaces + +nx = 32 +ny = 32 +mesh = UnitSquareMesh(nx,ny) +Vh = FunctionSpace(mesh, "CG", 1) + +uh = Function(Vh) +u_hat = TestFunction(Vh) +u_tilde = TrialFunction(Vh) + +nb.plot(mesh) +print "dim(Vh) = ", Vh.dim() + +# 3. Define the energy functional + +f = Constant(1.) +k1 = Constant(0.05) +k2 = Constant(1.) + +Pi = Constant(.5)*(k1 + k2*uh*uh)*inner(nabla_grad(uh), nabla_grad(uh))*dx - f*uh*dx + +class Boundary(SubDomain): + def inside(self, x, on_boundary): + return on_boundary + +u_0 = Constant(0.) +bc = DirichletBC(Vh,u_0, Boundary() ) + +# 4. First variation (gradient) + +grad = (k2*uh*u_hat)*inner(nabla_grad(uh), nabla_grad(uh))*dx + \ + (k1 + k2*uh*uh)*inner(nabla_grad(uh), nabla_grad(u_hat))*dx - f*u_hat*dx + +u0 = interpolate(Expression("x[0]*(x[0]-1)*x[1]*(x[1]-1)"), Vh) + +n_eps = 32 +eps = 1e-2*np.power(2., -np.arange(n_eps)) +err_grad = np.zeros(n_eps) + +uh.assign(u0) +pi0 = assemble(Pi) +grad0 = assemble(grad) + +dir = Function(Vh) +dir.vector().set_local(np.random.randn(Vh.dim())) +bc.apply(dir.vector()) +dir_grad0 = grad0.inner(dir.vector()) + +for i in range(n_eps): + uh.assign(u0) + uh.vector().axpy(eps[i], dir.vector()) #uh = uh + eps[i]*dir + piplus = assemble(Pi) + err_grad[i] = abs( (piplus - pi0)/eps[i] - dir_grad0 ) + +plt.figure() +plt.loglog(eps, err_grad, "-ob") +plt.loglog(eps, (.5*err_grad[0]/eps[0])*eps, "-.k") +plt.title("Finite difference check of the first variation (gradient)") +plt.xlabel("eps") +plt.ylabel("Error grad") +plt.legend(["Error Grad", "First Order"], "upper left") + + +# 5. 
Second variation (Hessian) + +H = k2*u_tilde*u_hat*inner(nabla_grad(uh), nabla_grad(uh))*dx + \ + Constant(2.)*(k2*uh*u_hat)*inner(nabla_grad(u_tilde), nabla_grad(uh))*dx + \ + Constant(2.)*k2*u_tilde*uh*inner(nabla_grad(uh), nabla_grad(u_hat))*dx + \ + (k1 + k2*uh*uh)*inner(nabla_grad(u_tilde), nabla_grad(u_hat))*dx + +uh.assign(u0) +H_0 = assemble(H) +err_H = np.zeros(n_eps) +for i in range(n_eps): + uh.assign(u0) + uh.vector().axpy(eps[i], dir.vector()) + grad_plus = assemble(grad) + diff_grad = (grad_plus - grad0) + diff_grad *= 1/eps[i] + H_0dir = H_0 * dir.vector() + err_H[i] = (diff_grad - H_0dir).norm("l2") + +plt.figure() +plt.loglog(eps, err_H, "-ob") +plt.loglog(eps, (.5*err_H[0]/eps[0])*eps, "-.k") +plt.title("Finite difference check of the second variation (Hessian)") +plt.xlabel("eps") +plt.ylabel("Error Hessian") +plt.legend(["Error Hessian", "First Order"], "upper left") + + + +# 6. The infinite dimensional Newton Method + +uh.assign(interpolate(Constant(0.), Vh)) + +rtol = 1e-9 +max_iter = 10 + +pi0 = assemble(Pi) +g0 = assemble(grad, bcs=bc) +tol = g0.norm("l2")*rtol + +du = Function(Vh) + +lin_it = 0 +print "{0:3} {1:3} {2:15} {3:15} {4:15}".format( + "It", "cg_it", "Energy", "(g,du)", "||g||l2") + +for i in range(max_iter): + [Hn, gn] = assemble_system(H, grad, bc) + if gn.norm("l2") < tol: + print "\nConverged in ", i, "Newton iterations and ", lin_it, "linear iterations." + break + myit = solve(Hn, du.vector(), gn, "cg", "petsc_amg") + lin_it = lin_it + myit + uh.vector().axpy(-1., du.vector()) + pi = assemble(Pi) + print "{0:3d} {1:3d} {2:15e} {3:15e} {4:15e}".format( + i, myit, pi, -gn.inner(du.vector()), gn.norm("l2")) + +plt.figure() +nb.plot(uh, mytitle="Solution") + +# 7. The built-in non-linear solver in FEniCS + +uh.assign(interpolate(Constant(0.), Vh)) +parameters={"symmetric": True, "newton_solver": {"relative_tolerance": 1e-9, "report": True, \ + "linear_solver": "cg", "preconditioner": "petsc_amg"}} +solve(grad == 0, uh, bc, J=H, solver_parameters=parameters) +print "Built-in FEniCS non linear solver." +print "Norm of the gradient at converge", assemble(grad, bcs=bc).norm("l2") +print "Value of the energy functional at convergence", assemble(Pi) +plt.figure() +nb.plot(uh, mytitle="Build-in solver") +plt.show() + diff --git a/index.html b/index.html new file mode 100644 index 0000000..830c48d --- /dev/null +++ b/index.html @@ -0,0 +1,54 @@ + + + + + + + + + Hippylib + + + +



hIPPYlib

Python Notebooks:
  • FEniCS101 notebook illustrates the use of FEniCS for the solution of a linear boundary value problem (ipynb, html, py).
  • UnconstrainedMinimization notebook illustrates the use of FEniCS for the minimization of a non-quadratic energy functional (ipynb, html, py).
  • PoissonDeterministic-SD notebook illustrates the solution of an inverse problem for the coefficient field of a Poisson equation, using the steepest descent method (ipynb, html, py).
  • PoissonDeterministic-InexactNewton notebook illustrates the solution of an inverse problem for the coefficient field of a Poisson equation, using the inexact Newton-CG algorithm (ipynb, html, py).
  • HessianSpectrum notebook illustrates the spectral properties of the Hessian operator for a linear source inversion problem (html).
  • AdvectionDiffusionBayesian notebook illustrates how to solve a time-dependent linear inverse problem in a Bayesian setting using hIPPYlib (html, py).
  • SubsurfaceBayesian notebook illustrates how to solve a non-linear parameter inversion for the Poisson equation in a Bayesian setting using hIPPYlib (html, py).
See here for instructions on how to use IPython notebooks (files *.ipynb).
If your system does not support IPython notebooks, you can download the Python scripts (files *.py).

Notebook utilities:
  • nb.py: a collection of plotting utilities using matplotlib.
+ + diff --git a/model_gls.py b/model_gls.py new file mode 100644 index 0000000..f883551 --- /dev/null +++ b/model_gls.py @@ -0,0 +1,495 @@ +import dolfin as dl +import numpy as np +import sys +sys.path.append( "../../" ) +from hippylib import * +import matplotlib.pyplot as plt + +class TimeDependentAD: + def __init__(self, mesh, Vh, t_init, t_final, t_1, dt, wind_velocity, gls_stab, Prior): + self.mesh = mesh + self.Vh = Vh + self.t_init = t_init + self.t_final = t_final + self.t_1 = t_1 + self.dt = dt + self.sim_times = np.arange(self.t_init, self.t_final+.5*self.dt, self.dt) + + u = dl.TrialFunction(Vh[STATE]) + v = dl.TestFunction(Vh[STATE]) + + kappa = dl.Constant(.001) + dt_expr = dl.Constant(self.dt) + + r_trial = u + dt_expr*( -dl.div(kappa*dl.nabla_grad(u))+ dl.inner(wind_velocity, dl.nabla_grad(u)) ) + r_test = v + dt_expr*( -dl.div(kappa*dl.nabla_grad(v))+ dl.inner(wind_velocity, dl.nabla_grad(v)) ) + + + h = dl.CellSize(mesh) + vnorm = dl.sqrt(dl.inner(wind_velocity, wind_velocity)) + if gls_stab: + tau = dl.Min((h*h)/(dl.Constant(2.)*kappa), h/vnorm ) + else: + tau = dl.Constant(0.) + + self.M = dl.assemble( dl.inner(u,v)*dl.dx ) + self.M_stab = dl.assemble( dl.inner(u, v+tau*r_test)*dl.dx ) + self.Mt_stab = dl.assemble( dl.inner(u+tau*r_trial,v)*dl.dx ) + Nvarf = (dl.inner(kappa *dl.nabla_grad(u), dl.nabla_grad(v)) + dl.inner(wind_velocity, dl.nabla_grad(u))*v )*dl.dx + Ntvarf = (dl.inner(kappa *dl.nabla_grad(v), dl.nabla_grad(u)) + dl.inner(wind_velocity, dl.nabla_grad(v))*u )*dl.dx + self.N = dl.assemble( Nvarf ) + self.Nt = dl.assemble(Ntvarf) + stab = dl.assemble( tau*dl.inner(r_trial, r_test)*dl.dx) + self.L = self.M + dt*self.N + stab + self.Lt = self.M + dt*self.Nt + stab + + boundaries = dl.FacetFunction("size_t", mesh) + boundaries.set_all(0) + + class InsideBoundary(dl.SubDomain): + def inside(self,x,on_boundary): + x_in = x[0] > dl.DOLFIN_EPS and x[0] < 1 - dl.DOLFIN_EPS + y_in = x[1] > dl.DOLFIN_EPS and x[1] < 1 - dl.DOLFIN_EPS + return on_boundary and x_in and y_in + + Gamma_M = InsideBoundary() + Gamma_M.mark(boundaries,1) + ds_marked = dl.Measure("ds")[boundaries] + + self.Q = dl.assemble( self.dt*dl.inner(u, v) * ds_marked(1) ) + + self.Prior = Prior + + self.solver = dl.PETScKrylovSolver("gmres", "ilu") + self.solver.set_operator(self.L) + + self.solvert = dl.PETScKrylovSolver("gmres", "ilu") + self.solvert.set_operator(self.Lt) + + self.ud = self.generate_vector(STATE) + self.noise_variance = 0 + + def generate_vector(self, component = "ALL"): + if component == "ALL": + u = TimeDependentVector(self.sim_times) + u.initialize(self.Q, 0) + a = dl.Vector() + self.Prior.init_vector(a,0) + p = TimeDependentVector(self.sim_times) + p.initialize(self.Q, 0) + return [u, a, p] + elif component == STATE: + u = TimeDependentVector(self.sim_times) + u.initialize(self.Q, 0) + return u + elif component == PARAMETER: + a = dl.Vector() + self.Prior.init_vector(a,0) + return a + elif component == ADJOINT: + p = TimeDependentVector(self.sim_times) + p.initialize(self.Q, 0) + return p + else: + raise + + def init_parameter(self, a): + self.Prior.init_vector(a,0) + + def getIdentityMatrix(self, component): + Xh = self.Vh[component] + test = dl.TestFunction(Xh) + trial = dl.TrialFunction(Xh) + + I = dl.assemble(test*trial*dl.dx) + I.zero() + I.ident_zeros() + + return I + + + def cost(self, x): + Rdx = dl.Vector() + self.Prior.init_vector(Rdx,0) + dx = x[PARAMETER] - self.Prior.mean + self.Prior.R.mult(dx, Rdx) + reg = .5*Rdx.inner(dx) + + u = dl.Vector() + ud = dl.Vector() + 
self.Q.init_vector(u,0) + self.Q.init_vector(ud,0) + + misfit = 0 + for t in np.arange(self.t_1, self.t_final+(.5*self.dt), self.dt): + x[STATE].retrieve(u,t) + self.ud.retrieve(ud,t) + diff = u - ud + Qdiff = self.Q * diff + misfit += .5/self.noise_variance*Qdiff.inner(diff) + + c = misfit + reg + + return [c, reg, misfit] + + def solveFwd(self, out, x, tol=1e-9): + out.zero() + uold = x[PARAMETER] + u = dl.Vector() + rhs = dl.Vector() + self.M.init_vector(rhs, 0) + self.M.init_vector(u, 0) + self.solver.parameters["relative_tolerance"] = tol + t = self.t_init + while t < self.t_final: + t += self.dt + self.M_stab.mult(uold, rhs) + self.solver.solve(u, rhs) + out.store(u,t) + uold = u + + def solveAdj(self, out, x, tol=1e-9): + out.zero() + pold = dl.Vector() + self.M.init_vector(pold,0) + p = dl.Vector() + self.M.init_vector(p,0) + rhs = dl.Vector() + self.M.init_vector(rhs,0) + rhs_obs = dl.Vector() + + u = dl.Vector() + self.M.init_vector(u,0) + ud = dl.Vector() + self.M.init_vector(ud,0) + + self.solvert.parameters["relative_tolerance"] = tol + + t = self.t_final + while t > self.t_init: + self.Mt_stab.mult(pold,rhs) + if t > self.t_1 - .5*self.dt: + x[STATE].retrieve(u,t) + self.ud.retrieve(ud,t) + ud.axpy(-1., u) + self.Q.mult(ud,rhs_obs) +# print "t = ", t, "solveAdj ||ud-u||_inf = ", ud.norm("linf"), " ||rhs_obs|| = ", rhs_obs.norm("linf") + rhs.axpy(1./self.noise_variance, rhs_obs) + + self.solvert.solve(p, rhs) + pold = p + out.store(p, t) + t -= self.dt + + + + def evalGradientParameter(self,x, mg): + self.Prior.init_vector(mg,1) + dx = x[PARAMETER] - self.Prior.mean + self.Prior.R.mult(dx, mg) + + p0 = dl.Vector() + self.Q.init_vector(p0,0) + x[ADJOINT].retrieve(p0, self.t_init + self.dt) + + mg.axpy(-1., self.Mt_stab*p0) + + g = dl.Vector() + self.M.init_vector(g,1) + + self.Prior.Msolver.solve(g,mg) + + + grad_norm = g.inner(mg) + + return grad_norm + + + def setPointForHessianEvaluations(self, x): + """ + Specify the point x = [u,a,p] at which the Hessian operator (or the Gauss-Newton approximation) + need to be evaluated. + + Nothing to do since the problem is linear + """ + return + + + def solveFwdIncremental(self, sol, rhs, tol): + sol.zero() + uold = dl.Vector() + u = dl.Vector() + Muold = dl.Vector() + myrhs = dl.Vector() + self.M.init_vector(uold, 0) + self.M.init_vector(u, 0) + self.M.init_vector(Muold, 0) + self.M.init_vector(myrhs, 0) + self.solver.parameters["relative_tolerance"] = tol + t = self.t_init + while t < self.t_final: + t += self.dt + self.M_stab.mult(uold, Muold) + rhs.retrieve(myrhs, t) + myrhs.axpy(1., Muold) + self.solver.solve(u, myrhs) + sol.store(u,t) + uold = u + + + + def solveAdjIncremental(self, sol, rhs, tol): + sol.zero() + pold = dl.Vector() + p = dl.Vector() + Mpold = dl.Vector() + myrhs = dl.Vector() + self.M.init_vector(pold, 0) + self.M.init_vector(p, 0) + self.M.init_vector(Mpold, 0) + self.M.init_vector(myrhs, 0) + self.solvert.parameters["relative_tolerance"] = tol + t = self.t_final + while t > self.t_init: + self.Mt_stab.mult(pold, Mpold) + rhs.retrieve(myrhs, t) + myrhs.axpy(1., Mpold) + self.solvert.solve(p, myrhs) + sol.store(p,t) + pold = p + t -= self.dt + + def applyC(self, da, out): + out.zero() + myout = dl.Vector() + self.M.init_vector(myout, 0) + self.M_stab.mult(da,myout) + myout *= -1. 
+ t = self.t_init + self.dt + out.store(myout,t) + + myout.zero() + while t < self.t_final: + t += self.dt + out.store(myout,t) + + def applyCt(self, dp, out): + t = self.t_init + self.dt + dp0 = dl.Vector() + self.M.init_vector(dp0,0) + dp.retrieve(dp0, t) + dp0 *= -1. + self.Mt_stab.mult(dp0, out) + + + def applyWuu(self, du, out, gn_approx=False): + out.zero() + myout = dl.Vector() + self.Q.init_vector(myout,0) + myout.zero() + + t = self.t_init + self.dt + while t < self.t_1 - .5*self.dt: + out.store(myout, t) + t += self.dt + + mydu = dl.Vector() + self.Q.init_vector(mydu,0) + while t < self.t_final+(.5*self.dt): + du.retrieve(mydu,t) + self.Q.mult(mydu, myout) + myout *= 1./self.noise_variance + out.store(myout, t) + t += self.dt + + def applyWua(self, da, out): + out.zero() + + + def applyWau(self, du, out): + out.zero() + + def applyR(self, da, out): + self.Prior.R.mult(da,out) + + def applyRaa(self, da, out): + out.zero() + + def exportState(self, x, filename, varname): + out_file = dl.File(filename) + ufunc = dl.Function(self.Vh[STATE], name=varname) + t = self.t_init + out_file << (dl.Function(self.Vh[STATE], x[PARAMETER], name=varname),t) + while t < self.t_final: + t += self.dt + x[STATE].retrieve(ufunc.vector(), t) + out_file << (ufunc, t) + + + +def v_boundary(x,on_boundary): + return on_boundary + +def q_boundary(x,on_boundary): + return x[0] < dl.DOLFIN_EPS and x[1] < dl.DOLFIN_EPS + +def computeVelocityField(mesh): + Xh = dl.VectorFunctionSpace(mesh,'Lagrange', 2) + Wh = dl.FunctionSpace(mesh, 'Lagrange', 1) + XW = dl.MixedFunctionSpace([Xh, Wh]) + + + Re = 1e2 + + g = dl.Expression(('0.0','(x[0] < 1e-14) - (x[0] > 1 - 1e-14)')) + bc1 = dl.DirichletBC(XW.sub(0), g, v_boundary) + bc2 = dl.DirichletBC(XW.sub(1), dl.Constant(0), q_boundary, 'pointwise') + bcs = [bc1, bc2] + + vq = dl.Function(XW) + (v,q) = dl.split(vq) + (v_test, q_test) = dl.TestFunctions (XW) + + def strain(v): + return dl.sym(dl.nabla_grad(v)) + + F = ( (2./Re)*dl.inner(strain(v),strain(v_test))+ dl.inner (dl.nabla_grad(v)*v, v_test) + - (q * dl.div(v_test)) + ( dl.div(v) * q_test) ) * dl.dx + + dl.solve(F == 0, vq, bcs, solver_parameters={"newton_solver": + {"relative_tolerance":1e-4, "maximum_iterations":100}}) + + return v + + + +if __name__ == "__main__": + dl.set_log_active(False) + np.random.seed(1) + sep = "\n"+"#"*80+"\n" + print sep, "Set up the mesh and finite element spaces.\n","Compute wind velocity", sep + mesh = dl.refine( dl.Mesh("ad_20.xml") ) + wind_velocity = computeVelocityField(mesh) + Vh = dl.FunctionSpace(mesh, "Lagrange", 2) + print "Number of dofs: {0}".format( Vh.dim() ) + + print sep, "Set up Prior Information and model", sep + + true_initial_condition = dl.interpolate(dl.Expression('min(0.5,exp(-100*(pow(x[0]-0.35,2) + pow(x[1]-0.7,2))))'), Vh).vector() + + orderPrior = 2 + + if orderPrior == 1: + gamma = 1 + delta = 1e1 + prior = LaplacianPrior(Vh, gamma, delta) + elif orderPrior == 2: + gamma = 1 + delta = 8 + prior = BiLaplacianPrior(Vh, gamma, delta) + +# prior.mean = interpolate(Expression('min(0.6,exp(-50*(pow(x[0]-0.34,2) + pow(x[1]-0.71,2))))'), Vh).vector() + prior.mean = dl.interpolate(dl.Expression('0.5'), Vh).vector() + + print "Prior regularization: (delta - gamma*Laplacian)^order: delta={0}, gamma={1}, order={2}".format(delta, gamma,orderPrior) + + problem = TimeDependentAD(mesh, [Vh,Vh,Vh], 0., 4., 1., .2, wind_velocity, True, prior) + + print sep, "Generate synthetic observation", sep + rel_noise = 0.001 + utrue = problem.generate_vector(STATE) + x = [utrue, 
true_initial_condition, None] + problem.solveFwd(x[STATE], x, 1e-9) + MAX = utrue.norm("linf", "linf") + noise_std_dev = rel_noise * MAX + problem.ud.copy(utrue) + problem.ud.randn_perturb(noise_std_dev) + problem.noise_variance = noise_std_dev*noise_std_dev + + print sep, "Test the gradient and the Hessian of the model", sep + a0 = true_initial_condition.copy() + modelVerify(problem, a0, 1e-12) + + print sep, "Compute the reduced gradient and hessian", sep + [u,a,p] = problem.generate_vector() + problem.solveFwd(u, [u,a,p], 1e-12) + problem.solveAdj(p, [u,a,p], 1e-12) + mg = problem.generate_vector(PARAMETER) + grad_norm = problem.evalGradientParameter([u,a,p], mg) + + print "(g,g) = ", grad_norm + + H = ReducedHessian(problem, 1e-12, gauss_newton_approx=False, misfit_only=True) + + print sep, "Compute the low rank Gaussian Approximation of the posterior", sep + k = 80 + p = 20 + print "Double Pass Algorithm. Requested eigenvectors: {0}; Oversampling {1}.".format(k,p) + Omega = np.random.randn(a.array().shape[0], k+p) + d, U = singlePassG(H, prior.R, prior.Rsolver, Omega, k, check_Bortho=False, check_Aortho=False, check_residual=False) + posterior = GaussianLRPosterior( prior, d, U ) + + print sep, "Find the MAP point", sep + + H.misfit_only = False + + solver = CGSolverSteihaug() + solver.set_operator(H) + solver.set_preconditioner( posterior.Hlr ) + solver.parameters["print_level"] = 1 + solver.parameters["rel_tolerance"] = 1e-6 + solver.solve(a, -mg) + problem.solveFwd(u, [u,a,p], 1e-12) + + total_cost, reg_cost, misfit_cost = problem.cost([u,a,p]) + print "Total cost {0:5g}; Reg Cost {1:5g}; Misfit {2:5g}".format(total_cost, reg_cost, misfit_cost) + + posterior.mean = a + + compute_trace = False + if compute_trace: + post_tr, prior_tr, corr_tr = posterior.trace(method="Exact", tol=5e-2, min_iter=20, max_iter=100) + print "Posterior trace {0:5g}; Prior trace {1:5g}; Correction trace {2:5g}".format(post_tr, prior_tr, corr_tr) + post_pw_variance, pr_pw_variance, corr_pw_variance = posterior.pointwise_variance("Exact") + + print sep, "Save results", sep + problem.exportState([u,a,p], "results/conc.pvd", "concentration") + problem.exportState([utrue,true_initial_condition,p], "results/true_conc.pvd", "concentration") + problem.exportState([problem.ud,true_initial_condition,p], "results/noisy_conc.pvd", "concentration") + + if compute_trace: + fid = dl.File("results/pointwise_variance.pvd") + fid << dl.Function(Vh, post_pw_variance, name="Posterior") + fid << dl.Function(Vh, pr_pw_variance, name="Prior") + fid << dl.Function(Vh, corr_pw_variance, name="Correction") + + posterior.exportU(Vh, "hmisfit/evect.pvd") + np.savetxt("hmisfit/eigevalues.dat", d) + + + + print sep, "Generate samples from Prior and Posterior", sep + fid_prior = dl.File("samples/sample_prior.pvd") + fid_post = dl.File("samples/sample_post.pvd") + nsamples = 500 + noise = dl.Vector() + posterior.init_vector(noise,"noise") + noise_size = noise.array().shape[0] + s_prior = dl.Function(Vh, name="sample_prior") + s_post = dl.Function(Vh, name="sample_post") + for i in range(nsamples): + noise.set_local( np.random.randn( noise_size ) ) + posterior.sample(noise, s_prior.vector(), s_post.vector()) + fid_prior << s_prior + fid_post << s_post + + + print sep, "Visualize results", sep + plt.figure() + plt.plot(range(0,k), d, 'b*', range(0,k), np.ones(k), '-r') + plt.yscale('log') + dl.plot(dl.Function(Vh,a, name = "Initial Condition")) + plt.show() + dl.interactive() + + diff --git a/model_subsurf.py b/model_subsurf.py new file 
mode 100644 index 0000000..abb5f06 --- /dev/null +++ b/model_subsurf.py @@ -0,0 +1,477 @@ +import dolfin as dl +import sys +sys.path.append( "../../" ) +from hippylib import * +import numpy as np +import matplotlib.pyplot as plt + +def u_boundary(x, on_boundary): + return on_boundary and ( x[1] < dl.DOLFIN_EPS or x[1] > 1.0 - dl.DOLFIN_EPS) + +def v_boundary(x, on_boundary): + return on_boundary and ( x[0] < dl.DOLFIN_EPS or x[0] > 1.0 - dl.DOLFIN_EPS) + +def compute_velocity(mesh, Vh, a, u): + #export the velocity field v = - exp( a ) \grad u: then we solve ( exp(-a) v, w) = ( u, div w) + Vv = dl.FunctionSpace(mesh, 'RT', 1) + v = dl.Function(Vv, name="velocity") + vtrial = dl.TrialFunction(Vv) + vtest = dl.TestFunction(Vv) + afun = dl.Function(Vh[PARAMETER], a) + ufun = dl.Function(Vh[STATE], u) + Mv = dl.exp(-afun) *dl.inner(vtrial, vtest) *dl.dx + n = dl.FacetNormal(mesh) + class TopBoundary(dl.SubDomain): + def inside(self,x,on_boundary): + return on_boundary and x[1] > 1 - dl.DOLFIN_EPS + + Gamma_M = TopBoundary() + boundaries = dl.FacetFunction("size_t", mesh) + boundaries.set_all(0) + Gamma_M.mark(boundaries, 1) + dss = dl.Measure("ds")[boundaries] + rhs = ufun*dl.div(vtest)*dl.dx - dl.dot(vtest,n)*dss(1) + bcv = dl.DirichletBC(Vv, dl.Expression( ("0.0", "0.0") ), v_boundary) + dl.solve(Mv == rhs, v, bcv) + + return v + +def true_model(Vh, gamma, delta, anis_diff): + prior = BiLaplacianPrior(Vh, gamma, delta, anis_diff ) + noise = dl.Vector() + prior.init_vector(noise,"noise") + noise_size = noise.array().shape[0] + noise.set_local( np.random.randn( noise_size ) ) + atrue = dl.Vector() + prior.init_vector(atrue, 0) + prior.sample(noise,atrue) + return atrue + +class Poisson: + def __init__(self, mesh, Vh, targets, prior): + """ + Construct a model by proving + - the mesh + - the finite element spaces for the STATE/ADJOINT variable and the PARAMETER variable + - the Prior information + """ + self.mesh = mesh + self.Vh = Vh + + # Initialize Expressions + self.f = dl.Expression("0.0") + + self.u_bdr = dl.Expression("x[1]") + self.u_bdr0 = dl.Expression("0.0") + self.bc = dl.DirichletBC(self.Vh[STATE], self.u_bdr, u_boundary) + self.bc0 = dl.DirichletBC(self.Vh[STATE], self.u_bdr0, u_boundary) + + # Assemble constant matrices + self.prior = prior + self.B = assemblePointwiseObservation(Vh[STATE],targets) + + self.A = [] + self.At = [] + self.C = [] + self.Raa = [] + self.Wau = [] + + self.u_o = dl.Vector() + self.B.init_vector(self.u_o,0) + self.noise_variance = 0 + + def generate_vector(self, component="ALL"): + """ + Return the list x=[u,a,p] where: + - u is any object that describes the state variable + - a is a Vector object that describes the parameter variable. 
+ (Need to support linear algebra operations) + - p is any object that describes the adjoint variable + + If component is STATE, PARAMETER, or ADJOINT return x[component] + """ + if component == "ALL": + x = [dl.Vector(), dl.Vector(), dl.Vector()] + self.B.init_vector(x[STATE],1) + self.prior.init_vector(x[PARAMETER],0) + self.B.init_vector(x[ADJOINT], 1) + elif component == STATE: + x = dl.Vector() + self.B.init_vector(x,1) + elif component == PARAMETER: + x = dl.Vector() + self.prior.init_vector(x,0) + elif component == ADJOINT: + x = dl.Vector() + self.B.init_vector(x,1) + + return x + + def init_parameter(self, a): + """ + Reshape a so that it is compatible with the parameter variable + """ + self.prior.init_vector(a,0) + + def assembleA(self,x, assemble_adjoint = False, assemble_rhs = False): + """ + Assemble the matrices and rhs for the forward/adjoint problems + """ + trial = dl.TrialFunction(self.Vh[STATE]) + test = dl.TestFunction(self.Vh[STATE]) + c = dl.Function(self.Vh[PARAMETER], x[PARAMETER]) + Avarf = dl.inner(dl.exp(c)*dl.nabla_grad(trial), dl.nabla_grad(test))*dl.dx + if not assemble_adjoint: + bform = dl.inner(self.f, test)*dl.dx + Matrix, rhs = dl.assemble_system(Avarf, bform, self.bc) + else: + # Assemble the adjoint of A (i.e. the transpose of A) + s = dl.Function(self.Vh[STATE], x[STATE]) + bform = dl.inner(dl.Constant(0.), test)*dl.dx + Matrix, _ = dl.assemble_system(dl.adjoint(Avarf), bform, self.bc0) + Bu = -(self.B*x[STATE]) + Bu += self.u_o + rhs = dl.Vector() + self.B.init_vector(rhs, 1) + self.B.transpmult(Bu,rhs) + rhs *= 1.0/self.noise_variance + + if assemble_rhs: + return Matrix, rhs + else: + return Matrix + + def assembleC(self, x): + """ + Assemble the derivative of the forward problem with respect to the parameter + """ + trial = dl.TrialFunction(self.Vh[PARAMETER]) + test = dl.TestFunction(self.Vh[STATE]) + s = dl.Function(self.Vh[STATE], x[STATE]) + c = dl.Function(self.Vh[PARAMETER], x[PARAMETER]) + Cvarf = dl.inner(dl.exp(c) * trial * dl.nabla_grad(s), dl.nabla_grad(test)) * dl.dx + C = dl.assemble(Cvarf) +# print "||c||", x[PARAMETER].norm("l2"), "||s||", x[STATE].norm("l2"), "||C||", C.norm("linf") + self.bc0.zero(C) + return C + + def assembleWau(self, x): + """ + Assemble the derivative of the parameter equation with respect to the state + """ + trial = dl.TrialFunction(self.Vh[STATE]) + test = dl.TestFunction(self.Vh[PARAMETER]) + a = dl.Function(self.Vh[ADJOINT], x[ADJOINT]) + c = dl.Function(self.Vh[PARAMETER], x[PARAMETER]) + varf = dl.inner(dl.exp(c)*dl.nabla_grad(trial),dl.nabla_grad(a))*test*dl.dx + Wau = dl.assemble(varf) + dummy = dl.Vector() + Wau.init_vector(dummy,0) + self.bc0.zero_columns(Wau, dummy) + return Wau + + def assembleRaa(self, x): + """ + Assemble the derivative of the parameter equation with respect to the parameter (Newton method) + """ + trial = dl.TrialFunction(self.Vh[PARAMETER]) + test = dl.TestFunction(self.Vh[PARAMETER]) + s = dl.Function(self.Vh[STATE], x[STATE]) + c = dl.Function(self.Vh[PARAMETER], x[PARAMETER]) + a = dl.Function(self.Vh[ADJOINT], x[ADJOINT]) + varf = dl.inner(dl.nabla_grad(a),dl.exp(c)*dl.nabla_grad(s))*trial*test*dl.dx + return dl.assemble(varf) + + + def cost(self, x): + """ + Given the list x = [u,a,p] which describes the state, parameter, and + adjoint variable compute the cost functional as the sum of + the misfit functional and the regularization functional. 
+ + Return the list [cost functional, regularization functional, misfit functional] + + Note: p is not needed to compute the cost functional + """ + assert x[STATE] != None + + diff = self.B*x[STATE] + diff -= self.u_o + misfit = (.5/self.noise_variance) * diff.inner(diff) + + Rdiff_x = dl.Vector() + self.prior.init_vector(Rdiff_x,0) + diff_x = x[PARAMETER] - self.prior.mean + self.prior.R.mult(diff_x, Rdiff_x) + reg = .5 * diff_x.inner(Rdiff_x) + + c = misfit + reg + + return c, reg, misfit + + def solveFwd(self, out, x, tol=1e-9): + """ + Solve the forward problem. + """ + A, b = self.assembleA(x, assemble_rhs = True) + A.init_vector(out, 1) + solver = dl.PETScKrylovSolver("cg", amg_method()) + solver.parameters["relative_tolerance"] = tol + solver.set_operator(A) + nit = solver.solve(out,b) + +# print "FWD", (self.A*out - b).norm("l2")/b.norm("l2"), nit + + + def solveAdj(self, out, x, tol=1e-9): + """ + Solve the adjoint problem. + """ + At, badj = self.assembleA(x, assemble_adjoint = True,assemble_rhs = True) + At.init_vector(out, 1) + + solver = dl.PETScKrylovSolver("cg", amg_method()) + solver.parameters["relative_tolerance"] = tol + solver.set_operator(At) + nit = solver.solve(out,badj) + +# print "ADJ", (self.At*out - badj).norm("l2")/badj.norm("l2"), nit + + def evalGradientParameter(self,x, mg): + """ + Evaluate the gradient for the variation parameter equation at the point x=[u,a,p]. + Parameters: + - x = [u,a,p] the point at which to evaluate the gradient. + - mg the variational gradient (g, atest) being atest a test function in the parameter space + (Output parameter) + + Returns the norm of the gradient in the correct inner product g_norm = sqrt(g,g) + """ + C = self.assembleC(x) + + self.prior.init_vector(mg,0) + C.transpmult(x[ADJOINT], mg) + Rdx = dl.Vector() + self.prior.init_vector(Rdx,0) + dx = x[PARAMETER] - self.prior.mean + self.prior.R.mult(dx, Rdx) + mg.axpy(1., Rdx) + + g = dl.Vector() + self.prior.init_vector(g,1) + + self.prior.Msolver.solve(g, mg) + g_norm = dl.sqrt( g.inner(mg) ) + + return g_norm + + + def setPointForHessianEvaluations(self, x): + """ + Specify the point x = [u,a,p] at which the Hessian operator (or the Gauss-Newton approximation) + need to be evaluated. 
+ """ + self.A = self.assembleA(x) + self.At = self.assembleA(x, assemble_adjoint=True ) + self.C = self.assembleC(x) + self.Wau = self.assembleWau(x) + self.Raa = self.assembleRaa(x) + + + def solveFwdIncremental(self, sol, rhs, tol): + """ + Solve the incremental forward problem for a given rhs + """ + solver = dl.PETScKrylovSolver("cg", amg_method()) + solver.set_operator(self.A) + solver.parameters["relative_tolerance"] = tol + self.A.init_vector(sol,1) + nit = solver.solve(sol,rhs) +# print "FwdInc", (self.A*sol-rhs).norm("l2")/rhs.norm("l2"), nit + + def solveAdjIncremental(self, sol, rhs, tol): + """ + Solve the incremental adjoint problem for a given rhs + """ + solver = dl.PETScKrylovSolver("cg", amg_method()) + solver.set_operator(self.At) + solver.parameters["relative_tolerance"] = tol + self.At.init_vector(sol,1) + nit = solver.solve(sol, rhs) +# print "AdjInc", (self.At*sol-rhs).norm("l2")/rhs.norm("l2"), nit + + def applyC(self, da, out): + self.C.mult(da,out) + + def applyCt(self, dp, out): + self.C.transpmult(dp,out) + + def applyWuu(self, du, out, gn_approx=False): + help = dl.Vector() + self.B.init_vector(help, 0) + self.B.mult(du, help) + self.B.transpmult(help, out) + out *= 1./self.noise_variance + + def applyWua(self, da, out): + self.Wau.transpmult(da,out) + + + def applyWau(self, du, out): + self.Wau.mult(du, out) + + def applyR(self, da, out): + self.prior.R.mult(da, out) + + def Rsolver(self): + return self.prior.Rsolver + + def applyRaa(self, da, out): + self.Raa.mult(da, out) + +if __name__ == "__main__": + dl.set_log_active(False) + sep = "\n"+"#"*80+"\n" + print sep, "Set up the mesh and finite element spaces", sep + ndim = 2 + nx = 64 + ny = 64 + mesh = dl.UnitSquareMesh(nx, ny) + Vh2 = dl.FunctionSpace(mesh, 'Lagrange', 2) + Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1) + Vh = [Vh2, Vh1, Vh2] + print "Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}".format(Vh[STATE].dim(), Vh[PARAMETER].dim(), Vh[ADJOINT].dim()) + + print sep, "Set up the location of observation, Prior Information, and model", sep + ntargets = 300 + np.random.seed(seed=1) + targets = np.random.uniform(0.1,0.9, [ntargets, ndim] ) + print "Number of observation points: {0}".format(ntargets) + + gamma = .1 + delta = .5 + + anis_diff = dl.Expression(code_AnisTensor2D) + anis_diff.theta0 = 2. 
+if __name__ == "__main__":
+    dl.set_log_active(False)
+    sep = "\n"+"#"*80+"\n"
+    print sep, "Set up the mesh and finite element spaces", sep
+    ndim = 2
+    nx = 64
+    ny = 64
+    mesh = dl.UnitSquareMesh(nx, ny)
+    Vh2 = dl.FunctionSpace(mesh, 'Lagrange', 2)
+    Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1)
+    Vh = [Vh2, Vh1, Vh2]
+    print "Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}".format(Vh[STATE].dim(), Vh[PARAMETER].dim(), Vh[ADJOINT].dim())
+
+    print sep, "Set up the locations of the observations, the prior information, and the model", sep
+    ntargets = 300
+    np.random.seed(seed=1)
+    targets = np.random.uniform(0.1, 0.9, [ntargets, ndim] )
+    print "Number of observation points: {0}".format(ntargets)
+
+    gamma = .1
+    delta = .5
+
+    anis_diff = dl.Expression(code_AnisTensor2D)
+    anis_diff.theta0 = 2.
+    anis_diff.theta1 = .5
+    anis_diff.alpha = math.pi/4
+    atrue = true_model(Vh[PARAMETER], gamma, delta, anis_diff)
+
+    locations = np.array([[0.1, 0.1], [0.1, 0.9], [.5,.5], [.9, .1], [.9, .9]])
+    if 1:  # use the mollified prior; set to 0 for the constrained variant
+        pen = 1e1
+        prior = MollifiedBiLaplacianPrior(Vh[PARAMETER], gamma, delta, locations, atrue, anis_diff, pen)
+    else:
+        pen = 1e4
+        prior = ConstrainedBiLaplacianPrior(Vh[PARAMETER], gamma, delta, locations, atrue, anis_diff, pen)
+
+    print "Prior regularization: (delta_x - gamma*Laplacian)^order: delta={0}, gamma={1}, order={2}".format(delta, gamma, 2)
+
+    model = Poisson(mesh, Vh, targets, prior)
+
+    # Generate synthetic observations
+    utrue = model.generate_vector(STATE)
+    x = [utrue, atrue, None]
+    model.solveFwd(x[STATE], x, 1e-9)
+    model.B.mult(x[STATE], model.u_o)
+    rel_noise = 0.01
+    MAX = model.u_o.norm("linf")
+    noise_std_dev = rel_noise * MAX
+    randn_perturb(model.u_o, noise_std_dev)
+    model.noise_variance = noise_std_dev*noise_std_dev
+
+    print sep, "Test the gradient and the Hessian of the model", sep
+    a0 = dl.interpolate(dl.Expression("sin(x[0])"), Vh[PARAMETER])
+    modelVerify(model, a0.vector(), 1e-12)
+
+    print sep, "Find the MAP point", sep
+    a0 = prior.mean.copy()
+    solver = ReducedSpaceNewtonCG(model)
+    solver.parameters["rel_tolerance"] = 1e-9
+    solver.parameters["abs_tolerance"] = 1e-12
+    solver.parameters["max_iter"] = 25
+    solver.parameters["inner_rel_tolerance"] = 1e-15
+    solver.parameters["c_armijo"] = 1e-4
+    solver.parameters["GN_iter"] = 5
+
+    x = solver.solve(a0)
+
+    if solver.converged:
+        print "\nConverged in ", solver.it, " iterations."
+    else:
+        print "\nNot Converged"
+
+    print "Termination reason: ", solver.termination_reasons[solver.reason]
+    print "Final gradient norm: ", solver.final_grad_norm
+    print "Final cost: ", solver.final_cost
+
+    print sep, "Compute the low rank Gaussian approximation of the posterior", sep
+    model.setPointForHessianEvaluations(x)
+    Hmisfit = ReducedHessian(model, solver.parameters["inner_rel_tolerance"], gauss_newton_approx=False, misfit_only=True)
+    k = 50
+    p = 20
+    print "Double Pass Algorithm. Requested eigenvectors: {0}; Oversampling {1}.".format(k,p)
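+    # Editor's sketch (define-only, not part of the original script): the plain
+    # double pass randomized eigensolver that doublePassG generalizes. The real
+    # call below works matrix-free on Hmisfit and orthonormalizes in the
+    # prior.R inner product; this dense numpy analogue only shows the two passes.
+    def _double_pass_sketch(H, Omega, k):
+        Y = np.dot(H, Omega)             # first pass: sample the range of H
+        Q, _ = np.linalg.qr(Y)           # orthonormal basis for the sampled range
+        T = np.dot(Q.T, np.dot(H, Q))    # second pass: project H on that basis
+        lmbda, S = np.linalg.eigh(T)
+        idx = np.argsort(lmbda)[::-1][:k]
+        return lmbda[idx], np.dot(Q, S[:, idx])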
+    Omega = np.random.randn(x[PARAMETER].array().shape[0], k+p)
+#    d, U = singlePassG(Hmisfit, model.R, model.Rsolver, Omega, k, check_Bortho=True, check_Aortho=True, check_residual=True)
+    d, U = doublePassG(Hmisfit, prior.R, prior.Rsolver, Omega, k, check_Bortho=False, check_Aortho=False, check_residual=False)
+    posterior = GaussianLRPosterior(prior, d, U)
+    posterior.mean = x[PARAMETER]
+
+    post_tr, prior_tr, corr_tr = posterior.trace(method="Estimator", tol=1e-1, min_iter=20, max_iter=100)
+    print "Posterior trace {0:5e}; Prior trace {1:5e}; Correction trace {2:5e}".format(post_tr, prior_tr, corr_tr)
+    post_pw_variance, pr_pw_variance, corr_pw_variance = posterior.pointwise_variance("Exact")
+
+    print sep, "Save State, Parameter, Adjoint, and observation in ParaView", sep
+    xxname = ["State", "Parameter", "Adjoint"]
+    xx = [dl.Function(Vh[i], x[i], name=xxname[i]) for i in range(len(Vh))]
+    dl.File("results/poisson_state.pvd") << xx[STATE]
+    dl.File("results/poisson_state_true.pvd") << dl.Function(Vh[STATE], utrue, name = xxname[STATE])
+    dl.File("results/poisson_parameter.pvd") << xx[PARAMETER]
+    dl.File("results/poisson_parameter_true.pvd") << dl.Function(Vh[PARAMETER], atrue, name = xxname[PARAMETER])
+    dl.File("results/poisson_parameter_prmean.pvd") << dl.Function(Vh[PARAMETER], prior.mean, name = xxname[PARAMETER])
+    dl.File("results/poisson_adjoint.pvd") << xx[ADJOINT]
+
+    vtrue = compute_velocity(mesh, Vh, atrue, utrue)
+    dl.File("results/poisson_vel_true.pvd") << vtrue
+    v_map = compute_velocity(mesh, Vh, x[PARAMETER], x[STATE])
+    dl.File("results/poisson_vel.pvd") << v_map
+
+    exportPointwiseObservation(targets, model.u_o, "results/poisson_observation.vtp")
+
+    fid = dl.File("results/pointwise_variance.pvd")
+    fid << dl.Function(Vh[PARAMETER], post_pw_variance, name="Posterior")
+    fid << dl.Function(Vh[PARAMETER], pr_pw_variance, name="Prior")
+    fid << dl.Function(Vh[PARAMETER], corr_pw_variance, name="Correction")
+
+    print sep, "Generate samples from Prior and Posterior\n", "Export generalized Eigenpairs", sep
+    fid_prior = dl.File("samples/sample_prior.pvd")
+    fid_post = dl.File("samples/sample_post.pvd")
+    nsamples = 500
+    noise = dl.Vector()
+    posterior.init_vector(noise, "noise")
+    noise_size = noise.array().shape[0]
+    s_prior = dl.Function(Vh[PARAMETER], name="sample_prior")
+    s_post = dl.Function(Vh[PARAMETER], name="sample_post")
+    for i in range(nsamples):
+        noise.set_local( np.random.randn( noise_size ) )
+        posterior.sample(noise, s_prior.vector(), s_post.vector())
+        fid_prior << s_prior
+        fid_post << s_post
+
+    # Export generalized eigenpairs: eigenvectors to ParaView, eigenvalues to file.
+    posterior.exportU(Vh[PARAMETER], "hmisfit/evect.pvd")
+    np.savetxt("hmisfit/eigenvalues.dat", d)
+
+    print sep, "Visualize results", sep
+    dl.plot(xx[STATE], title = xxname[STATE])
+    dl.plot(dl.exp(xx[PARAMETER]), title = xxname[PARAMETER])
+    dl.plot(xx[ADJOINT], title = xxname[ADJOINT])
+
+    plt.figure()
+    plt.plot(range(0,k), d, 'b*', range(0,k), np.ones(k), '-r')
+    plt.yscale('log')
+
+    plt.show()
+    dl.interactive()
+
diff --git a/nb.py b/nb.py
new file mode 100644
index 0000000..5d8390b
--- /dev/null
+++ b/nb.py
@@ -0,0 +1,235 @@
+import matplotlib.pyplot as plt
+import matplotlib.tri as tri
+import matplotlib.colors as cls
+import dolfin as dl
+import numpy as np
+from matplotlib import animation
+
+def mesh2triang(mesh):
+    xy = mesh.coordinates()
+    return tri.Triangulation(xy[:, 0], xy[:, 1], mesh.cells())
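+
+# Editor's sketch (define-only, not part of the original module): mesh2triang is
+# the bridge between a dolfin mesh and matplotlib's triangulation-based plotting;
+# a minimal standalone use looks like this.
+def _demo_mesh2triang():
+    mesh = dl.UnitSquareMesh(8, 8)
+    plt.triplot(mesh2triang(mesh), color='#808080')
+    plt.show()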
+def mplot_cellfunction(cellfn):
+    C = cellfn.array()
+    triang = mesh2triang(cellfn.mesh())
+    return plt.tripcolor(triang, facecolors=C)
+
+def mplot_function(f, vmin, vmax, logscale):
+    mesh = f.function_space().mesh()
+    if (mesh.geometry().dim() != 2):
+        raise AttributeError('Mesh must be 2D')
+    # DG0 cellwise function
+    if f.vector().size() == mesh.num_cells():
+        C = f.vector().array()
+        if logscale:
+            return plt.tripcolor(mesh2triang(mesh), C, vmin=vmin, vmax=vmax, norm=cls.LogNorm() )
+        else:
+            return plt.tripcolor(mesh2triang(mesh), C, vmin=vmin, vmax=vmax)
+    # Scalar function, interpolated to vertices
+    elif f.value_rank() == 0:
+        C = f.compute_vertex_values(mesh)
+        if logscale:
+            return plt.tripcolor(mesh2triang(mesh), C, vmin=vmin, vmax=vmax, norm=cls.LogNorm() )
+        else:
+            return plt.tripcolor(mesh2triang(mesh), C, shading='gouraud', vmin=vmin, vmax=vmax)
+    # Vector function, interpolated to vertices
+    elif f.value_rank() == 1:
+        w0 = f.compute_vertex_values(mesh)
+        if (len(w0) != 2*mesh.num_vertices()):
+            raise AttributeError('Vector field must be 2D')
+        X = mesh.coordinates()[:, 0]
+        Y = mesh.coordinates()[:, 1]
+        U = w0[:mesh.num_vertices()]
+        V = w0[mesh.num_vertices():]
+        C = np.sqrt(U*U+V*V)
+        return plt.quiver(X,Y,U,V, C, units='x', headaxislength=7, headwidth=7, headlength=7, scale=4, pivot='middle')
+
+# Plot a generic dolfin object (if supported)
+def plot(obj, colorbar=True, subplot_loc=None, mytitle=None, show_axis='off', vmin=None, vmax=None, logscale=False):
+    if subplot_loc is not None:
+        plt.subplot(subplot_loc)
+#    plt.gca().set_aspect('equal')
+    if isinstance(obj, dl.Function):
+        pp = mplot_function(obj, vmin, vmax, logscale)
+    elif isinstance(obj, dl.CellFunctionSizet):
+        pp = mplot_cellfunction(obj)
+    elif isinstance(obj, dl.CellFunctionDouble):
+        pp = mplot_cellfunction(obj)
+    elif isinstance(obj, dl.CellFunctionInt):
+        pp = mplot_cellfunction(obj)
+    elif isinstance(obj, dl.Mesh):
+        if (obj.geometry().dim() != 2):
+            raise AttributeError('Mesh must be 2D')
+        pp = plt.triplot(mesh2triang(obj), color='#808080')
+        colorbar = False
+    else:
+        raise AttributeError('Failed to plot %s'%type(obj))
+
+    plt.axis(show_axis)
+
+    if colorbar:
+        plt.colorbar(pp, fraction=.1, pad=0.2)
+    else:
+        plt.gca().set_aspect('equal')
+
+    if mytitle is not None:
+        plt.title(mytitle, fontsize=20)
+
+    return pp
+
+def multi1_plot(objs, titles, same_colorbar=True, show_axis='off', logscale=False):
+
+    vmin = None
+    vmax = None
+    if same_colorbar:
+        vmin = 1e30
+        vmax = -1e30
+        for f in objs:
+            if isinstance(f, dl.Function):
+                fmin = f.vector().min()
+                fmax = f.vector().max()
+                if fmin < vmin:
+                    vmin = fmin
+                if fmax > vmax:
+                    vmax = fmax
+
+    nobj = len(objs)
+    if nobj == 1:
+        plt.figure(figsize=(7.5,5))
+        subplot_loc = 110
+    elif nobj == 2:
+        plt.figure(figsize=(15,5))
+        subplot_loc = 120
+    elif nobj == 3:
+        plt.figure(figsize=(18,4))
+        subplot_loc = 130
+    else:
+        raise AttributeError("Too many figures")
+
+    for i in range(nobj):
+        plot(objs[i], colorbar=True,
+             subplot_loc=(subplot_loc+i+1), mytitle=titles[i],
+             show_axis='off', vmin=vmin, vmax=vmax, logscale=logscale)
+
+
+def plot_pts(points, values, colorbar=True, subplot_loc=None, mytitle=None, show_axis='on', vmin=None, vmax=None, xlim=(0,1), ylim=(0,1)):
+    if subplot_loc is not None:
+        plt.subplot(subplot_loc)
+
+    pp = plt.scatter(points[:,0], points[:,1], c=values.array(), marker=",", s=20, vmin=vmin, vmax=vmax)
+
+    plt.axis(show_axis)
+
+    if colorbar:
+        plt.colorbar(pp, fraction=.1, pad=0.2)
+    else:
+        plt.gca().set_aspect('equal')
+
+    if mytitle is not None:
+        plt.title(mytitle, fontsize=20)
+
+    if xlim is not None:
+        plt.xlim(xlim)
+
+    if ylim is not None:
+        plt.ylim(ylim)
+
+    return pp
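+
+# Editor's sketch (define-only, not part of the original module): minimal usage
+# of the helpers above from a driver script. Vh, avec, targets, and obs are
+# placeholders for a scalar function space, a dolfin Vector of parameter dofs,
+# an (ntargets x 2) numpy array of points, and a dolfin Vector of observations.
+def _demo_nb_plots(Vh, avec, targets, obs):
+    multi1_plot([dl.Function(Vh, avec)], ["Parameter"])
+    plot_pts(targets, obs, mytitle="Observations", vmin=obs.min(), vmax=obs.max())
+    plt.show()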
+
+def show_solution(Vh, ic, state, same_colorbar=True, colorbar=True, mytitle=None, show_axis='off', logscale=False, times=[0, .4, 1., 2., 3., 4.]):
+    state.store(ic, 0)
+    assert len(times) % 3 == 0
+    nrows = len(times) / 3
+    subplot_loc = nrows*100 + 30
+    plt.figure(figsize=(18,4*nrows))
+
+    if mytitle is None:
+        title_stamp = "Time {0}s"
+    else:
+        title_stamp = mytitle + " at time {0}s"
+
+    vmin = None
+    vmax = None
+
+    if same_colorbar:
+        vmin = 1e30
+        vmax = -1e30
+        for s in state.data:
+            smax = s.max()
+            smin = s.min()
+            if smax > vmax:
+                vmax = smax
+            if smin < vmin:
+                vmin = smin
+
+    counter = 1
+    myu = dl.Function(Vh)
+    for i in times:
+        try:
+            state.retrieve(myu.vector(), i)
+        except Exception:
+            # Skip times that are not stored, rather than plotting stale data.
+            print "Invalid time: ", i
+            continue
+
+        plot(myu, subplot_loc=(subplot_loc+counter), mytitle=title_stamp.format(i), colorbar=colorbar,
+             logscale=logscale, show_axis=show_axis, vmin=vmin, vmax=vmax)
+        counter = counter+1
+
+
+def animate(Vh, state, same_colorbar=True, colorbar=True,
+            subplot_loc=None, mytitle=None, show_axis='off', logscale=False):
+
+    fig = plt.figure()
+
+    vmin = None
+    vmax = None
+
+    if same_colorbar:
+        vmin = 1e30
+        vmax = -1e30
+        for s in state.data:
+            smax = s.max()
+            smin = s.min()
+            if smax > vmax:
+                vmax = smax
+            if smin < vmin:
+                vmin = smin
+
+    def my_animate(i):
+        time_stamp = "Time: {0:f} s"
+        obj = dl.Function(Vh, state.data[i])
+        prefix = "" if mytitle is None else mytitle
+        t = prefix + time_stamp.format(state.times[i])
+        plt.clf()
+        return plot(obj, colorbar=True, subplot_loc=None, mytitle=t, show_axis='off', vmin=vmin, vmax=vmax, logscale=False)
+
+    return animation.FuncAnimation(fig, my_animate, np.arange(0, state.nsteps), blit=True)
+
+def coarsen_v(fun, nx = 16, ny = 16):
+    #mesh = dl.UnitSquareMesh(nx,ny)
+    mesh = dl.Mesh("ad_20.xml")
+    V_H = dl.VectorFunctionSpace(mesh, "CG", 1)
+    dl.parameters['allow_extrapolation'] = True
+    fun_H = dl.interpolate(fun, V_H)
+    dl.parameters['allow_extrapolation'] = False
+    return fun_H
+
+def plot_eigenvectors(Vh, U, mytitle, which = [0,1,2,5,10,15]):
+    assert len(which) % 3 == 0
+    nrows = len(which) / 3
+    subplot_loc = nrows*100 + 30
+    plt.figure(figsize=(18,4*nrows))
+
+    title_stamp = mytitle + " {0}"
+    u = dl.Function(Vh)
+    counter = 1
+    for i in which:
+        assert i < U.shape[1]
+        Ui = U[:,i]
+        s = 1./np.linalg.norm(Ui, np.inf)
+        u.vector().set_local(s*Ui)
+        plot(u, subplot_loc=(subplot_loc+counter), mytitle=title_stamp.format(i), vmin=-1, vmax=1)
+        counter = counter+1
\ No newline at end of file