diff --git a/07.2_Algorithms.ipynb b/07.2_Algorithms.ipynb new file mode 100644 index 0000000..8e69512 --- /dev/null +++ b/07.2_Algorithms.ipynb @@ -0,0 +1,813 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "%matplotlib inline\n", + "#plt.style.use('ggplot')\n", + "%precision 4\n", + "#np.set_printoptions(suppress=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpolation: \n", + "\n", + "`scipy.interpolate` is useful for fitting a function from experimental data and thus evaluating points where no measure exists. The module is based on the FITPACK Fortran subroutines." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# the actual measurements\n", + "measured_time = np.linspace(0, 1, 10)\n", + "noise = (np.random.random(10)*2 - 1) * 1e-1\n", + "measures = np.sin(2 * np.pi * measured_time) + noise\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.interpolate import interp1d\n", + "# linear interpolation\n", + "linear_interp = interp1d(measured_time, measures)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The kind of interpolation can be specified: linear is the default; *zero*, *slinear*, *quadratic* and *cubic* refer to a [spline](https://en.wikipedia.org/wiki/Spline) interpolation of zeroth, first, second or third order; *previous* and *next* simply return the previous or next value of the point." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# cubic interpolation\n", + "cubic_interp = interp1d(measured_time, measures, kind='cubic')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# denser interval to plot the interpolating function\n", + "interpolation_time = np.linspace(0, 1, 50)\n", + "\n", + "plt.figure(figsize=(6, 4))\n", + "plt.plot(measured_time, measures, 'o', ms=6, label='measures')\n", + "\n", + "linear_results = linear_interp(interpolation_time)\n", + "plt.plot(interpolation_time, linear_results, label='linear interp')\n", + "\n", + "cubic_results = cubic_interp(interpolation_time)\n", + "plt.plot(interpolation_time, cubic_results, label='cubic interp')\n", + "plt.legend()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# checking the punctual results\n", + "x=1/4.\n", + "print (\"sin(x):\",np.sin(2 * np.pi * x), \n", + " \"linear prediction:\",linear_interp(x),\n", + " \"cubic prediction:\", cubic_interp(x))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Optimization and fit: `scipy.optimize`\n", + "\n", + "Optimization is the problem of finding a numerical solution to a minimization or equality.\n", + "\n", + "The `scipy.optimize` module provides algorithms for function minimization (scalar or multi-dimensional), curve fitting and root finding." 
# Plot the data together with the fitted sine curve.
plt.scatter(x_data, y_data, label='Data')
# BUG FIX: the model function defined above is named `f`;
# `test_func` was never defined in this notebook and raised a NameError.
plt.plot(x_data, f(x_data, params[0], params[1]),
         label='Fitted function')

plt.legend(loc='best')
# Mark the minimum found by minimize_scalar on the function plot.
plt.plot(x, f(x, 5))
# BUG FIX: the OptimizeResult from the previous cell is bound to
# `solution`; `sol` was never defined and raised a NameError here.
plt.axvline(solution.x, color='b')
To cope with that, several ranges needs to be explored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lower = np.random.uniform(-20, 20, 100)\n", + "upper = lower + 1\n", + "solutions = [optimize.minimize_scalar(f, args=(5,), bracket=(l, u)) for (l, u) in zip(lower, upper)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "idx = np.argmin([solution.fun for solution in solutions])\n", + "solution = solutions[idx]\n", + "\n", + "plt.plot(x, f(x, 5))\n", + "plt.axvline(solution.x, color='b');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively a dedicated method is available" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.optimize import basinhopping\n", + "\n", + "x0 = 0\n", + "solution = basinhopping(f, x0, stepsize=1, minimizer_kwargs={'args': (5,)})\n", + "solution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(x, f(x, 5))\n", + "plt.axvline(solution.x, color='b');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Minimization of f(x): {\\rm I\\!R}^n \\to {\\rm I\\!R}$ is a whole field of study per se. You should have seen it treated in details in other courses.\n", + "\n", + "# Statistics and random numbers: `scipy.stats`\n", + "\n", + "The module scipy.stats contains statistical tools and probabilistic descriptions of random processes. 
Random number generators for various random processes can be found in numpy.random.
from scipy.integrate import quad, fixed_quad

# Integrate sin(theta) over [0, pi/2]; the exact value is 1.
res, err = quad(np.sin, 0, np.pi/2)

# An alternative is fixed-order Gaussian quadrature.
# (scipy.integrate.quadrature is deprecated and was removed in SciPy 1.15;
#  fixed_quad is the supported replacement.)
# res, _ = fixed_quad(np.sin, 0, np.pi/2, n=5)

print(res, err)
print(np.allclose(res, 1))        # res is the result, it should be close to 1

print(np.allclose(err, 1 - res))  # err is an estimate of the absolute error
First the function computing the derivative of the position needs to be defined:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def calc_derivative(ypos, time):\n", + " return -2 * ypos" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, to compute y as a function of time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.integrate import odeint\n", + "time_vec = np.linspace(0, 4, 40)\n", + "y = odeint(calc_derivative, y0=1, t=time_vec)\n", + "\n", + "\n", + "plt.figure(figsize=(4, 3))\n", + "plt.plot(time_vec, y)\n", + "plt.xlabel('t: Time')\n", + "plt.ylabel('y: Position')\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us integrate a more complex ODE: a damped spring-mass oscillator. The position of a mass attached to a spring obeys the 2nd order ODE $y'' + 2 \\varepsilon \\omega_0 y' + \\omega_0^2 y = 0$ with $\\omega_0^2 = k/m$ with $k$ the spring constant, $m$ the mass and $\\varepsilon = c/(2 m \\omega_0)$ with $c$ the damping coefficient. 
We set:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mass = 0.5 # kg\n", + "kspring = 4 # N/m\n", + "cviscous = 0.4 # N s/m\n", + "\n", + "# and thus\n", + "eps = cviscous / (2 * mass * np.sqrt(kspring/mass))\n", + "omega = np.sqrt(kspring / mass)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For `odeint()`, the 2nd order equation needs to be transformed in a system of two first-order equations for the vector $Y = (y, y')$: the function computes the velocity and acceleration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def calc_derivative(yvec, time, eps, omega):\n", + " return (yvec[1], -eps * omega * yvec[1] - omega **2 * yvec[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "time_vec = np.linspace(0, 10, 100)\n", + "yinit = (1, 0)\n", + "yarr = odeint(calc_derivative, yinit, time_vec, args=(eps, omega))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(4, 3))\n", + "plt.plot(time_vec, yarr[:, 0], label='y')\n", + "plt.plot(time_vec, yarr[:, 1], label=\"y'\")\n", + "plt.legend(loc='best')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fast Fourier Trasform\n", + "\n", + "The `scipy.fftpack` module computes fast Fourier transforms (FFTs) and offers utilities to handle them. 
# The FFT of the signal
sig_fft = fftpack.fft(sig)

# And the power (sig_fft is of complex dtype)
power = np.abs(sig_fft)

# The corresponding frequencies
sample_freq = fftpack.fftfreq(sig.size, d=time_step)

# Plot the FFT power
plt.figure(figsize=(6, 5))
plt.plot(sample_freq, power)
plt.xlabel('Frequency [Hz]')
plt.ylabel('power')  # BUG FIX: label was the typo 'plower'

# Find the peak frequency: we can focus on only the positive frequencies
pos_mask = np.where(sample_freq > 0)
freqs = sample_freq[pos_mask]
peak_freq = freqs[power[pos_mask].argmax()]

# Check that it does indeed correspond to the frequency that we generate
# the signal with
np.allclose(peak_freq, 1. / period)

# An inner plot to show the peak frequency
axes = plt.axes([0.55, 0.3, 0.3, 0.5])
plt.title('Peak frequency')
plt.plot(freqs[:8], power[:8])
plt.setp(axes, yticks=[])

# scipy.signal.find_peaks_cwt can also be used for more advanced
# peak detection