diff --git a/recipes/mcmc-sampling-emcee/env.yml b/recipes/mcmc-sampling-emcee/env.yml index c1b52a8..f711e97 100644 --- a/recipes/mcmc-sampling-emcee/env.yml +++ b/recipes/mcmc-sampling-emcee/env.yml @@ -6,9 +6,10 @@ channels: - conda-forge dependencies: - - gammapy=1.0.2 - - python=3.9 + - python=3.10 + - gammapy=1.2 - jupyter - - scipy<1.12 - - emcee=2.2.1 - - corner=2.1 + - scipy=1.8.0 + - emcee=3.1.4 + - corner=2.2.2 + - zeus-mcmc=2.5.4 diff --git a/recipes/mcmc-sampling-emcee/mcmc_sampling.ipynb b/recipes/mcmc-sampling-emcee/mcmc_sampling.ipynb index e49bcb3..78a49df 100644 --- a/recipes/mcmc-sampling-emcee/mcmc_sampling.ipynb +++ b/recipes/mcmc-sampling-emcee/mcmc_sampling.ipynb @@ -65,24 +65,22 @@ "import numpy as np\n", "import astropy.units as u\n", "from astropy.coordinates import SkyCoord\n", - "from gammapy.irf import load_cta_irfs\n", + "from gammapy.irf import load_irf_dict_from_file\n", "from gammapy.maps import WcsGeom, MapAxis\n", "from gammapy.modeling.models import (\n", " ExpCutoffPowerLawSpectralModel,\n", + " PowerLawSpectralModel,\n", " GaussianSpatialModel,\n", " SkyModel,\n", " Models,\n", " FoVBackgroundModel,\n", + " GaussianPrior,\n", + " UniformPrior,\n", ")\n", "from gammapy.datasets import MapDataset\n", "from gammapy.makers import MapDatasetMaker\n", "from gammapy.data import Observation\n", - "from sampling import (\n", - " run_mcmc,\n", - " par_to_model,\n", - " plot_corner,\n", - " plot_trace,\n", - ")\n", + "\n", "from gammapy.modeling import Fit" ] }, @@ -94,7 +92,8 @@ "source": [ "import logging\n", "\n", - "logging.basicConfig(level=logging.INFO)" + "logging.basicConfig(level=logging.INFO)\n", + "log = logging.getLogger(__name__)" ] }, { @@ -112,7 +111,7 @@ "metadata": {}, "outputs": [], "source": [ - "irfs = load_cta_irfs(\n", + "irfs = load_irf_dict_from_file(\n", " \"$GAMMAPY_DATA/cta-1dc/caldb/data/cta/1dc/bcf/South_z20_50h/irf_file.fits\"\n", ")\n", "\n", @@ -161,12 +160,15 @@ " lambda_=\"0.05 TeV-1\",\n", ")\n", "\n", + 
"\n", "sky_model_simu = SkyModel(\n", " spatial_model=spatial_model, spectral_model=spectral_model, name=\"source\"\n", ")\n", "\n", - "bkg_model = FoVBackgroundModel(dataset_name=\"dataset-mcmc\")\n", + "bkg_model = FoVBackgroundModel(dataset_name=dataset.name)\n", "models = Models([sky_model_simu, bkg_model])\n", + "models_true = models.copy() # comparison later between true and fitted values\n", + "\n", "print(models)" ] }, @@ -242,39 +244,41 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the free parameters and min, max values\n", - "parameters = dataset.models.parameters\n", + "def init_model():\n", + "\n", + " # Define the free parameters and min, max values\n", + " parameters = dataset.models.parameters\n", "\n", - "parameters[\"sigma\"].frozen = True\n", - "parameters[\"lon_0\"].frozen = True\n", - "parameters[\"lat_0\"].frozen = True\n", - "parameters[\"amplitude\"].frozen = False\n", - "parameters[\"index\"].frozen = False\n", - "parameters[\"lambda_\"].frozen = False\n", + " # Setting the free/frozen parameters\n", + " parameters[\"norm\"].frozen = False\n", "\n", + " parameters[\"sigma\"].frozen = True\n", + " parameters[\"lon_0\"].frozen = True\n", + " parameters[\"lat_0\"].frozen = True\n", + " parameters[\"tilt\"].frozen = True\n", "\n", - "parameters[\"norm\"].frozen = True\n", - "parameters[\"tilt\"].frozen = True\n", + " # Setting the priors\n", + " parameters[\"index\"].prior = GaussianPrior(mu=2.0, sigma=0.5)\n", + " parameters[\"norm\"].prior = GaussianPrior(mu=1.0, sigma=0.1)\n", "\n", - "parameters[\"norm\"].min = 0.5\n", - "parameters[\"norm\"].max = 2\n", + " # For uniform priors, choose how strong you want the prior to be\n", + " weight = 10\n", "\n", - "parameters[\"index\"].min = 1\n", - "parameters[\"index\"].max = 5\n", - "parameters[\"lambda_\"].min = 1e-3\n", - "parameters[\"lambda_\"].max = 1\n", + " parameters[\"lambda_\"].prior = UniformPrior(min=1e-2, max=1, weight=weight)\n", + " parameters[\"amplitude\"].prior = 
UniformPrior(\n", + " min=3e-13, max=3e-11, weight=weight\n", + " )\n", + " parameters[\"sigma\"].prior = UniformPrior(min=0.01, max=0.5, weight=weight)\n", "\n", - "parameters[\"amplitude\"].min = 0.01 * parameters[\"amplitude\"].value\n", - "parameters[\"amplitude\"].max = 100 * parameters[\"amplitude\"].value\n", + " # Setting amplitude init values a bit offset to see evolution\n", + " # Here starting close to the real value\n", + " parameters[\"index\"].value = 1.5\n", + " parameters[\"amplitude\"].value = 5e-12\n", + " parameters[\"lambda_\"].value = 0.5\n", + " parameters[\"norm\"].value = 0.9\n", "\n", - "parameters[\"sigma\"].min = 0.05\n", - "parameters[\"sigma\"].max = 1\n", "\n", - "# Setting amplitude init values a bit offset to see evolution\n", - "# Here starting close to the real value\n", - "parameters[\"index\"].value = 2.0\n", - "parameters[\"amplitude\"].value = 3.2e-12\n", - "parameters[\"lambda_\"].value = 0.05\n", + "init_model()\n", "\n", "print(dataset.models)\n", "print(\"stat =\", dataset.stat_sum())" @@ -286,11 +290,115 @@ "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "# Now let's define a function to init parameters and run the MCMC with emcee\n", + "def lnprob(pars, dataset):\n", + " \"\"\"\n", + " Estimate the likelihood of a model including prior on parameters.\n", + " Input :\n", + " pars : a list of parameters\n", + " dataset: a gammapy dataset\n", + " \"\"\"\n", + " # The MCMC sampler will evaluate the likelihood of the model given\n", + " # a set of parameters. 
We need to update the model parameters before\n", + " # evaluating the new likelihood value.\n", + " for value, parameter in zip(\n", + " pars, dataset.models.parameters.free_parameters\n", + " ):\n", + " parameter.value = value\n", + "\n", + " # dataset.stat_sum() returns the Cash statistic, which is to be minimized;\n", + " # emcee will maximise the log-likelihood, so we need -dataset.stat_sum()\n", + " total_lnprob = (\n", + " -dataset.stat_sum()\n", + " ) # stat_sum now includes stat + stat_priors\n", + "\n", + " return total_lnprob" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import emcee\n", + "import logging\n", + "\n", + "\n", + "nwalkers = 8\n", + "nrun = 2000\n", + "\n", + "init_model()\n", + "\n", + "p0 = [free_par.value for free_par in dataset.models.parameters.free_parameters]\n", + "labels = [\n", + " free_par.name for free_par in dataset.models.parameters.free_parameters\n", + "]\n", + "ndim = len(p0)\n", + "\n", + "rng = np.random.default_rng(seed=42)\n", + "\n", + "randomize_walkers = rng.normal(1, 0.03, size=(nwalkers, len(p0)))\n", + "p0_walkers = (\n", + " np.tile(p0, [nwalkers, 1]) * randomize_walkers\n", + ") # init value for all walkers with slightly different values\n", + "\n", + "print(dataset.models.parameters.free_parameters[\"amplitude\"])\n", + "print(dataset.models.parameters.free_parameters[\"lambda_\"])\n", + "\n", + "print(\"Initial values for walkers are : \", p0_walkers)\n", + "\n", + "sampler = emcee.EnsembleSampler(\n", + " nwalkers,\n", + " ndim,\n", + " lnprob,\n", + " args=[dataset],\n", + ")\n", + "\n", + "log.info(f\"Free parameters: {labels}\")\n", + "log.info(f\"Starting emcee sampling: nwalkers={nwalkers}, nrun={nrun}\")\n", + "\n", + "\n", "# Depending on your number of walkers, Nrun and dimensionality, this can take a while (> minutes)\n", - "sampler = run_mcmc(dataset, nwalkers=6, nrun=150) # to speedup the notebook\n", - "# 
sampler=run_mcmc(dataset,nwalkers=12,nrun=1000) # more accurate contours" + "state = sampler.run_mcmc(\n", + " p0_walkers, nsteps=nrun, progress=True\n", + ") # to speedup the notebook\n", + "\n", + "samples1 = sampler.get_chain()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import zeus\n", + "\n", + "nwalkers = 8\n", + "nrun = 1000\n", + "\n", + "init_model()\n", + "\n", + "p0 = [free_par.value for free_par in dataset.models.parameters.free_parameters]\n", + "\n", + "# Use the same starting points for both methods\n", + "p0_walkers = (\n", + " np.tile(p0, [nwalkers, 1]) * randomize_walkers\n", + ") # init value for all walkers with slightly different values\n", + "\n", + "print(\"Initial values for walkers are : \", p0_walkers)\n", + "\n", + "\n", + "sampler2 = zeus.EnsembleSampler(nwalkers, ndim, lnprob, args=[dataset])\n", + "\n", + "log.info(f\"Free parameters: {labels}\")\n", + "log.info(f\"Starting Zeus MCMC sampling: nwalkers={nwalkers}, nrun={nrun}\")\n", + "\n", + "# Depending on your number of walkers, Nrun and dimensionality, this can take a while (> minutes)\n", + "state = sampler2.run_mcmc(p0_walkers, nsteps=nrun, progress=True)\n", + "samples2 = sampler2.get_chain()" ] }, { @@ -310,24 +418,63 @@ "cell_type": "code", "execution_count": null, "metadata": { - "scrolled": false + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "nbsphinx-thumbnail" + ] }, "outputs": [], "source": [ - "plot_trace(sampler, dataset)" + "fig, axes = plt.subplots(len(labels), sharex=True, figsize=(10, 7))\n", + "\n", + "for idx, ax in enumerate(axes):\n", + " ax.plot(samples1[:, :, idx], \"-k\", alpha=0.2) # emcee\n", + " ax.plot(samples2[:, :, idx], \"-b\", alpha=0.2) # Zeus MCMC\n", + " ax.set_ylabel(labels[idx])\n", + "\n", + "plt.xlabel(\"Nrun\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comparison of both algorithms\n", + 
"\n", + "Note that the convergence is quite different between both MCMC algorithms. \n", + "`zeus-mcmc` was able to converge to a steady solution much faster than `emcee`. This means that you will burn less walkers steps and in the end you will have a better sampling of your posterior distributions. While `emcee` was faster to run you'll have to discard a larger fraction of the steps." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "tags": [ - "nbsphinx-thumbnail" - ] + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ - "plot_corner(sampler, dataset, nburn=50)" + "from corner import corner\n", + "\n", + "nburn1 = 800\n", + "nburn2 = 150\n", + "\n", + "print(\"Corner plot with emcee\")\n", + "s = samples1[nburn1:, :, :].reshape((-1, len(labels)))\n", + "corner(s, labels=labels, quantiles=[0.16, 0.5, 0.84], show_titles=True)\n", + "plt.show()\n", + "\n", + "print(\"Corner plot with Zeus MCMC\")\n", + "s = samples2[nburn2:, :, :].reshape((-1, len(labels)))\n", + "corner(s, labels=labels, quantiles=[0.16, 0.5, 0.84], show_titles=True)\n", + "plt.show()" ] }, { @@ -336,7 +483,9 @@ "source": [ "## Plot the model dispersion\n", "\n", - "Using the samples from the chain after the burn period, we can plot the different models compared to the truth model. To do this we need to the spectral models for each parameter state in the sample." + "Using the samples from the chain after the burn period, we can plot the different models compared to the truth model. \n", + "To do this we need to generate a spectral model for each parameter state in the sample. \n", + "The shaded area will represent the uncertainty band." 
] }, { @@ -346,16 +495,22 @@ "outputs": [], "source": [ "emin, emax = [0.1, 100] * u.TeV\n", - "nburn = 50\n", + "nburn = 100\n", + "nmodel = 100 # number of samples to draw\n", + "\n", + "samples = samples2\n", "\n", "fig, ax = plt.subplots(1, 1, figsize=(12, 6))\n", "\n", - "for nwalk in range(0, 6):\n", - " for n in range(nburn, nburn + 100):\n", - " pars = sampler.chain[nwalk, n, :]\n", + "for nwalk in range(0, nwalkers):\n", + " for n in range(nburn, nburn + nmodel):\n", + " pars = samples[n, nwalk, :]\n", "\n", " # set model parameters\n", - " par_to_model(dataset, pars)\n", + " for i, free_par in enumerate(\n", + " dataset.models.parameters.free_parameters\n", + " ):\n", + " free_par.value = pars[i]\n", " spectral_model = dataset.models[\"source\"].spectral_model\n", "\n", " spectral_model.plot(\n", @@ -369,7 +524,8 @@ "\n", "sky_model_simu.spectral_model.plot(\n", " energy_bounds=(emin, emax), energy_power=2, ax=ax, color=\"red\"\n", - ");" + ")\n", + "plt.show()" ] }, { @@ -389,42 +545,60 @@ "outputs": [], "source": [ "# Here we plot the trace of one walker in a given parameter space\n", - "parx, pary = 0, 1\n", - "\n", - "plt.plot(sampler.chain[0, :, parx], sampler.chain[0, :, pary], \"ko\", ms=1)\n", - "plt.plot(\n", - " sampler.chain[0, :, parx],\n", - " sampler.chain[0, :, pary],\n", - " ls=\":\",\n", - " color=\"grey\",\n", - " alpha=0.5,\n", - ")\n", - "\n", - "plt.xlabel(\"Index\")\n", - "plt.ylabel(\"Amplitude\");" + "walkerid = 0\n", + "parx = 0\n", + "# Re-init the model to compare with the initial simulated parameters\n", + "\n", + "free_pars = dataset.models.parameters.free_parameters\n", + "names = free_pars.names\n", + "true_pars = models_true.parameters\n", + "\n", + "for i, name in enumerate(names):\n", + "\n", + " plt.plot(\n", + " samples1[:, walkerid, parx],\n", + " samples1[:, walkerid, i],\n", + " ls=\":\",\n", + " color=\"k\",\n", + " ms=1,\n", + " label=\"emcee\",\n", + " )\n", + " plt.plot(\n", + " samples2[:, walkerid, parx],\n", + 
" samples2[:, walkerid, i],\n", + " ls=\":\",\n", + " color=\"blue\",\n", + " ms=1,\n", + " alpha=0.5,\n", + " label=\"Zeus\",\n", + " )\n", + " plt.plot(\n", + " true_pars[parx].value,\n", + " true_pars[name].value,\n", + " \"+\",\n", + " color=\"red\",\n", + " markersize=15,\n", + " label=\"True value\",\n", + " )\n", + " plt.xlabel(names[parx])\n", + " plt.ylabel(name)\n", + " plt.legend()\n", + " plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## PeVatrons in CTA ?\n", - "\n", - "Now it's your turn to play with this MCMC notebook. For example to test the CTA performance to measure a cutoff at very high energies (100 TeV ?).\n", - "\n", - "After defining your Skymodel it can be as simple as this :" + "# PeVatrons in CTA ?" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# dataset = simulate_dataset(model, geom, pointing, irfs)\n", - "# sampler = run_mcmc(dataset)\n", - "# plot_trace(sampler, dataset)\n", - "# plot_corner(sampler, dataset, nburn=200)" + "Now it’s your turn to play with this MCMC notebook. 
For example, test the CTA performance in measuring a cutoff at very high energies (100 TeV?).\n", + "\n" ] }, { @@ -451,7 +625,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/recipes/mcmc-sampling-emcee/sampling.py b/recipes/mcmc-sampling-emcee/sampling.py deleted file mode 100644 index 5729ef8..0000000 --- a/recipes/mcmc-sampling-emcee/sampling.py +++ /dev/null @@ -1,171 +0,0 @@ -# Licensed under a 3-clause BSD style license - see LICENSE.rst -"""MCMC sampling helper functions using ``emcee``.""" -import logging -import numpy as np - -__all__ = ["uniform_prior", "run_mcmc", "plot_trace", "plot_corner"] - -log = logging.getLogger(__name__) - - -# TODO: so far only works with a uniform prior on parameters -# as there is no way yet to enter min,mean,max in parameters for normal prior -# lnprob() uses a uniform prior. hard coded for now. - - -def uniform_prior(value, umin, umax): - """Uniform prior distribution.""" - if umin <= value <= umax: - return 0.0 - else: - return -np.inf - - -def normal_prior(value, mean, sigma): - """Normal prior distribution.""" - return -0.5 * (2 * np.pi * sigma) - (value - mean) ** 2 / (2.0 * sigma) - - -def par_to_model(dataset, pars): - """Update model in dataset with a list of free parameters factors""" - for i, p in enumerate(dataset.models.parameters.free_parameters): - p.factor = pars[i] - - -def ln_uniform_prior(dataset): - """LogLike associated with prior and data/model evaluation. - - Return probability of parameter values according to prior knowledge. 
- Parameter limits should be done here through uniform prior ditributions - """ - logprob = 0 - for par in dataset.models.parameters.free_parameters: - logprob += uniform_prior(par.value, par.min, par.max) - - return logprob - - -def lnprob(pars, dataset): - """Estimate the likelihood of a model including prior on parameters.""" - # Update model parameters factors inplace - for factor, par in zip(pars, dataset.models.parameters.free_parameters): - par.factor = factor - - lnprob_priors = ln_uniform_prior(dataset) - - # dataset.likelihood returns Cash statistics values - # emcee will maximisise the LogLikelihood so we need -dataset.likelihood - total_lnprob = -dataset.stat_sum() + lnprob_priors - - return total_lnprob - - -def run_mcmc(dataset, nwalkers=8, nrun=1000, threads=1): - """Run the MCMC sampler. - - Parameters - ---------- - dataset : `~gammapy.modeling.Dataset` - Dataset - nwalkers : int - Number of walkers - nrun : int - Number of steps each walker takes - threads : (optional) - Number of threads or processes to use - - Returns - ------- - sampler : `emcee.EnsembleSampler` - sampler object containing the trace of all walkers. - """ - import emcee - - dataset.models.parameters.autoscale() # Autoscale parameters - - # Initialize walkers in a ball of relative size 0.5% in all dimensions if the - # parameters have been fit, or to 10% otherwise - # Handle source position spread differently with a spread of 0.1° - # TODO: the spread of 0.5% below is valid if a pre-fit of the model has been obtained. - # currently the run_mcmc() doesn't know the status of previous fit. 
- p0var = [] - pars = [] - spread = 0.5 / 100 - spread_pos = 0.1 # in degrees - for par in dataset.models.parameters.free_parameters: - pars.append(par.factor) - if par.name in ["lon_0", "lat_0"]: - p0var.append(spread_pos / par.scale) - else: - p0var.append(spread * par.factor) - - ndim = len(pars) - p0 = emcee.utils.sample_ball(pars, p0var, nwalkers) - - labels = [] - for par in dataset.models.parameters.free_parameters: - labels.append(par.name) - if (par.min is np.nan) and (par.max is np.nan): - log.warning( - f"Missing prior for parameter: {par.name}.\nMCMC will likely fail!" - ) - - log.info(f"Free parameters: {labels}") - - sampler = emcee.EnsembleSampler( - nwalkers, ndim, lnprob, args=[dataset], threads=threads - ) - - log.info(f"Starting MCMC sampling: nwalkers={nwalkers}, nrun={nrun}") - for idx, result in enumerate(sampler.sample(p0, iterations=nrun)): - if idx % (nrun / 4) == 0: - log.info("{:5.0%}".format(idx / nrun)) - log.info("100% => sampling completed") - - return sampler - - -def plot_trace(sampler, dataset): - """ - Plot the trace of walkers for every steps - - Parameters - ---------- - sampler : `emcee.EnsembleSampler` - Sampler object containing the trace of all walkers - dataset : `~gammapy.modeling.Dataset` - Dataset - """ - import matplotlib.pyplot as plt - - labels = [par.name for par in dataset.models.parameters.free_parameters] - - fig, axes = plt.subplots(len(labels), sharex=True) - - for idx, ax in enumerate(axes): - ax.plot(sampler.chain[:, :, idx].T, "-k", alpha=0.2) - ax.set_ylabel(labels[idx]) - - plt.xlabel("Nrun") - plt.show() - - -def plot_corner(sampler, dataset, nburn=0): - """Corner plot for each parameter explored by the walkers. 
- - Parameters - ---------- - sampler : `emcee.EnsembleSampler` - Sampler object containing the trace of all walkers - dataset : `~gammapy.modeling.Dataset` - Dataset - nburn : int - Number of runs to discard, because considered part of the burn-in phase - """ - from corner import corner - - labels = [par.name for par in dataset.models.parameters.free_parameters] - - samples = sampler.chain[:, nburn:, :].reshape((-1, len(labels))) - - corner(samples, labels=labels, quantiles=[0.16, 0.5, 0.84], show_titles=True)