diff --git a/make_m33_tutorial_datasets.py b/make_m33_tutorial_datasets.py new file mode 100644 index 0000000..0f8ff2a --- /dev/null +++ b/make_m33_tutorial_datasets.py @@ -0,0 +1,47 @@ +# Create the smaller cutouts of the M33 CO and HI cubes to host and +# be available for download online for the tutorials. + + +import astropy.units as u +from spectral_cube import SpectralCube +from pathlib import Path + +# When running, change to the location of the data for `workingdir_co` and `workingdir_hi` +# e.g., +# outputdir = Path("/home/ekoch/ownCloud/code_development/radio_astro_tools/") +# workingdir_co = Path("/home/ekoch/ownCloud/Data/M33/ALMA/") +# workingdir_hi = Path("/home/ekoch/storage/M33/HI/17B-162/") + +# By default, the current directory is used. + +outputdir = Path(".") + +workingdir_co = Path(".") + +workingdir_hi = Path(".") + +# Cut region from the CO(2-1) data cube. + +cube = SpectralCube.read(workingdir_co / "Brick1Tile1_12CO21_0p7kms.image.pbcor_K.fits") + +y, x = 145, 340 + +size = 32 + +cube_cutout = cube[:, y-size:y+size, x-size:x+size] + +cube_cutout.write(outputdir / "M33_ALMA_ACA_12CO21.cutout.fits", overwrite=True) + +# Cutout a similar region from the HI B+C-config cube. + +hi_cube = SpectralCube.read(workingdir_hi / "M33_14B_17B_HI_contsub_width_1kms.image.pbcor.GBT_feathered.fits", use_dask=True) + +hi_cube_cutout = hi_cube.subcube(xlo=cube_cutout.longitude_extrema[1], + xhi=cube_cutout.longitude_extrema[0], + ylo=cube_cutout.latitude_extrema[0], + yhi=cube_cutout.latitude_extrema[1]) + +# Convert to K +hi_cube_cutout = hi_cube_cutout.to(u.K) + +hi_cube_cutout.write(outputdir / "M33_VLA_BCconfig_wGBT_HI.cutout.fits", overwrite=True) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1ea8154 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +numpy +scipy +matplotlib +astropy +aplpy +git+https://github.com/radio-astro-tools/spectral-cube/#egg=spectral-cube +git+https://github.com/radio-astro-tools/radio-beam/#egg=radio-beam +dask[complete] \ No newline at end of file diff --git a/spectral_fitting/fitting_with_spectralcube.ipynb b/spectral_fitting/fitting_with_spectralcube.ipynb new file mode 100644 index 0000000..14fada5 --- /dev/null +++ b/spectral_fitting/fitting_with_spectralcube.ipynb @@ -0,0 +1,585 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "Python 3.6.9 64-bit ('casa_py36': conda)", + "display_name": "Python 3.6.9 64-bit ('casa_py36': conda)", + "metadata": { + "interpreter": { + "hash": "2d716b7c92d5d4166e3213fb09c67a7f0504f606473c52b1c1916f6fd984541f" + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fitting with spectral-cube and astropy #\n", + "\n", + "## Authors ##\n", + "[Eric Koch](https://github.com/e-koch), [Adam Ginsburg](https://github.com/keflavich), [Tom Robitaille](https://github.com/astrofrog)\n", + "\n", + "## Learning Goals ##\n", + "\n", + "* Fitting 1D models to spectra in a spectral-line data cube\n", + "* Fitting 2D models to spectral channels or 2D projections\n", + "* Enabling dask mode for large data cubes\n", + "\n", + "## Keywords ##\n", + "\n", + "radio astronomy, spectral-line data cubes, spectral fitting, spatial fitting\n", + "\n", + "## Summary ##\n", + "\n", + "This tutorial demonstrates methods to fit models to a `SpectralCube` object using [astropy.modeling](https://docs.astropy.org/en/stable/modeling/).\n", + "\n", + "## Requires ##\n", + "\n", + "```\n", + "pip install astropy spectral-cube dask aplpy\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "import warnings\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import astropy.units as u" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fitting models to both spatial maps and spectra are amongst the most common operations involving spectral-line data cubes.\n", + "\n", + "In this tutorial, we explore how to fit models to a `SpectralCube` using `astropy.modeling`. To show this, we will use an ALMA mosaic observed with the ACA, or 7-m array. This mosaic is one component of an ALMA project to map CO(2-1) in M33 (Project ID 2019.1.01182.S).\n", + "\n", + "You can access the tutorial data cubes [here](https://zenodo.org/record/4050489) on Zenodo.\n", + "\n", + "To keep the data volume small, we will use a 10 MB cutout of the full mosaic for this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from astropy.utils.data import download_file\n", + "\n", + "datafile = download_file(\n", + " 'https://zenodo.org/record/4021108/files/M33_Brick1Tile1_12CO21_0p7kms.image.pbcor_K.cutout.fits',\n", + " cache=True, show_progress=True)\n", + "\n", + "from spectral_cube import SpectralCube\n", + "\n", + "# Note that the datafile will not end in \".fits\". Because of that, we need to specify the format\n", + "# When the file name end in \"fits\", `format` will not need to be specified \n", + "cube = SpectralCube.read(datafile, format='fits')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The basic cube properties are:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cube" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the unit of the spectral axis is in m/s from the FITS file. It will be convenient to instead use km/s, so we will convert the cube's spectral unit:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cube = cube.with_spectral_unit(u.km / u.s)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fitting a 1D spectral model ##\n", + "\n", + "This part demonstrates how 1D spectra can be fit to a `SpectralCube`. This spectral-line data cube is of CO(2-1) emission that primarily arises from giant molecular clouds (GMCs). In GMCs, we expect the CO(2-1) line shape to be set by random turbulent motions that produces a Gaussian line shape when observed on $\\sim40$ pc scales, like for these observations. Spectra can also be a combination of Gaussian line profiles if multiple sources are detected along a line-of-sight." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we will extract a single spectrum with bright CO emission for this example. The pixel (y, x) = (145, 340) is a location with bright CO emission.\n", + "\n", + "We can extract this one spectrum from the cube and plot it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# One example to test this on to start\n", + "y, x = 32, 32\n", + "\n", + "spec = cube[:, y, x]\n", + "spec.quicklook()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This spectrum has a reasonably high signal-to-noise ratio and should provide a good example for spectral fitting. Note that spectra with a low peak signal-to-noise may require restrictions on the parameter space for the fit to converge.\n", + "\n", + "Based on the above spectrum, we can provide good initial parameter values to start the fit with." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from astropy.modeling import models, fitting\n", + "\n", + "# The 1D Gaussian model with initial guesses for parameters\n", + "g_init = models.Gaussian1D(amplitude=1.0 * u.K, mean=-210 * u.km / u.s, stddev=4. * u.km / u.s)\n", + "\n", + "# And fit with the Levenberg-Marquardt algorithm and least squares statistic.\n", + "fit_g = fitting.LevMarLSQFitter()\n", + "\n", + "# The initial model, spectral axis (in km/s) and spectrum are passed for the fit\n", + "g_fit = fit_g(g_init, spec.spectral_axis, spec)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The fit model parameters are:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g_fit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These parameters all seem reasonable given the plotted spectrum above. To compare more closely, we can plot the spectrum and the fit model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spec.quicklook()\n", + "plt.plot(spec.spectral_axis, g_fit(spec.spectral_axis))\n", + "plt.ylabel(\"Brightness Temperature (K)\")\n", + "plt.xlabel(\"Radio Velocity (km /s)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the fit residuals:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(spec.spectral_axis, spec - g_fit(spec.spectral_axis))\n", + "plt.ylabel(\"Brightness Temperature (K)\")\n", + "plt.xlabel(\"Radio Velocity (km /s)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some residuals remain, however, the fit is fairly good.\n", + "\n", + "One major feature of `astropy.modeling.models` is the built-in handling of units. The fitted model `g_fit` already has each parameter in useful physical units." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exercise ###\n", + "\n", + "Can the above be example fit be improved with a more sophisticated model? Try the fit using multiple Gaussian components." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using dask ###\n", + "\n", + "When using a large spectral-line data cube, the [dask](https://dask.org/) library can be used in spectral-cube for operations instead of numpy. See the [spectral-cube documentation](https://spectral-cube.readthedocs.io/en/latest/dask.html) on dask integration for a thorough description.\n", + "\n", + "Most of the above code for fitting a spectrum will be unchanged. What will change is how the data cube is read in:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cube = SpectralCube.read(datafile, use_dask=True, format='fits')\n", + "cube = cube.with_spectral_unit(u.km / u.s)" + ] + }, + { + "source": [ + "Using dask is enabled with `use_dask=True`. This will return a [`DaskSpectralCube`](https://spectral-cube.readthedocs.io/en/latest/api/spectral_cube.DaskSpectralCube.html#spectral_cube.DaskSpectralCube):" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cube" + ] + }, + { + "source": [ + "Of note is the new information for the `chunk size` that is defined for a `DaskSpectralCube` but not the `SpectralCube` at the beginning of the tutorial. Dask will load the data in chunks, and the size of the chunks for this data cube are shown as the `chunk size`.\n", + "\n", + "For this tutorial, we are using a small cube and so there is only 1 chunk in the `DaskSpectralCube`. For larger cubes, however, there will be multiple chunks defined. The idea of these chunks is that the data can be loaded in parts to avoid reading the entire cube into memory at once." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Dask provides a task scheduler that can be used to parallelize operations. For this tutorial, we will run the fitting one a single-core, or the `synchronous` scheduler in dask. To set this for our SpectralCube, we use the `use_dask_scheduler` function:" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cube.use_dask_scheduler('synchronous')" + ] + }, + { + "source": [ + "See the [integration with dask](https://spectral-cube.readthedocs.io/en/latest/dask.html) documentation page for more information on setting different schedulers and enabling parallel operations.\n", + "\n", + "To track the progress of operations (particularly when operating on a large SpectralCube), dask provides a progress bar. To enable the progress bar in a notebook or terminal, use:" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dask.diagnostics import ProgressBar\n", + "\n", + "pbar = ProgressBar()\n", + "pbar.register()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fitting with the `DaskSpectralCube` looks similar to the example above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "spec = cube[:, y, x]\n", + "\n", + "# The 1D Gaussian model with initial guesses for parameters\n", + "g_init = models.Gaussian1D(amplitude=1.0 * u.K, mean=-210 * u.km / u.s, stddev=4. * u.km / u.s)\n", + "\n", + "# And fit with the Levenberg-Marquardt algorithm and least squares statistic.\n", + "fit_g = fitting.LevMarLSQFitter()\n", + "\n", + "# The initial model, spectral axis (in km/s) and spectrum are passed for the fit\n", + "g_fit = fit_g(g_init, spec.spectral_axis, spec)\n", + "\n", + "spec.quicklook()\n", + "plt.plot(spec.spectral_axis, g_fit(spec.spectral_axis))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The result is identical, but we now see multiple progress bars printed. The bar is shown each time dask reads in or performs an operation. In this case, we're seeing dask read in the \"chunk\" of data that contains the spectrum we're fitting.\n", + "\n", + "Using dask in this case is meant to be a working demonstration. The true value of using `DaskSpectralCube` is when many spectra in the cube require fitting. Dask allows for two features here: (i) parallelize fitting spectra to speed up the operation, and (ii) reading in the data in chunks to avoid excessive memory usage, which is critical when working with data cubes larger than your machine's memory. See the [ADD LINK TO PARALLEL FITTING TUTORIAL]() on fitting all spectra in a cube using `DaskSpectralCube`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fitting a 2D spatial model ##\n", + "\n", + "Using the same CO data, we will next cover how to fit a 2D Gaussian to a source and a simplistic approach to identify peaks. To show this, we will make a 2D velocity-integrated CO intensity map, or the moment 0 map. See [ADD LINK TO MOMENTS TUTORIAL]() on moment maps and how to make them.\n", + "\n", + "For this example, we will focus on the same giant molecular cloud (GMC) as the 1D spectrum above.\n", + "\n", + "To make an optimal moment 0 map, we should mask noisy regions of the data to highlight the signal. Here we will use a simple minimum threshold and integrate over the velocities corresponding to the signal in the 1D spectrum above ($\\sim-230$ to $-200$ km/s):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Mask noisy values\n", + "masked_cube = cube.with_mask(cube > 0.12 * u.K)\n", + "\n", + "# Only integrate over channels with signal\n", + "masked_cube_slab = masked_cube.spectral_slab(-230 * u.km / u.s, -190 * u.km / u.s)\n", + "\n", + "moment0 = masked_cube_slab.moment0()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These sources are giant molecular clouds (GMCs) that appear \"blob-like\" at this resolution. A 2D Gaussian should be a reasonable model for each GMC. Note that this shape is primarily the \"beam,\" the resolution element for these observations taken with the ALMA's (Morita) Compact Array.\n", + "\n", + "For this example, we will focus on fitting a model to a single GMC; the same GMC where we fit the 1D spectrum above. To fit the model, we will slice out a small regions centered on the cloud:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "y, x = 32, 32\n", + "size = 13\n", + "\n", + "moment0_cutout = moment0[y-size:y+size, x-size:x+size]\n", + "moment0_cutout.quicklook(use_aplpy=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most of the map is a single GMC (\"blob\") with some emission from a neighbouring GMC at the north edge. This cut-out should be well-suited to fit a 2D Gaussian to.\n", + "\n", + "The model to fit is defined below. To help the fit converge to the expected values, we will approximate the starting parameter values based on the image above.\n", + "\n", + "We can also fit directly to the RA, Dec values since the modeling handles the units (and we're looking at a very small area so the sky curvature does not matter in this case). The Dec and RA values are returned for each pixel using `spatial_coordinate_map` (in order of Dec then RA)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from astropy.modeling import models, fitting\n", + "\n", + "\n", + "# Define the spatial grid for the fit centered at y, x = 32, 32\n", + "yy, xx = moment0_cutout.spatial_coordinate_map\n", + "\n", + "# Define a single 2D Gaussian model.\n", + "p_init_gauss2D = models.Gaussian2D(x_mean=xx[size, size], y_mean=yy[size, size],\n", + " amplitude=12 * u.K * u.km / u.s,\n", + " x_stddev=8 * u.arcsec, y_stddev=8 * u.arcsec)\n", + "\n", + "\n", + "# And fit with the Levenberg-Marquardt algorithm and least squares statistic.\n", + "fit_p = fitting.LevMarLSQFitter()\n", + "\n", + "# TODO: should be able to use with_fill_value for projections\n", + "# fill value is NOT working for Projection.\n", + "# See https://github.com/radio-astro-tools/spectral-cube/pull/661\n", + "# mom0_sub.with_fill_value(fill_value=0.0).filled_data[:]\n", + "\n", + "# Set NaNs to 0 for the fit.\n", + "moment0_cutout_quant = moment0_cutout.quantity\n", + "# moment0_cutout_quant = moment0_cutout.value\n", + "moment0_cutout_quant[np.isnan(moment0_cutout_quant)] = 0.\n", + "\n", + "with warnings.catch_warnings():\n", + " # Ignore model linearity warning from the fitter\n", + " warnings.simplefilter('ignore')\n", + " p_gauss2D = fit_p(p_init_gauss2D, xx, yy, moment0_cutout_quant)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The fitted model is:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_gauss2D" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This corresponds to a spatial size of:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "print(f\"{p_gauss2D.x_stddev.quantity.to(u.arcsec)} by {p_gauss2D.y_stddev.quantity.to(u.arcsec)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The parameters are similar to the cutout image shown above: the peak is about 12 K km/s and the centre of the GMC is near the centre of the cutout.\n", + "\n", + "To visualize the quality of the fit, we can compare the data, model, and residuals:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(18, 6))\n", + "\n", + "plt.subplot(1, 3, 1)\n", + "plt.title(\"Image\", fontsize=18)\n", + "plt.imshow(moment0_cutout.value, origin='lower', cmap='inferno')\n", + "plt.colorbar()\n", + "plt.subplot(1, 3, 2)\n", + "plt.title(\"Model\", fontsize=18)\n", + "plt.imshow(p_gauss2D(xx, yy).value, origin='lower', cmap='inferno')\n", + "plt.colorbar()\n", + "plt.subplot(1, 3, 3)\n", + "plt.title(\"Residual\", fontsize=18)\n", + "plt.imshow(moment0_cutout.value - p_gauss2D(xx, yy).value, origin='lower', cmap='inferno', vmin=-0.7, vmax=0.7)\n", + "plt.colorbar()\n", + "\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The fit agrees fairly well with the data, with the largest residuals coming from the edge of the map where another GMC is located." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example shows the simplest approach to fitting a 2D model based on a 2D spatial projection. A more sophisticated approach would take the telescope beam into account, as in this case, the beam accounts for most of the size we recover. This should be possible by using the [astropy.convolution.convolve_models](https://docs.astropy.org/en/stable/api/astropy.convolution.convolve_models.html#astropy.convolution.convolve_models) function and fixing the model parameters for the beam." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Dask ###\n", + "\n", + "2D `Projection` and `Slice` objects do not currently have dask implementations." + ] + } + ] +} \ No newline at end of file diff --git a/spectral_fitting/requirements.txt b/spectral_fitting/requirements.txt new file mode 100644 index 0000000..1ea8154 --- /dev/null +++ b/spectral_fitting/requirements.txt @@ -0,0 +1,8 @@ +numpy +scipy +matplotlib +astropy +aplpy +git+https://github.com/radio-astro-tools/spectral-cube/#egg=spectral-cube +git+https://github.com/radio-astro-tools/radio-beam/#egg=radio-beam +dask[complete] \ No newline at end of file