Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Brute force method to find the optimal number of harmonics for WaveX, DMWaveX, and CMWaveX #1824

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG-unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ the released changes.

## Unreleased
### Changed
- Replaced `pint.utils.find_optimal_nharms` by a more general function `pint.noise_analysis.find_optimal_nharms` which optimizes the `Nharms` for multiple noise components simultaneously.
- Updated the example `rednoise-fit-example.py`
### Added
### Fixed
### Removed
Expand Down
24 changes: 18 additions & 6 deletions docs/examples/rednoise-fit-example.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# %%
from pint import DMconst
from pint.models import get_model
from pint.noise_analysis import find_optimal_nharms
from pint.simulation import make_fake_toas_uniform
from pint.logging import setup as setup_log
from pint.fitter import WLSFitter
Expand Down Expand Up @@ -102,7 +103,11 @@
m1 = deepcopy(m)
m1.remove_component("PLRedNoise")

nharm_opt, d_aics = find_optimal_nharms(m1, t, "WaveX", 30)
aics, nharm_opt = find_optimal_nharms(
m1, t, include_components=["WaveX"], nharms_max=30
)
d_aics = aics - np.min(aics)
nharm_opt = nharm_opt[0]

print("Optimum no of harmonics = ", nharm_opt)

Expand All @@ -120,7 +125,7 @@
plt.ylabel("AIC - AIC$_\\min{} + 1$")
plt.legend()
plt.yscale("log")
# plt.savefig("sim3-aic.pdf")
plt.show()

# %%
# Now create a new model with the optimum number of harmonics
Expand Down Expand Up @@ -183,7 +188,7 @@
)
plt.axvline(fyr, color="black", ls="dotted")
plt.axhline(0, color="grey", ls="--")
plt.ylabel("Fourier coeffs ($\mu$s)")
plt.ylabel("Fourier coeffs ($\\mu$s)")
plt.xscale("log")
plt.legend(fontsize=8)

Expand All @@ -202,6 +207,7 @@
plt.xlabel("Frequency (Hz)")
plt.axvline(fyr, color="black", ls="dotted", label="1 yr$^{-1}$")
plt.legend()
plt.show()

# %% [markdown]
# Note the outlier in the 1 year^-1 bin. This is caused by the covariance with RA and DEC, which introduce a delay with the same frequency.
Expand Down Expand Up @@ -261,7 +267,12 @@

m2 = deepcopy(m1)

nharm_opt, d_aics = find_optimal_nharms(m2, t, "DMWaveX", 30)
aics, nharm_opt = find_optimal_nharms(
m2, t, include_components=["DMWaveX"], nharms_max=30
)
d_aics = aics - np.min(aics)
nharm_opt = nharm_opt[0]

print("Optimum no of harmonics = ", nharm_opt)

# %%
Expand All @@ -275,7 +286,7 @@
plt.ylabel("AIC - AIC$_\\min{} + 1$")
plt.legend()
plt.yscale("log")
# plt.savefig("sim3-aic.pdf")
plt.show()

# %%
# Now create a new model with the optimum number of
Expand Down Expand Up @@ -353,7 +364,7 @@
)
plt.axvline(fyr, color="black", ls="dotted")
plt.axhline(0, color="grey", ls="--")
plt.ylabel("Fourier coeffs ($\mu$s)")
plt.ylabel("Fourier coeffs ($\\mu$s)")
plt.xscale("log")
plt.legend(fontsize=8)

Expand All @@ -372,6 +383,7 @@
plt.xlabel("Frequency (Hz)")
plt.axvline(fyr, color="black", ls="dotted", label="1 yr$^{-1}$")
plt.legend()
plt.show()


# %% [markdown]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ uncertainties
loguru
nestle>=0.2.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we discuss having a new requirement more broadly? If this is only needed for a subset of tasks, should it be optional?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm personally fine with adding new requirements if the requirement is pure python and doesn't have a bunch of its own new requirements. nestle seems like it is of that (good) type.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this was actually adding joblib?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular, I wonder about how using joblib compares to the use of concurrent.futures? I see some discussion online. It seems like we might want to stick with one library for that functionality.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will try concurrent.futures.

numdifftools
joblib
230 changes: 230 additions & 0 deletions src/pint/noise_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
from copy import deepcopy
from typing import List, Optional, Tuple
from itertools import product as cartesian_product

from joblib import Parallel, cpu_count, delayed
import numpy as np
from astropy import units as u

from pint.models.chromatic_model import ChromaticCM
from pint.models.dispersion_model import DispersionDM
from pint.models.phase_offset import PhaseOffset
from pint.models.timing_model import TimingModel
from pint.toa import TOAs
from pint.logging import setup as setup_log
from pint.utils import (
akaike_information_criterion,
cmwavex_setup,
dmwavex_setup,
wavex_setup,
)


def find_optimal_nharms(
model: TimingModel,
toas: TOAs,
include_components: List[str] = ["WaveX", "DMWaveX", "CMWaveX"],
nharms_max: int = 45,
chromatic_index: float = 4,
num_parallel_jobs: Optional[int] = None,
) -> Tuple[tuple, np.ndarray]:
"""Find the optimal number of harmonics for `WaveX`/`DMWaveX`/`CMWaveX` using the
Akaike Information Criterion.

This function runs a brute force search over a grid of harmonic numbers, from 0 to
`nharms_max`. This is executed in multiple processes using the `joblib` library the
number of processes is controlled through the `num_parallel_jobs` argument.

Please note that the execution time scales as `O(nharms_max**len(include_components))`,
which can quickly become large. Hence, if you are using large values of `nharms_max`, it
is recommended that this be run on a cluster with a large number of CPUs.

Parameters
----------
model: `pint.models.timing_model.TimingModel`
The timing model. Should not already contain `WaveX`/`DMWaveX` or `PLRedNoise`/`PLDMNoise`.
toas: `pint.toa.TOAs`
Input TOAs
component: list[str]
Component names; a non-empty sublist of ["WaveX", "DMWaveX", "CMWaveX"]
nharms_max: int, optional
Maximum number of harmonics (default is 45) for each component
chromatic_index: float
Chromatic index for `CMWaveX`
num_parallel_jobs: int, optional
Number of parallel processes. The default is the number of available CPU cores.

Returns
-------
aics: ndarray
Array of AIC values.
nharms_opt: tuple
Optimal numbers of harmonics
"""
assert len(set(include_components).intersection(set(model.components.keys()))) == 0
assert len(include_components) > 0

idxs = list(
cartesian_product(
*np.repeat([np.arange(nharms_max + 1)], len(include_components), axis=0)
)
)

if num_parallel_jobs is None:
num_parallel_jobs = cpu_count()

aics_flat = Parallel(n_jobs=num_parallel_jobs, verbose=13)(
delayed(
lambda ii: compute_aic(model, toas, include_components, ii, chromatic_index)
)(ii)
for ii in idxs
)

aics = np.reshape(aics_flat, [nharms_max + 1] * len(include_components))

assert np.isfinite(aics).all(), "Infs/NaNs found in AICs!"

return aics, np.unravel_index(np.argmin(aics), aics.shape)


def compute_aic(
model: TimingModel,
toas: TOAs,
include_components: List[str],
nharms: np.ndarray,
chromatic_index: float,
):
"""Given a pre-fit model and TOAs, add the `[CM|DM]WaveX` components to the model,
fit the model to the TOAs, and compute the Akaike Information criterion using the
post-fit timing model.

Parameters
----------
model: `pint.models.timing_model.TimingModel`
The pre-fit timing model. Should not already contain `WaveX`/`DMWaveX` or `PLRedNoise`/`PLDMNoise`.
toas: `pint.toa.TOAs`
Input TOAs
component: list[str]
Component names; a non-empty sublist of ["WaveX", "DMWaveX", "CMWaveX"]
nharms: ndarray
The number of harmonics for each component
chromatic_index: float
Chromatic index for `CMWaveX`

Returns
-------
aic: float
The AIC value.
"""
setup_log(level="WARNING")

model1 = prepare_model(
model, toas.get_Tspan(), include_components, nharms, chromatic_index
)

from pint.fitter import Fitter

# Downhill fitters don't work well here.
# TODO: Investigate this.
ftr = Fitter.auto(toas, model1, downhill=False)
ftr.fit_toas(maxiter=10)

return akaike_information_criterion(ftr.model, toas)


def prepare_model(
model: TimingModel,
Tspan: u.Quantity,
include_components: List[str],
nharms: np.ndarray,
chromatic_index: float,
):
"""Given a pre-fit model and TOAs, add the `[CM|DM]WaveX` components to the model. Also sets parameters like
`PHOFF` and `DM` and `CM` derivatives as free.

Parameters
----------
model: `pint.models.timing_model.TimingModel`
The pre-fit timing model. Should not already contain `WaveX`/`DMWaveX` or `PLRedNoise`/`PLDMNoise`.
Tspan: u.Quantity
The observation time span
component: list[str]
Component names; a non-empty sublist of ["WaveX", "DMWaveX", "CMWaveX"]
nharms: ndarray
The number of harmonics for each component
chromatic_index: float
Chromatic index for `CMWaveX`

Returns
-------
aic: float
The AIC value.
"""

model1 = deepcopy(model)

for comp in ["PLRedNoise", "PLDMNoise", "PLCMNoise"]:
if comp in model1.components:
model1.remove_component(comp)

if "PhaseOffset" not in model1.components:
model1.add_component(PhaseOffset())
model1.PHOFF.frozen = False

for jj, comp in enumerate(include_components):
if comp == "WaveX":
nharms_wx = nharms[jj]
if nharms_wx > 0:
wavex_setup(model1, Tspan, n_freqs=nharms_wx, freeze_params=False)
elif comp == "DMWaveX":
nharms_dwx = nharms[jj]
if nharms_dwx > 0:
if "DispersionDM" not in model1.components:
model1.add_component(DispersionDM())

model1["DM"].frozen = False

if model1["DM1"].quantity is None:
model1["DM1"].quantity = 0 * model1["DM1"].units
model1["DM1"].frozen = False

if "DM2" not in model1.params:
model1.components["DispersionDM"].add_param(
model["DM1"].new_param(2)
)
if model1["DM2"].quantity is None:
model1["DM2"].quantity = 0 * model1["DM2"].units
model1["DM2"].frozen = False

if model1["DMEPOCH"].quantity is None:
model1["DMEPOCH"].quantity = model1["PEPOCH"].quantity

dmwavex_setup(model1, Tspan, n_freqs=nharms_dwx, freeze_params=False)
elif comp == "CMWaveX":
nharms_cwx = nharms[jj]
if nharms_cwx > 0:
if "ChromaticCM" not in model1.components:
model1.add_component(ChromaticCM())
model1["TNCHROMIDX"].value = chromatic_index

model1["CM"].frozen = False
if model1["CM1"].quantity is None:
model1["CM1"].quantity = 0 * model1["CM1"].units
model1["CM1"].frozen = False

if "CM2" not in model1.params:
model1.components["ChromaticCM"].add_param(
model1["CM1"].new_param(2)
)
if model1["CM2"].quantity is None:
model1["CM2"].quantity = 0 * model1["CM2"].units
model1["CM2"].frozen = False

if model1["CMEPOCH"].quantity is None:
model1["CMEPOCH"].quantity = model1["PEPOCH"].quantity

cmwavex_setup(model1, Tspan, n_freqs=nharms_cwx, freeze_params=False)
else:
raise ValueError(f"Unsupported component {comp}.")

return model1
Loading
Loading