diff --git a/docs/Project.toml b/docs/Project.toml index a293e946..b088891d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,11 +1,17 @@ [deps] +BSplineKit = "093aae92-e908-43d7-9660-e50ee39d5a0a" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +Loess = "4345ca2d-374a-55d4-8d30-97f9976e7612" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -Documenter = "0.27" \ No newline at end of file +Documenter = "0.27" diff --git a/docs/make.jl b/docs/make.jl index a87ecf99..696b1066 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,23 +1,29 @@ cd(@__DIR__) using TransitionsInTimeseries, Statistics, StatsBase - using Literate # process examples and add then in a sidebar column example_files = readdir(joinpath(@__DIR__, "src", "examples")) +jl_indices = [endswith(f, ".jl") for f in example_files] # only Literate sources; safe for short or non-ASCII names +jl_examples = example_files[jl_indices] + example_pages = String[] -for file in example_files +for file in jl_examples mkdownname = splitext(file)[1]*".md" Literate.markdown("src/examples/$(file)", "src/examples"; credit = false) push!(example_pages, "examples/$(mkdownname)") end +# Sort pages with increasing complexity rather than alphabetically +permute!(example_pages, [2, 1]) + pages = [ "index.md", "tutorial.md", - "api.md", "Examples" => example_pages, + "api.md", + "refs.md", ] import Downloads @@ -27,6 +33,9 @@ Downloads.download( ) include("build_docs_with_style.jl") +using DocumenterCitations +bib = CitationBibliography(joinpath(@__DIR__, "src", "refs.bib"); 
style=:authoryear) + build_docs_with_style(pages, TransitionsInTimeseries, Statistics, StatsBase; - authors = "Jan Swierczek-Jereczek , George Datseris " -) \ No newline at end of file + authors = "Jan Swierczek-Jereczek , "* + "George Datseris ", bib) \ No newline at end of file diff --git a/docs/src/api.md b/docs/src/api.md index 1705cb64..9f21bc08 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -3,9 +3,13 @@ ## Main analysis functions ```@docs -WindowedIndicatorConfig +IndicatorsChangesConfig +SlidingWindowConfig +SegmentedWindowConfig estimate_indicator_changes -WindowedIndicatorResults +WindowResults +SlidingWindowResults +SegmentWindowResults ``` ## Significance testing diff --git a/docs/src/examples/do-events.jl b/docs/src/examples/do-events.jl new file mode 100644 index 00000000..ff3974a9 --- /dev/null +++ b/docs/src/examples/do-events.jl @@ -0,0 +1,267 @@ +#= +# Dansgaard-Oeschger events and Critical Slowing Down + +The $\delta^{18}O$ timeseries of the North Greenland Ice Core Project ([NGRIP](https://en.wikipedia.org/wiki/North_Greenland_Ice_Core_Project)) are, to this date, the best proxy record for the Dansgaard-Oeschger events ([DO-events](https://en.wikipedia.org/wiki/Dansgaard%E2%80%93Oeschger_event)). DO-events are sudden warming episodes of the North Atlantic, reaching 10 degrees of regional warming within 100 years. They happened quasi-periodically over the last glacial cycle due to transitions between strong and weak states of the Atlantic Meridional Overturning Circulation and might therefore be the most prominent examples of abrupt transitions in the field of climate science. We here propose to hindcast these events by applying the theory of Critical Slowing Down (CSD) on the NGRIP data, which can be found [here](https://www.iceandclimate.nbi.ku.dk/data/) in its raw format. This analysis has already been done in [boers-early-warning-2018](@cite) and we here try to reproduce Figure 2.d-f. 
+ +## Preprocessing NGRIP + +Data pre-processing is not part of TransitionsInTimeseries.jl, but a step the user has to do before using the package. To present an example with a complete scientific workflow, we will showcase typical data pre-processing here, which consists of the following steps: +1. Load the data, reverse and offset it to have time vector = time before 2000 AD. +2. Filter non-unique points in time and sort the data. +3. Regrid the data from uneven to even sampling. + +The time and $\delta^{18}O$ vectors resulting from the $i$-th preprocessing step are respectively called $t_i$ and $x_i$. The final step consists in obtaining a residual $r$, i.e. the fluctuations of the system around the attractor, which, within the CSD theory, is assumed to be tracked. Over this example, it will appear that the convenience of TransitionsInTimeseries.jl leads the bulk of the code to be written for plotting and preprocessing. + +### Step 1: +=# + +using DelimitedFiles, Downloads + +function load_ngrip() + tmp = Downloads.download("https://raw.githubusercontent.com/JuliaDynamics/JuliaDynamics/"* + "master/timeseries/NGRIP.csv") + data, _ = readdlm(tmp, header = true) + return reverse(data[:, 1]) .- 2000, reverse(data[:, 2]) # (time, delta-18-O) vectors +end + +t1, x1 = load_ngrip() + +#= +### Step 2 +=# + +uniqueidx(v) = unique(i -> v[i], eachindex(v)) +function keep_unique(t, x) + unique_idx = uniqueidx(t) + return t[unique_idx], x[unique_idx] +end + +function sort_timeseries!(t, x) + p = sortperm(t) + permute!(t, p) + permute!(x, p) + return nothing +end + +t2, x2 = keep_unique(t1, x1) +sort_timeseries!(t2, x2) + +#= +### Step 3 +=# + +using BSplineKit + +function regrid2evensampling(t, x, dt) + itp = BSplineKit.interpolate(t, x, BSplineOrder(4)) + tspan = (ceil(minimum(t)), floor(maximum(t))) + t_even = collect(tspan[1]:dt:tspan[2]) + x_even = itp.(t_even) + return t_even, x_even +end + +dt = 5.0 # dt = 5 yr as in (Boers 2018) +t3, x3 = regrid2evensampling(t2, x2, dt) 
+ +#= +### Step 4 + +For the final step we drop the indices: +=# + +using DSP + +function chebyshev_filter(t, x, fcutoff; dt = t[2] - t[1]) # dt: sampling step, taken from t unless given + ii = 10 # Chebyshev filtering requires pruning the first points of the timeseries. + responsetype = Highpass(fcutoff, fs = 1/dt) + designmethod = Chebyshev1(8, 0.05) + r = filt(digitalfilter(responsetype, designmethod), x) + xtrend = x - r + return t[ii:end], x[ii:end], xtrend[ii:end], r[ii:end] +end + +fcutoff = 0.95 * 0.01 # cutoff ≃ 0.01 yr^-1 as in (Boers 2018) +t, x, xtrend, r = chebyshev_filter(t3, x3, fcutoff) + +#= +Let's now visualize our data in what will become our main figure. For the segmentation of the DO-events, we rely on the tabulated data from [rasmussen-stratigraphic-2014](@cite) (which will soon be available as downloadable): +=# + +using CairoMakie, Loess + +function loess_filter(t, x; span = 0.005) + loessmodel = loess(t, x, span = span) + xtrend = Loess.predict(loessmodel, t) + r = x - xtrend + return t, x, xtrend, r +end + +function kyr_xticks(tticks_yr) + tticks_kyr = ["$t" for t in Int.(tticks_yr ./ 1e3)] + return (tticks_yr, tticks_kyr) +end + +function plot_do(traw, xraw, tfilt, xfilt, t, r, t_transitions, xlims, xticks) + fig = Figure(resolution = (1600, 1200), fontsize = 24) + + ## Original timeseries with transition marked by vertical lines + ax1 = Axis(fig[1, 1], xlabel = L"Time (kyr) $\,$", ylabel = L"$\delta^{18}$O (permil)", + xaxisposition = :top, xticks = xticks) + lines!(ax1, traw, xraw, color = (:gray70, 0.5)) + lines!(ax1, tfilt, xfilt, color = :gray10, linewidth = 3) + vlines!(ax1, t_transitions, color = Cycled(1), linewidth = 3) + + ## Residual timeseries + ax2 = Axis(fig[2, 1], ylabel = L"Residual $\,$", xticks = xticks, + xticksvisible = false, xticklabelsvisible = false) + lines!(ax2, t, r, color = :gray50, linewidth = 1) + + ## Axes for variance and AC1 timeseries + ax3 = Axis(fig[3, 1], ylabel = L"Variance $\,$", xticks = xticks, + xticksvisible = false, xticklabelsvisible = false) + ax4 = Axis(fig[4, 
1], xlabel = L"Time (kyr) $\,$", ylabel = L"Lag-1 autocor. $\,$", + xticks = xticks) + + axs = [ax1, ax2, ax3, ax4] + [xlims!(ax, xlims) for ax in axs] + ylims!(axs[1], (-48, -34)) + rowgap!(fig.layout, 10) + return fig, axs +end + +xlims = (-60e3, -10e3) +xticks = kyr_xticks(-60e3:5e3:5e3) +t_rasmussen = -[-60000, 59440, 58280, 55800, 54220, 49280, 46860, 43340, 41460, 40160, + 38220, 35480, 33740, 32500, 28900, 27780, 23340, 14692, 11703] +tloess, _, xloess, rloess = loess_filter(t3, x3) # loess-filtered signal for visualization +fig, axs = plot_do(t3, x3, tloess, xloess, t, r, t_rasmussen, xlims, xticks) +fig + +#= +## Hindcast on NGRIP data + +As one can see... there is not much to see so far. Residuals are impossible to simply eye-ball and we therefore use TransitionsInTimeseries.jl to study the evolution, measured by the ridge-regression slope of the residual's variance and lag-1 autocorrelation (AC1) over time. In many examples of the literature, including [boers-early-warning-2018](@cite), the CSD analysis is performed over segments (sometimes only one) of the timeseries, such that a significance value is obtained for each segment. By using `SegmentedWindowConfig`, dealing with segments can be easily done in TransitionsInTimeseries.jl and is demonstrated here: +=# + +using TransitionsInTimeseries, StatsBase +using Random: Xoshiro + +ac1(x) = sum(autocor(x, [1])) # AC1 from StatsBase +indicators = (var, ac1) +change_metrics = RidgeRegressionSlope() +tseg_start = t_rasmussen[1:end-1] .+ 200 +tseg_end = t_rasmussen[2:end] .- 200 +config = SegmentedWindowConfig(indicators, change_metrics, + tseg_start, tseg_end; whichtime = last, width_ind = Int(200÷dt), + min_width_cha = 100) # require >=100 data points to estimate change metric +results = estimate_indicator_changes(config, r, t) +signif = SurrogatesSignificance(n = 10_000, tail = :right, rng = Xoshiro(1995)) +flags = significant_transitions(results, signif) + +#= +That's it! 
We can now visualise our results with a generic function that we will re-use later: +=# + +function plot_segment_analysis!(axs, results, signif) + (; t_indicator, x_indicator) = results + for k in eachindex(t_indicator) # loop over the segments + for i in axes(signif.pvalues, 2) # loop over the indicators + if !isinf(signif.pvalues[k, i]) # plot if segment long enough + ## Plot indicator timeseries and its linear regression + ti, xi = t_indicator[k], x_indicator[k][:, i] + lines!(axs[i+2], ti, xi, color = Cycled(1)) + m, p = ridgematrix(ti, 0.0) * xi + if signif.pvalues[k, i] < 0.05 + lines!(axs[i+2], ti, m .* ti .+ p, color = :gray5, linewidth = 3) + else + lines!(axs[i+2], ti, m .* ti .+ p, color = :gray60, linewidth = 3) + end + end + end + end +end +plot_segment_analysis!(axs, results, signif) +fig + +#= +In [boers-early-warning-2018](@cite), 13/16 and 7/16 true positives are respectively found for the variance and AC1, with 16 referring to the total number of transitions. The timeseries actually includes 18 transitions but, in [boers-early-warning-2018](@cite), some segments are considered too small to be analysed. In contrast, we here respectively find 9/16 true positives for the variance and 3/16 for AC1. We can track down the discrepancies to be in the surrogate testing, since the indicator timeseries computed here are almost identical to those of [boers-early-warning-2018](@cite). This mismatch points out that packages like TransitionsInTimeseries.jl are desirable for research to be reproducible, especially since CSD is gaining attention - not only within the scientific community but also in popular media. + +## CSD: only a necessary condition, only in some cases + +For codimension-1 systems, approaching a fold, Hopf or transcritical bifurcation implies a widening of the potential $U$, which defines the deterministic term $f = -∇U$ of the SDE's right-hand-side. 
In the presence of noise, this leads to CSD, which is therefore a **necessary condition** for crossing one of these bifurcations - although it is not always assessable by analysing the timeseries due to practical limitations (e.g. sparse data subject to large measurement noise). It is nonetheless not given that DO-events, as many other real-life applications, can be seen as a codimension-1 fold, Hopf or transcritical bifurcations. Besides this, we emphasise that CSD is **not a sufficient condition** for assessing a transition being ahead in near future, since a resilience loss can happen without actually crossing any bifurcation. This can be illustrated on the present example by performing the same analysis only until few hundred years before the transition: +=# + +tseg_end = t_rasmussen[2:end] .- 700 # stop analysis 500 years earlier than before +config = SegmentedWindowConfig(indicators, change_metrics, + tseg_start, tseg_end, whichtime = last, width_ind = Int(200÷dt), + min_width_cha = 100) +results = estimate_indicator_changes(config, r, t) +signif = SurrogatesSignificance(n = 10_000, tail = :right, rng = Xoshiro(1995)) +flags = significant_transitions(results, signif) +fig, axs = plot_do(t3, x3, tloess, xloess, t, r, t_rasmussen, xlims, xticks) +plot_segment_analysis!(axs, results, signif) +fig + +#= +For the variance and AC1, we here respectively find 6 and 3 positives, although the transitions are still far ahead. This shows that what CSD captures is a potential widening induced by a shift of the forcing parameter rather than the actual transition. We therefore believe, as already suggested in some studies, that "resilience-loss indicators" is a more accurate name than "early-warning signals" when using CSD. + +We draw attention upon the fact that the $\delta^{18}O$ timeseries is noisy and sparsely re-sampled. Furthermore, interpolating over time introduces a potential bias in the statistics, even if performed on a coarse grid. 
The NGRIP data therefore represents an example that should be handled with care - as many others where CSD analysis has been applied on transitions in the field of geoscience. To contrast with this, we propose to perform the same analysis on synthetic DO data, obtained from an Earth Model of Intermediate Complexity (EMIC). + +!!! warning "Degrees of freedom" + These sources of error come along with the usual problem of arbitrarily choosing (1) a filtering method, (2) windowing parameters and (3) appropriate metrics (for instance when the forcing noise is suspected to be correlated). This leads to a large number of degrees of freedom (DoF). Although sensible guesses are possible here, checking that results are robust w.r.t. the DoF should be a standard practice. + +!!! info "Future improvement" + Supporting the computations for uneven timeseries is a planned improvement of TransitionsInTimeseries.jl. This will avoid the need of regridding data on coarse grids and will prevent the introduction of any bias. + +## Hindcasting simulated DO-events + +In CLIMBER-X, the EMIC described in [willeit-earth-2022](@cite), DO-like events can be triggered by forcing the North Atlantic with a (white noise) freshwater input. Simulated DO-like events present the big advantage of being evenly sampled in time and free of measurement noise. 
We run this analysis over two exemplary simulation outputs: +=# + +t_transitions = [[1, 1850, 2970, 3970, 5070, 5810, 7050, 8050], + [1, 3500, 4370, 5790, 7200, 8140]] +t_lb = [[300, 500, 300, 600, 300, 500, 500], [1800, 500, 1000, 900, 500]] +tseg_start = [t_transitions[1][1:end-1] + t_lb[1], t_transitions[2][1:end-1] + t_lb[2]] +tseg_end = [t_transitions[1][2:end] .- 50, t_transitions[2][2:end] .- 50] + +figvec = Figure[] + +for j in 1:2 + ## Download the data and perform loess filtering on it + tmp = Downloads.download("https://raw.githubusercontent.com/JuliaDynamics/JuliaDynamics/" * + "master/timeseries/climberx-do$(j)-omaxa.csv") + data = readdlm(tmp) + tcx, xcx = data[1, 1000:end], data[2, 1000:end] + t, x, xtrend, r = loess_filter(tcx, xcx, span = 0.02) + + ## Initialize figure + xlims = (0, last(tcx)) + xticks = kyr_xticks(xlims[1]:1e3:xlims[2]) + fig, axs = plot_do(tcx, xcx, t, xtrend, t, r, t_transitions[j], extrema(t), xticks) + ylims!(axs[1], (5, 40)) + axs[1].ylabel = L"Max. Atlantic overturning (Sv) $\,$" + + ## Run segmented analysis and update figure with results + dt = mean(diff(tcx)) + config = SegmentedWindowConfig( + indicators, change_metrics, tseg_start[j], tseg_end[j], + whichtime = last, width_ind = Int(200÷dt), min_width_cha = 100) + results = estimate_indicator_changes(config, r, t) + signif = SurrogatesSignificance(n = 1_000, tail = :right, rng = Xoshiro(1995)) + flags = significant_transitions(results, signif) + + plot_segment_analysis!(axs, results, signif) + vlines!(axs[1], t_transitions[j], color = Cycled(1), linewidth = 3) + push!(figvec, fig) +end +figvec[1] + +#= +It here appears that not all transitions are preceded by a significant increase of variance and AC1, even in the case of clean and evenly sampled time series. Let's check another case: +=# + +figvec[2] + +#= +Same here! 
Although CLIMBER-X does not represent real DO-events, the above-performed analysis might be hinting at the fact that not all DO transitions can be forecasted with CSD. Nonetheless, performing a CSD analysis can inform on the evolution of a system's resilience. +=# \ No newline at end of file diff --git a/docs/src/examples/logistic.jl b/docs/src/examples/logistic.jl index bb629cc1..fa252a22 100644 --- a/docs/src/examples/logistic.jl +++ b/docs/src/examples/logistic.jl @@ -65,7 +65,7 @@ width_ind = N÷100 width_cha = 20 stride_cha = 10 -config = WindowedIndicatorConfig(indicators, metric; +config = SlidingWindowConfig(indicators, metric; width_ind, width_cha, stride_cha, ) diff --git a/docs/src/refs.bib b/docs/src/refs.bib new file mode 100644 index 00000000..8010a9d3 --- /dev/null +++ b/docs/src/refs.bib @@ -0,0 +1,54 @@ + +@article{willeit-earth-2022, + title = {The {Earth} system model {CLIMBER}-{X} v1.0 – {Part} 1: {Climate} model description and validation​​​​​​​​​​​​​​}, + volume = {15}, + issn = {1991-9603}, + shorttitle = {The {Earth} system model {CLIMBER}-{X} v1.0 – {Part} 1}, + url = {https://gmd.copernicus.org/articles/15/5905/2022/}, + doi = {10.5194/gmd-15-5905-2022}, + abstract = {The newly developed fast Earth system model CLIMBER-X is presented. The climate component of CLIMBER-X consists of a 2.5-D semi-empirical statistical–dynamical atmosphere model, a 3-D frictional–geostrophic ocean model, a dynamic–thermodynamic sea ice model and a land surface model. All the model components are discretized on a regular lat–long grid with a horizontal resolution of 5 ◦ × 5 ◦. The model has a throughput of ∼ 10 000 simulation years per day on a single node with 16 CPUs on a high-performance computer and is designed to simulate the evolution of the Earth system on temporal scales ranging from decades to {\textgreater} 100 000 years. 
A comprehensive evaluation of the model performance for the present day and the historical period shows that CLIMBER-X is capable of realistically reproducing many observed climate characteristics, with results that generally lie within the range of state-of-theart general circulation models. The analysis of model performance is complemented by a thorough assessment of climate feedbacks and model sensitivities to changes in external forcings and boundary conditions. Limitations and applicability of the model are critically discussed. CLIMBER-X also includes a detailed representation of the global carbon cycle and is coupled to an ice sheet model, which will be described in separate papers. CLIMBER-X is available as open-source code and is expected to be a useful tool for studying past climate changes and for the investigation of the long-term future evolution of the climate.}, + language = {en}, + number = {14}, + urldate = {2022-08-11}, + journal = {Geoscientific Model Development}, + author = {Willeit, Matteo and Ganopolski, Andrey and Robinson, Alexander and Edwards, Neil R.}, + month = jul, + year = {2022}, + pages = {5905--5948}, + file = {Willeit et al. - 2022 - The Earth system model CLIMBER-X v1.0 – Part 1 Cl.pdf:/home/jan/Zotero/storage/75NKS975/Willeit et al. 
- 2022 - The Earth system model CLIMBER-X v1.0 – Part 1 Cl.pdf:application/pdf}, +} + +@article{boers-early-warning-2018, + title = {Early-warning signals for {Dansgaard}-{Oeschger} events in a high-resolution ice core record}, + volume = {9}, + issn = {2041-1723}, + url = {http://www.nature.com/articles/s41467-018-04881-7}, + doi = {10.1038/s41467-018-04881-7}, + language = {en}, + number = {1}, + urldate = {2022-10-28}, + journal = {Nature Communications}, + author = {Boers, Niklas}, + month = dec, + year = {2018}, + pages = {2556}, + file = {Boers - 2018 - Early-warning signals for Dansgaard-Oeschger event.pdf:/home/jan/Zotero/storage/DS97Z9UC/Boers - 2018 - Early-warning signals for Dansgaard-Oeschger event.pdf:application/pdf}, +} + +@article{rasmussen-stratigraphic-2014, + title = {A stratigraphic framework for abrupt climatic changes during the {Last} {Glacial} period based on three synchronized {Greenland} ice-core records: refining and extending the {INTIMATE} event stratigraphy}, + volume = {106}, + issn = {02773791}, + shorttitle = {A stratigraphic framework for abrupt climatic changes during the {Last} {Glacial} period based on three synchronized {Greenland} ice-core records}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S0277379114003485}, + doi = {10.1016/j.quascirev.2014.09.007}, + abstract = {Due to their outstanding resolution and well-constrained chronologies, Greenland ice-core records provide a master record of past climatic changes throughout the Last InterglacialeGlacial cycle in the North Atlantic region. As part of the INTIMATE (INTegration of Ice-core, MArine and TErrestrial records) project, protocols have been proposed to ensure consistent and robust correlation between different records of past climate. A key element of these protocols has been the formal definition and ordinal numbering of the sequence of Greenland Stadials (GS) and Greenland Interstadials (GI) within the most recent glacial period. 
The GS and GI periods are the Greenland expressions of the characteristic DansgaardeOeschger events that represent cold and warm phases of the North Atlantic region, respectively. We present here a more detailed and extended GS/GI template for the whole of the Last Glacial period. It is based on a synchronization of the NGRIP, GRIP, and GISP2 ice-core records that allows the parallel analysis of all three records on a common time scale. The boundaries of the GS and GI periods are defined based on a combination of stable-oxygen isotope ratios of the ice (d18O, reflecting mainly local temperature) and calcium ion concentrations (reflecting mainly atmospheric dust loading) measured in the ice. The data not only resolve the well-known sequence of DansgaardeOeschger events that were first defined and numbered in the ice-core records more than two decades ago, but also better resolve a number of short-lived climatic oscillations, some defined here for the first time. Using this revised scheme, we propose a consistent approach for discriminating and naming all the significant abrupt climatic events of the Last Glacial period that are represented in the Greenland ice records. The final product constitutes an extended and better resolved Greenland stratotype sequence, against which other proxy records can be compared and correlated. It also provides a more secure basis for investigating the dynamics and fundamental causes of these climatic perturbations.}, + language = {en}, + urldate = {2023-10-04}, + journal = {Quaternary Science Reviews}, + author = {Rasmussen, Sune O. and Bigler, Matthias and Blockley, Simon P. and Blunier, Thomas and Buchardt, Susanne L. and Clausen, Henrik B. and Cvijanovic, Ivana and Dahl-Jensen, Dorthe and Johnsen, Sigfus J. and Fischer, Hubertus and Gkinis, Vasileios and Guillevic, Myriam and Hoek, Wim Z. and Lowe, J. John and Pedro, Joel B. and Popp, Trevor and Seierstad, Inger K. and Steffensen, Jørgen Peder and Svensson, Anders M. 
and Vallelonga, Paul and Vinther, Bo M. and Walker, Mike J.C. and Wheatley, Joe J. and Winstrup, Mai}, + month = dec, + year = {2014}, + pages = {14--28}, + file = {Rasmussen et al. - 2014 - A stratigraphic framework for abrupt climatic chan.pdf:/home/jan/Zotero/storage/VSUTVWPN/Rasmussen et al. - 2014 - A stratigraphic framework for abrupt climatic chan.pdf:application/pdf}, +} diff --git a/docs/src/refs.md b/docs/src/refs.md new file mode 100644 index 00000000..4b476771 --- /dev/null +++ b/docs/src/refs.md @@ -0,0 +1,4 @@ +# References + +```@bibliography +``` \ No newline at end of file diff --git a/docs/src/tutorial.jl b/docs/src/tutorial.jl new file mode 100644 index 00000000..3f135673 --- /dev/null +++ b/docs/src/tutorial.jl @@ -0,0 +1,263 @@ +#= + +# Tutorial + +## [Workflow] (@id workflow) + +Computing transition indicators consists of the following steps: + +1. Doing any preprocessing of raw data first, such as detrending (_not part of TransitionsInTimeseries.jl_). This yields the **input timeseries**. +2. Estimating the timeseries of an indicator by sliding a window over the input timeseries. +3. Computing the changes of the indicator by sliding a window over its timeseries. +4. Generating many surrogates that preserve important statistical properties of the original timeseries. +5. Performing step 2 and 3 for the surrogate timeseries. +6. Checking whether the indicator change timeseries of the real timeseries shows a significant feature (trend, jump or anything else) when compared to the surrogate data. + +These steps are illustrated one by one in the tutorial below, and then summarized in the convenient API that TransitionsInTimeseries.jl exports. + +## [Tutorial -- Educational] (@id example_stepbystep) + +### Raw input data + +Let us load data from a bistable nonlinear model subject to noise and to a gradual change of the forcing that leads to a transition. 
Furthermore, we also load data from a linear model, which is by definition monostable and therefore incapable of transitioning. This is done to control the rate of false positives, a common problem that can emerge when looking for transition indicators. The models are governed by: + +```math +\dfrac{\mathrm{d}x_{l}}{\mathrm{d}t} = - x_{l} - 1 + f(t) + n(t) \\ +\dfrac{\mathrm{d}x_{nl}}{\mathrm{d}t} = - x_{nl}^3 + x_{nl} + f(t) + n(t) +``` + +with $x_{l}$ the state of the linear model, $x_{nl}$ the state of the bistable model, $f$ the forcing and $n$ the noise. For $f=0$ they both display an equilibrium point at $x=-1$. However, the bistable model also displays a further equilibrium point at $x=1$. Loading (and visualizing with [Makie](https://docs.makie.org/stable/)) such prototypical data to test some indicators can be done by simply running: +=# + +using TransitionsInTimeseries, CairoMakie + +t, x_linear, x_nlinear = load_linear_vs_doublewell() +fig, ax = lines(t, x_linear) +lines!(ax, t, x_nlinear) +ax.title = "raw data" +fig + +#= +### Preprocessing + +!!! note "Not part of TransitionsInTimeseries.jl" + Any timeseries preprocessing, such as the de-trending step we do here, + is not part of TransitionsInTimeseries.jl and is the responsibility of the researcher. + + +The nonlinear system clearly displays a transition between two stability regimes. To forecast such transition, we analyze the fluctuations of the timeseries around the attractor, assumed to be tracked. Therefore, a detrending step is needed - here simply obtained by building the difference of the timeseries with lag 1. +=# + +x_l_fluct = diff(x_linear) +x_nl_fluct = diff(x_nlinear) +tfluct = t[2:end] + +fig, ax = lines(tfluct, x_l_fluct) +lines!(ax, tfluct, x_nl_fluct .+ 0.05) +ax.title = "input timeseries" +fig + +#= +At this point, `x_l_fluct` and `x_nl_fluct` are considered the **input timeseries**. + +!!! info "Detrending in Julia" + Detrending can be performed in many ways. 
A wide range of Julia packages exists to perform smoothing, such as [Loess.jl](https://github.com/JuliaStats/Loess.jl) or [DSP.jl](https://docs.juliadsp.org/latest/contents/). There, the detrending step consists of subtracting the smoothed signal from the original one. + +### Indicator timeseries + +We can then compute the values of some "indicator" (a Julia function that inputs a timeseries and outputs a number). An indicator should be a quantity that is likely to change if a transition occurs, or is about to occur in the timeseries. We compute indicators by applying a sliding window over the **input timeseries**, determined by the width and the stride with which it is applied. Here we demonstrate this computation with the AR1-regression coefficient (under white-noise assumption), implemented as [`ar1_whitenoise`](@ref): +=# + +indicator = ar1_whitenoise +indicator_window = (width = 400, stride = 1) + +## By mapping `last::Function` over a windowviewer of the time vector, +## we obtain the last time step of each window. +## This therefore only uses information from `k-width+1` to `k` at time step `k`. +## Alternatives: `first::Function`, `midpoint::Function`. +t_indicator = windowmap(last, tfluct; indicator_window...) +indicator_l = windowmap(indicator, x_l_fluct; indicator_window...) +indicator_nl = windowmap(indicator, x_nl_fluct; indicator_window...) + +fig, ax = lines(t_indicator, indicator_l) +lines!(ax, t_indicator, indicator_nl) +ax.title = "indicator timeseries" +fig + +#= +The lines plotted above are the **indicator timeseries**. + +### Change metric timeseries + +From here, we process the **indicator timeseries** to quantify changes in it. This step is in essence the same as before: we apply some function over a sliding window of the indicator timeseries. We call this new timeseries the **change metric timeseries**. 
In the example here, the change metric we will employ will be the slope (over a sliding window), calculated by means of a [`RidgeRegressionSlope`](@ref): +=# + +change_window = (width = 30, stride = 1) +ridgereg = RidgeRegressionSlope(lambda = 0.0) +precompridgereg = precompute(ridgereg, t[1:change_window.width]) + +t_change = windowmap(last, t_indicator; change_window...) +change_l = windowmap(precompridgereg, indicator_l; change_window...) +change_nl = windowmap(precompridgereg, indicator_nl; change_window...) + +fig, ax = lines(t_change, change_l) +lines!(ax, t_change, change_nl) +ax.title = "change metric timeseries" +fig + +#= +### Timeseries surrogates + +As expected from [Critical Slowing Down](@ref approaches), an increase of the AR1-regression coefficient can be observed. Although eyeballing the timeseries might already be suggestive, we want a rigorous framework for testing for significance. + +In TransitionsInTimeseries.jl we perform significance testing using the method of timeseries surrogates and the [TimeseriesSurrogates.jl](https://github.com/JuliaDynamics/TimeseriesSurrogates.jl) Julia package. This has the added benefits of reproducibility, automation and flexibility in choosing the surrogate generation method. Note that `TimeseriesSurrogates` is re-exported by `TransitionsInTimeseries`, so that you don't have to `using` both of them. + +To illustrate the surrogate, we compare the change metric computed from the bistable timeseries with that computed from a surrogate of the same timeseries. +=# + +## Generate Fourier random-phase surrogates +using Random: Xoshiro +s = surrogate(x_nl_fluct, RandomFourier(), Xoshiro(123)) + +function gridfig(nrows, ncols) + fig = Figure() + axs = [Axis(fig[i, j], xticklabelsvisible = i == nrows)
+ for j in 1:ncols, i in 1:nrows] + rowgap!(fig.layout, 10) + return fig, axs +end +fig, axs = gridfig(2, 1) +lines!(axs[1], tfluct, x_nl_fluct, color = Cycled(2)) +lines!(axs[1], tfluct, s .- 0.05, color = Cycled(3)) +axs[1].title = "real signal vs. surrogate(s)" + +## compute and plot indicator and change metric +indicator_s = windowmap(indicator, s; indicator_window...) +change_s = windowmap(precompridgereg, indicator_s; change_window...) + +lines!(axs[2], t_change, change_nl, label = "nonlin", color = Cycled(2)) +lines!(axs[2], t_change, change_s, label = "surrogate", color = Cycled(3)) +axislegend() +axs[2].title = "change metric" + +[xlims!(ax, 0, 50) for ax in axs] +fig + +#= +### Quantifying significance + +To quantify the significance of the values of the **change metric timeseries** we perform a standard surrogate test by computing the [p-value](https://en.wikipedia.org/wiki/P-value) w.r.t. the change metrics of thousands of surrogates of the input timeseries. A low p-value (typically `p<0.05`) is commonly considered as significant. To visualize significant trends, we plot the p-value vs. time: +=# + +n_surrogates = 1_000 +fig, axs = gridfig(2, 2) +axs[1].title = "linear" +axs[2].title = "nonlinear" + +for (j, ax, axsig, x) in zip(1:2, axs[1:2], axs[3:4], (x_l_fluct, x_nl_fluct)) + + orig_change = j == 1 ? change_l : change_nl + sgen = surrogenerator(x, RandomFourier(), Xoshiro(123)) + pval = zeros(length(change_s)) + + ## Collect all surrogate change metrics + for i in 1:n_surrogates + s = sgen() + indicator_s = windowmap(indicator, s; indicator_window...) + change_s = windowmap(precompridgereg, indicator_s; change_window...) 
+ pval += orig_change .< change_s + end + + pval ./= n_surrogates + lines!(ax, t_change, orig_change) # ; color = Cycled(j) + lines!(axsig, t_change, pval) # ; color = Cycled(j+2) +end + +[xlims!(ax, 0, 50) for ax in axs] +fig + +#= +As expected, the data generated by the nonlinear model displays a significant increase of the AR1-regression coefficient before the transition, which is manifested by a low p-value. In contrast, the data generated by the linear model does not show anything similar. + +Performing the step-by-step analysis of transition indicators is possible and might be preferred for users wanting high flexibility. However, this results in a substantial amount of code. We therefore provide convenience functions that wrap this analysis, as shown in the next section. + +## [Tutorial -- TransitionsInTimeseries.jl] (@id example_fastforward) + +TransitionsInTimeseries.jl wraps this typical workflow into a simple, extendable, and modular API that researchers can use with little effort. In addition, it allows performing the same analysis for several indicators / change metrics in one go. + +The interface is simple, and directly parallels the [Workflow](@ref). It is based on the creation of a [`SlidingWindowConfig`](@ref), which contains a list of indicators, and corresponding metrics, to use for doing the above analysis. Significance testing with surrogates is configured separately, via [`SurrogatesSignificance`](@ref) (see Step 2 below). + +The following blocks illustrate how the above extensive example is re-created in TransitionsInTimeseries.jl: +=# + +using TransitionsInTimeseries, CairoMakie + +t, x_linear, x_nlinear = load_linear_vs_doublewell() + +## input timeseries and time +input = x_nl_fluct = diff(x_nlinear) +t = t[2:end] + +fig, ax = lines(t, input) +ax.title = "input timeseries" +fig + +#= +To perform all of the above analysis we follow a 2-step process.
+ +Step 1, we decide what indicators and change metrics to use in [`SlidingWindowConfig`](@ref) and apply those via +a sliding window to the input timeseries using [`estimate_indicator_changes`](@ref). +=# + +## These indicators are suitable for Critical Slowing Down +indicators = (var, ar1_whitenoise) + +## use the ridge regression slope for both indicators +change_metrics = RidgeRegressionSlope() + +## choices go into a configuration struct +config = SlidingWindowConfig(indicators, change_metrics; + width_ind = 400, width_cha = 30, whichtime = last) + +## choices are processed +results = estimate_indicator_changes(config, input, t) + +#= +From `results` we can plot the change metric timeseries: +=# + +fig, axs = gridfig(3, 1) +lines!(axs[1], t, input; label = "input", color = Cycled(2)) +scatter!(axs[2], results.t_change, results.x_change[:, 1]; + label = "var slopes", color = Cycled(3)) +scatter!(axs[3], results.t_change, results.x_change[:, 2]; + label = "ar1 slopes", color = Cycled(4)) +[xlims!(ax, 0, 50) for ax in axs] +fig + +#= +Step 2 is to estimate significance using [`SurrogatesSignificance`](@ref) +and the function [`significant_transitions`](@ref). +=# + +signif = SurrogatesSignificance(n = 1000, tail = :right) +flags = significant_transitions(results, signif) + +#= +We can now plot the p-values corresponding to each time series of the change metrics.
From the `flags` we can additionally obtain the time points where _both_ indicators show significance, via a simple reduction: +=# + +fig, axs = gridfig(2, 1) +lines!(axs[1], vcat(0.0, t), x_nlinear; label = "raw", color = Cycled(1)) +lines!(axs[1], t, input; label = "input", color = Cycled(2)) +scatter!(axs[2], results.t_change, signif.pvalues[:, 1]; + label = "var p-values", color = Cycled(3)) +scatter!(axs[2], results.t_change, signif.pvalues[:, 2]; + label = "ar1 p-values", color = Cycled(4)) + +flagsboth = vec(reduce(&, flags; dims = 2)) +vlines!(axs[1], results.t_change[flagsboth]; label = "flags", color = ("black", 0.1)) + +[axislegend(ax) for ax in axs] +[xlims!(ax, 0, 50) for ax in axs] +fig \ No newline at end of file diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md index d189a31d..4db96c03 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial.md @@ -1,10 +1,14 @@ +```@meta +EditURL = "tutorial.jl" +``` + # Tutorial ## [Workflow] (@id workflow) Computing transition indicators consists of the following steps: -1. Doing any pre-processing of raw data first such as detrending (_not part of TransitionsInTimeseries.jl_). This yields the **input timeseries**. +1. Doing any preprocessing of raw data first, such as detrending (_not part of TransitionsInTimeseries.jl_). This yields the **input timeseries**. 2. Estimating the timeseries of an indicator by sliding a window over the input timeseries. 3. Computing the changes of the indicator by sliding a window over its timeseries. 4. Generating many surrogates that preserve important statistical properties of the original timeseries. @@ -26,26 +30,26 @@ Let us load data from a bistable nonlinear model subject to noise and to a gradu with $x_{l}$ the state of the linear model, $x_{nl}$ the state of the bistable model, $f$ the forcing and $n$ the noise. For $f=0$ they both display an equilibrium point at $x=-1$. However, the bistable model also displays a further equilibrium point at $x=1$. 
Loading (and visualizing with [Makie](https://docs.makie.org/stable/)) such prototypical data to test some indicators can be done by simply running: -```@example MAIN +````@example tutorial using TransitionsInTimeseries, CairoMakie t, x_linear, x_nlinear = load_linear_vs_doublewell() -fig, ax = lines(t, x_nlinear) -lines!(ax, t, x_linear) +fig, ax = lines(t, x_linear) +lines!(ax, t, x_nlinear) ax.title = "raw data" fig -``` +```` ### Preprocessing !!! note "Not part of TransitionsInTimeseries.jl" - Any timeseries pre-processing, such as the de-trending step we do here, + Any timeseries preprocessing, such as the de-trending step we do here, is not part of TransitionsInTimeseries.jl and is the responsibility of the researcher. -The nonlinear system clearly displays a transition between two stability regimes. To forecast such transition, we analyze the fluctuations of the timeseries around the tracked attractor. Therefore, a detrending step is needed - here simply obtained by building the difference of the timeseries with lag 1. +The nonlinear system clearly displays a transition between two stability regimes. To forecast such transition, we analyze the fluctuations of the timeseries around the attractor, assumed to be tracked. Therefore, a detrending step is needed - here simply obtained by building the difference of the timeseries with lag 1. -```@example MAIN +````@example tutorial x_l_fluct = diff(x_linear) x_nl_fluct = diff(x_nlinear) tfluct = t[2:end] @@ -54,7 +58,7 @@ fig, ax = lines(tfluct, x_l_fluct) lines!(ax, tfluct, x_nl_fluct .+ 0.05) ax.title = "input timeseries" fig -``` +```` At this point, `x_l_fluct` and `x_nl_fluct` are considered the **input timeseries**. @@ -63,14 +67,15 @@ At this point, `x_l_fluct` and `x_nl_fluct` are considered the **input timeserie ### Indicator timeseries -We can then compute the values of some "indicator" (a Julia function that inputs a timeseries and outputs a number). 
An indicator should be a quantity that is likely to change if a transition occurs in the timeseries. We compute indicators by applying a sliding window over the **input timeseries**, determined by the width and the stride with which it is applied. Here we demonstrate this computation with the AR1-regression coefficient (under white-noise assumption), implemented as [`ar1_whitenoise`](@ref): +We can then compute the values of some "indicator" (a Julia function that inputs a timeseries and outputs a number). An indicator should be a quantity that is likely to change if a transition occurs, or is about to occur in the timeseries. We compute indicators by applying a sliding window over the **input timeseries**, determined by the width and the stride with which it is applied. Here we demonstrate this computation with the AR1-regression coefficient (under white-noise assumption), implemented as [`ar1_whitenoise`](@ref): -```@example MAIN +````@example tutorial indicator = ar1_whitenoise indicator_window = (width = 400, stride = 1) -# By mapping `last::Function` over a windowviewer of the time vector, we obtain the last time step of each window. -# This therefore only uses information from `k-width` to `k` at time step `k`. +# By mapping `last::Function` over a windowviewer of the time vector, +# we obtain the last time step of each window. +# This therefore only uses information from `k-width+1` to `k` at time step `k`. # Alternatives: `first::Function`, `midpoint:::Function`. t_indicator = windowmap(last, tfluct; indicator_window...) indicator_l = windowmap(indicator, x_l_fluct; indicator_window...) @@ -80,7 +85,7 @@ fig, ax = lines(t_indicator, indicator_l) lines!(ax, t_indicator, indicator_nl) ax.title = "indicator timeseries" fig -``` +```` The lines plotted above are the **indicator timeseries**. @@ -88,8 +93,7 @@ The lines plotted above are the **indicator timeseries**. From here, we process the **indicator timeseries** to quantify changes in it. 
This step is in essence the same as before: we apply some function over a sliding window of the indicator timeseries. We call this new timeseries the **change metric timeseries**. In the example here, the change metric we will employ will be the slope (over a sliding window), calculated via means of a [`RidgeRegressionSlope`](@ref): - -```@example MAIN +````@example tutorial change_window = (width = 30, stride = 1) ridgereg = RidgeRegressionSlope(lambda = 0.0) precompridgereg = precompute(ridgereg, t[1:change_window.width]) @@ -102,49 +106,57 @@ fig, ax = lines(t_change, change_l) lines!(ax, t_change, change_nl) ax.title = "change metric timeseries" fig -``` +```` ### Timeseries surrogates As expected from [Critical Slowing Down](@ref approaches), an increase of the AR1-regression coefficient can be observed. Although eyeballing the timeseries might already be suggestive, we want a rigorous framework for testing for significance. -In TransitionsIdentifiers.jl we perform significance testing using the method of timeseries surrogates and the [TimeseriesSurrogates.jl](https://github.com/JuliaDynamics/TimeseriesSurrogates.jl) Julia package. This has the added benefits of flexibility in choosing the surrogate generation method, reproducibility, and automation. Note that `TimeseriesSurrogates` is re-exported by `TransitionsInTimeseries`, so that you don't have to `using` both of them. +In TransitionsInTimeseries.jl we perform significance testing using the method of timeseries surrogates and the [TimeseriesSurrogates.jl](https://github.com/JuliaDynamics/TimeseriesSurrogates.jl) Julia package. This has the added benefits of reproducibility, automation and flexibility in choosing the surrogate generation method. Note that `TimeseriesSurrogates` is re-exported by `TransitionsInTimeseries`, so that you don't have to `using` both of them.
To illustrate the surrogate, we compare the change metric computed from the bistable timeseries what that computed from a surrogate of the same timeseries. -```@example MAIN +````@example tutorial # Generate Fourier random-phase surrogates using Random: Xoshiro s = surrogate(x_nl_fluct, RandomFourier(), Xoshiro(123)) -fig, ax = lines(tfluct, x_nl_fluct; color = Cycled(2)) -lines!(ax, tfluct, s .- 0.05; color = Cycled(3)) -ax.title = "real signal vs. surrogate(s)" + +function gridfig(nrows, ncols) + fig = Figure() + axs = [Axis(fig[i, j], xticklabelsvisible = i == nrows ? true : false) + for j in 1:ncols, i in 1:nrows] + rowgap!(fig.layout, 10) + return fig, axs +end +fig, axs = gridfig(2, 1) +lines!(axs[1], tfluct, x_nl_fluct, color = Cycled(2)) +lines!(axs[1], tfluct, s .- 0.05, color = Cycled(3)) +axs[1].title = "real signal vs. surrogate(s)" # compute and plot indicator and change metric indicator_s = windowmap(indicator, s; indicator_window...) change_s = windowmap(precompridgereg, indicator_s; change_window...) -ax, = lines(fig[1,2], t_change, change_nl; color = Cycled(2), label = "nonlin") -lines!(ax, t_change, change_s; color = Cycled(3), label = "surrogate") +lines!(axs[2], t_change, change_nl, label = "nonlin", color = Cycled(2)) +lines!(axs[2], t_change, change_s, label = "surrogate", color = Cycled(3)) axislegend() -ax.title = "change metric" +axs[2].title = "change metric" +[xlims!(ax, 0, 50) for ax in axs] fig -``` +```` ### Quantifying significance To quantify the significance of the values of the **change metric timeseries** we perform a standard surrogate test by computing the [p-value](https://en.wikipedia.org/wiki/P-value) w.r.t. the change metrics of thousands of surrogates of the input timeseries. A low p-value (typically `p<0.05`) is commonly considered as significant. To visualize significant trends, we plot the p-value vs. 
time: -```@example MAIN +````@example tutorial n_surrogates = 1_000 -fig = Figure() -axl = Axis(fig[1,1]; title = "linear") -axnl = Axis(fig[1,2]; title = "nonlinear") -axsigl = Axis(fig[2,1]) -axsignl = Axis(fig[2,2]) +fig, axs = gridfig(2, 2) +axs[1].title = "linear" +axs[2].title = "nonlinear" -for (j, ax, axsig, x) in zip(1:2, (axl, axnl), (axsigl, axsignl), (x_l_fluct, x_nl_fluct)) +for (j, ax, axsig, x) in zip(1:2, axs[1:2], axs[3:4], (x_l_fluct, x_nl_fluct)) orig_change = j == 1 ? change_l : change_nl sgen = surrogenerator(x, RandomFourier(), Xoshiro(123)) @@ -159,12 +171,13 @@ for (j, ax, axsig, x) in zip(1:2, (axl, axnl), (axsigl, axsignl), (x_l_fluct, x_ end pval ./= n_surrogates - lines!(ax, t_change, orig_change; color = Cycled(j)) - lines!(axsig, t_change, pval; color = Cycled(j+2)) + lines!(ax, t_change, orig_change) # ; color = Cycled(j) + lines!(axsig, t_change, pval) # ; color = Cycled(j+2) end +[xlims!(ax, 0, 50) for ax in axs] fig -``` +```` As expected, the data generated by the nonlinear model displays a significant increase of the AR1-regression coefficient before the transition, which is manifested by a low p-value. In contrast, the data generated by the linear model does not show anything similar. @@ -174,11 +187,11 @@ Performing the step-by-step analysis of transition indicators is possible and mi TransitionsInTimeseries.jl wraps this typical workflow into a simple, extendable, and modular API that researchers can use with little effort. In addition, it allows performing the same analysis for several indicators / change metrics in one go. -The interface is simple, and directly parallelizes the [Workflow](@ref). It is based on the creation of a [`WindowedIndicatorConfig`](@ref), which contains a list of indicators, and corresponding metrics, to use for doing the above analysis. +The interface is simple, and directly parallelizes the [Workflow](@ref). 
It is based on the creation of a [`SlidingWindowConfig`](@ref), which contains a list of indicators, and corresponding metrics, to use for doing the above analysis. Significance testing with surrogates is configured separately, via [`SurrogatesSignificance`](@ref) (see Step 2 below). -The following blocks illustrate how the above extensive example is re-created in TransitionsInTimeseries.jl. First, let's load the timeseries again. +The following blocks illustrate how the above extensive example is re-created in TransitionsInTimeseries.jl: -```@example MAIN +````@example tutorial using TransitionsInTimeseries, CairoMakie t, x_linear, x_nlinear = load_linear_vs_doublewell() @@ -190,14 +203,14 @@ t = t[2:end] fig, ax = lines(t, input) ax.title = "input timeseries" fig -``` +```` To perform all of the above analysis we follow a 2-step process. -Step 1, we decide what indicators and change metrics to use in [`WindowedIndicatorConfig`](@ref) and apply those via -a sliding window to the input timeseries using [`estimate_indicator_changes`](@ref). +Step 1, we decide what indicators and change metrics to use in [`SlidingWindowConfig`](@ref) and apply those via +a sliding window to the input timeseries using [`estimate_indicator_changes`](@ref).
-```@example MAIN +````@example tutorial # These indicators are suitable for Critical Slowing Down indicators = (var, ar1_whitenoise) @@ -205,46 +218,50 @@ indicators = (var, ar1_whitenoise) change_metrics = RidgeRegressionSlope() # choices go into a configuration struct -config = WindowedIndicatorConfig(indicators, change_metrics; - width_ind = 400, width_cha = 30, whichtime = last -) +config = SlidingWindowConfig(indicators, change_metrics; + width_ind = 400, width_cha = 30, whichtime = last) # choices are processed results = estimate_indicator_changes(config, input, t) -``` - -From `result` we can plot the change metric timeseries -```@example MAIN -fig, ax = lines(t, input; color = Cycled(2), label = "input") -axpval, = scatter(fig[2,1], results.t_change, results.x_change[:, 1]; color = Cycled(3), label = "var slopes") -axpval, = scatter(fig[3,1], results.t_change, results.x_change[:, 2]; color = Cycled(4), label = "ar1 slopes") -``` +```` + +From `results` we can plot the change metric timeseries: + +````@example tutorial +fig, axs = gridfig(3, 1) +lines!(axs[1], t, input; label = "input", color = Cycled(2)) +scatter!(axs[2], results.t_change, results.x_change[:, 1]; + label = "var slopes", color = Cycled(3)) +scatter!(axs[3], results.t_change, results.x_change[:, 2]; + label = "ar1 slopes", color = Cycled(4)) +[xlims!(ax, 0, 50) for ax in axs] +fig +```` -Step 2 is to estimate significance using [`SurrogatesSignificance`](@ref) -and the function [`significant_transitions`](@ref). +Step 2 is to estimate significance using [`SurrogatesSignificance`](@ref) +and the function [`significant_transitions`](@ref).
-```@example MAIN -signif = SurrogatesSignificance(n = 1000, tail = :both) +````@example tutorial +signif = SurrogatesSignificance(n = 1000, tail = :right) flags = significant_transitions(results, signif) -``` - -If we want, we can also plot the p-values corresponding to each timeseries change metric +```` -```@example MAIN -fig, ax = lines(t, input; color = Cycled(2), label = "input") -axpval, = scatter(fig[2,1], results.t_change, signif.pvalues[:, 1]; color = Cycled(3), label = "var p-values") -scatter!(axpval, results.t_change, signif.pvalues[:, 2]; color = Cycled(4), label = "ar1 p-values") -axislegend(axpval) -xlims!(ax, 0, 50) -xlims!(axpval, 0, 50) -``` +We can now plot the p-values corresponding to each time series of the change metrics. From the `flags` we can additionally obtain the time points where _both_ indicators show significance, via a simple reduction: -From the `flags` we can obtain the time points where _both_ indicators show significance, via a simple reduction +````@example tutorial +fig, axs = gridfig(2, 1) +lines!(axs[1], vcat(0.0, t), x_nlinear; label = "raw", color = Cycled(1)) +lines!(axs[1], t, input; label = "input", color = Cycled(2)) +scatter!(axs[2], results.t_change, signif.pvalues[:, 1]; + label = "var p-values", color = Cycled(3)) +scatter!(axs[2], results.t_change, signif.pvalues[:, 2]; + label = "ar1 p-values", color = Cycled(4)) -```@example MAIN flagsboth = vec(reduce(&, flags; dims = 2)) +vlines!(axs[1], results.t_change[flagsboth]; label = "flags", color = ("black", 0.1)) -vlines!(ax, results.t_change[flagsboth]; label = "flags", color = ("black", 0.1)) -axislegend(ax) +[axislegend(ax) for ax in axs] +[xlims!(ax, 0, 50) for ax in axs] fig -``` +```` + diff --git a/docs/src/tutorial_literate.jl b/docs/src/tutorial_literate.jl deleted file mode 100644 index 2f491843..00000000 --- a/docs/src/tutorial_literate.jl +++ /dev/null @@ -1,140 +0,0 @@ -using TransitionsInTimeseries, CairoMakie - -t, x_linear, x_nlinear = 
load_linear_vs_doublewell() -fig, ax = lines(t, x_nlinear) -lines!(ax, t, x_linear) -ax.title = "raw data" -fig - -# -x_l_fluct = diff(x_linear) -x_nl_fluct = diff(x_nlinear) -tfluct = t[2:end] - -fig, ax = lines(tfluct, x_l_fluct) -lines!(ax, tfluct, x_nl_fluct .+ 0.05) -ax.title = "input timeseries" -fig - -# -indicator = ar1_whitenoise -indicator_window = (width = 400, stride = 1) - -# By mapping `last::Function` over a windowviewer of the time vector, we obtain the last time step of each window. -# This therefore only uses information from `k-width` to `k` at time step `k`. -# Alternatives: `first::Function`, `midpoint:::Function`. -t_indicator = windowmap(last, tfluct; indicator_window...) -indicator_l = windowmap(indicator, x_l_fluct; indicator_window...) -indicator_nl = windowmap(indicator, x_nl_fluct; indicator_window...) - -fig, ax = lines(t_indicator, indicator_l) -lines!(ax, t_indicator, indicator_nl) -ax.title = "indicator timeseries" -fig - -# -change_window = (width = 30, stride = 1) -ridgereg = RidgeRegressionSlope(lambda = 0.0) -precompridgereg = precompute(ridgereg, t[1:change_window.width]) - -t_change = windowmap(last, t_indicator; change_window...) -change_l = windowmap(precompridgereg, indicator_l; change_window...) -change_nl = windowmap(precompridgereg, indicator_nl; change_window...) - -fig, ax = lines(t_change, change_l) -lines!(ax, t_change, change_nl) -ax.title = "change metric timeseries" -fig - - -# - -# Generate Fourier random-phase surrogates -using Random: Xoshiro -s = surrogate(x_nl_fluct, RandomFourier(), Xoshiro(123)) -fig, ax = lines(tfluct, x_nl_fluct; color = Cycled(2)) -lines!(ax, tfluct, s .- 0.05; color = Cycled(3)) -ax.title = "real signal vs. surrogate(s)" - -# compute and plot indicator and change metric -indicator_s = windowmap(indicator, s; indicator_window...) -change_s = windowmap(precompridgereg, indicator_s; change_window...) 
- -ax, = lines(fig[1,2], t_change, change_nl; color = Cycled(2), label = "nonlin") -lines!(ax, t_change, change_s; color = Cycled(3), label = "surrogate") -axislegend() -ax.title = "change metric" - -fig -# - - -n_surrogates = 1_000 -fig = Figure() -axl = Axis(fig[1,1]; title = "linear") -axnl = Axis(fig[1,2]; title = "nonlinear") -axsigl = Axis(fig[2,1]) -axsignl = Axis(fig[2,2]) - -for (j, ax, axsig, x) in zip(1:2, (axl, axnl), (axsigl, axsignl), (x_l_fluct, x_nl_fluct)) - - orig_change = j == 1 ? change_l : change_nl - sgen = surrogenerator(x, RandomFourier(), Xoshiro(123)) - pval = zeros(length(change_s)) - - # Collect all surrogate change metrics - for i in 1:n_surrogates - s = sgen() - indicator_s = windowmap(indicator, s; indicator_window...) - change_s = windowmap(precompridgereg, indicator_s; change_window...) - pval += orig_change .< change_s - end - - pval ./= n_surrogates - lines!(ax, t_change, orig_change; color = Cycled(j)) - lines!(axsig, t_change, pval; color = Cycled(j+2)) -end - -fig - -# %% -# Tutorial short - -using TransitionsInTimeseries, CairoMakie - -t, x_linear, x_nlinear = load_linear_vs_doublewell() - -x_nl_fluct = diff(x_nlinear) -tfluct = t[2:end] - -fig, ax = lines(tfluct, x_nl_fluct) -ax.title = "input timeseries" -fig - -# These indicators are suitable for Critical Slowing Down -indicators = [var, ar1_whitenoise] -indconfig = IndicatorsConfig(tfluct, last, indicators; width = 400) - - -# use the ridge regression slope for both indicators -change_metrics = [RidgeRegressionSlope()] -sigconfig = SignificanceConfig(indconfig, last, change_metrics; - width = 30, n_surrogates = 1000) - -# perform the full analysis -results = indicators_analysis(tfluct, x_nl_fluct, indconfig, sigconfig) - -# Plot the original timeseries vs. 
p-value time series -fig, ax = lines(tfluct, x_nl_fluct; color = Cycled(2), label = "input") -axpval, = lines(fig[2,1], sigconfig.t_change, results.pval[:, 1]; color = Cycled(3), label = "p-value of var") -lines!(axpval, sigconfig.t_change, results.pval[:, 2]; color = Cycled(4), label = "p-value of ar1") -xlims!(ax, (0, 50)) -xlims!(axpval, (0, 50)) -axislegend(axpval) -fig - - -flags_indicators, flags_andicators = transition_flags(results, 0.05) -vlines!(ax, flags_andicators; label = "flags", color = Cycled(3), linestyle = :dash) -axislegend(ax) -fig \ No newline at end of file diff --git a/src/TransitionsInTimeseries.jl b/src/TransitionsInTimeseries.jl index ac2a4afe..fbeef2f2 100644 --- a/src/TransitionsInTimeseries.jl +++ b/src/TransitionsInTimeseries.jl @@ -48,12 +48,14 @@ export LowfreqPowerSpectrum, PrecomputedLowfreqPowerSpectrum export mean, std, var, skewness, kurtosis # from StatsBase export permutation_entropy export kendalltau, spearman -export RidgeRegressionSlope, PrecomputedRidgeRegressionSlope +export ridgematrix, RidgeRegressionSlope, PrecomputedRidgeRegressionSlope export difference_of_means # analysis -export WindowedIndicatorConfig, estimate_indicator_changes, WindowedIndicatorResults -export TransitionsSignificance, significant_transitions +export IndicatorsChangesConfig, SlidingWindowConfig, SegmentedWindowConfig +export SlidingWindowResults, SegmentWindowResults +export estimate_indicator_changes, WindowResults +export TransitionsSignificance, significant_transitions, segmented_significance export QuantileSignificance, SigmaSignificance, SurrogatesSignificance # timeseries diff --git a/src/analysis/api.jl b/src/analysis/api.jl new file mode 100644 index 00000000..5a8f600b --- /dev/null +++ b/src/analysis/api.jl @@ -0,0 +1,24 @@ +""" + IndicatorsChangesConfig + +Supertype used to define how indicators and their changes are estimated in +[`estimate_indicator_changes`](@ref). Valid subtypes are: + + - [`SlidingWindowConfig`](@ref). 
+ - [`SegmentedWindowConfig`](@ref). +""" +abstract type IndicatorsChangesConfig end + +""" + estimate_indicator_changes(config::IndicatorsChangesConfig, x [,t]) → output + +Estimate possible transitions for input timeseries `x` using the approach specified +in the configuration type `config`, see [`IndicatorsChangesConfig`](@ref) for possibilities. +`t` is the time vector corresponding to `x`, which defaults to `eachindex(x)`. + +Return the output as [`WindowResults`](@ref) which can be given to +[`significant_transitions`](@ref) to deduce which possible transitions are statistically +significant using a variety of significance tests. +""" +function estimate_indicator_changes end +# The function is extended via multiple dispatch in the specific files \ No newline at end of file diff --git a/src/analysis/segmented_window.jl b/src/analysis/segmented_window.jl new file mode 100644 index 00000000..e6d92871 --- /dev/null +++ b/src/analysis/segmented_window.jl @@ -0,0 +1,103 @@ +""" + SegmentedWindowConfig <: IndicatorsChangesConfig + SegmentedWindowConfig(indicators, change_metrics, tseg_start, tseg_end; kwargs...) + +A configuration that can be given to [`estimate_indicator_changes`](@ref). +It estimates transitions by estimating indicators and changes in user-defined +window segments as follows: + +1. For each segment specified, estimate the corresponding indicator timeseries + by sliding a window over the input timeseries (within the window segment). +2. For each segment of the indicator timeseries, estimate a scalar change metric by applying + the change metric over the full segment of the indicator timeseries. + +`tseg_start, tseg_end` are the starts and ends of the window segments +(the window segments may overlap, that's okay). +`indicators, change_metrics` are identical as in [`SlidingWindowConfig`](@ref).
+ +## Keyword arguments +- +- `width_ind::Int=100, stride_ind::Int=1, whichtime = midpoint, T = Float64`: keywords + identical as in [`SlidingWindowConfig`](@ref). +- `min_width_cha::Int=50`: minimal width required to perform the change metric estimation. + If segment not sufficiently long, return `NaN`. +""" +struct SegmentedWindowConfig{F, G, W<:Function} <: IndicatorsChangesConfig + indicators::F + change_metrics::G + tseg_start::Vector + tseg_end::Vector + width_ind::Int + stride_ind::Int + min_width_cha::Int + whichtime::W +end + +function SegmentedWindowConfig( + indicators, change_metrics, tseg_start, tseg_end; + width_ind = 100, + stride_ind = 1, + min_width_cha = 50, + whichtime = midpoint, + T = Float64, + ) + if length(tseg_start) ≠ length(tseg_end) + throw(ArgumentError("The vectors containing the start and end time of the"* + " segments must be of equal length.")) + end + indicators, change_metrics = sanitycheck_metrics(indicators, change_metrics) + # Last step: precomputable functions, if any + indicators = map(f -> precompute(f, 1:T(width_ind)), indicators) + + return SegmentedWindowConfig( + indicators, change_metrics, tseg_start, tseg_end, + width_ind, stride_ind, min_width_cha, whichtime, + ) +end + +function estimate_indicator_changes(config::SegmentedWindowConfig, x, t) + X, T = eltype(x), eltype(t) + (; indicators, change_metrics, tseg_start, tseg_end) = config + n_ind = length(indicators) + + t_indicator = [T[] for _ in eachindex(tseg_start)] + x_indicator = [X[;;] for _ in eachindex(tseg_start)] + t_change = T.(config.tseg_end) + x_change = fill(Inf, length(tseg_start), n_ind) + one2one = length(change_metrics) == length(indicators) + + for k in eachindex(tseg_start) + tseg, xseg = segment(t, x, tseg_start[k], tseg_end[k]) + t_indicator[k] = windowmap(config.whichtime, tseg; width = config.width_ind, + stride = config.stride_ind) + len_ind = length(t_indicator[k]) + + # Init with NaN instead of 0 to easily recognise when the segment was too 
short + # for the computation to be performed. + x_indicator[k] = fill(NaN, len_ind, n_ind) + + # only analyze if segment long enough to compute metrics + if len_ind > config.min_width_cha + # Loop over indicators + for i in 1:n_ind + indicator = indicators[i] + chametric = one2one ? change_metrics[i] : change_metrics[1] + z = view(x_indicator[k], :, i) + windowmap!(indicator, z, xseg; + width = config.width_ind, stride = config.stride_ind + ) + if chametric isa PrecomputableFunction + chametric = precompute(chametric, t_indicator[k]) + end + x_change[k, i] = chametric(z) + end + end + end + # put everything together in the output type + return SegmentWindowResults(t, x, t_indicator, x_indicator, t_change, x_change, config) +end + +function segment(t, x, t1, t2) + i1, i2 = argmin(abs.(t .- t1)), argmin(abs.(t .- t2)) + return t[i1:i2], x[i1:i2] +end diff --git a/src/analysis/windowed_indicators.jl b/src/analysis/sliding_window.jl similarity index 60% rename from src/analysis/windowed_indicators.jl rename to src/analysis/sliding_window.jl index 2473f2da..e0681210 100644 --- a/src/analysis/windowed_indicators.jl +++ b/src/analysis/sliding_window.jl @@ -1,18 +1,23 @@ """ - WindowedIndicatorConfig(indicators, change_metrics; kwargs...) → config + SlidingWindowConfig <: IndicatorsChangesConfig + SlidingWindowConfig(indicators, change_metrics; kwargs...) -A configuration struct for TransitionsInTimeseries.jl that collects -what indicators and corresponding metrics to use in the [`estimate_indicator_changes`](@ref). +A configuration that can be given to [`estimate_indicator_changes`](@ref). +It estimates transitions by a sliding window approach: + +1. Estimate the timeseries of an indicator by sliding a window over the input timeseries. +2. Estimate changes of an indicator by sliding a window of the change metric over + the indicator timeseries. `indicators` is a tuple of indicators (or a single indicator). `change_metrics` is also a tuple or a single function. 
If a single function, the same change metric is used for all provided indicators. This way the analysis can be efficiently repeated for many indicators and/or change metrics. -Both indicators and change metrics are generic Julia functions that input an -`x::AbstractVector` and output an `s::Real`. Any appropriate function may be given and +Both indicators and change metrics are **generic Julia functions** that input an +`x::AbstractVector` and output an `s::Real`. Any function may be given and see [making custom indicators/change metrics](@ref own_indicator) in the documentation -for more information. +for more information on possible optimizations. ## Keyword arguments @@ -33,11 +38,9 @@ for more information. ``` so any other function of the time window may be given to extract the time point itself, such as `mean` or `median`. - - `T = Float64`: Element type of input timeseries to initialize some computations. - """ -struct WindowedIndicatorConfig{F, G, W<:Function} +struct SlidingWindowConfig{F, G, W<:Function} <: IndicatorsChangesConfig indicators::F change_metrics::G width_ind::Int @@ -47,7 +50,7 @@ struct WindowedIndicatorConfig{F, G, W<:Function} whichtime::W end -function WindowedIndicatorConfig( +function SlidingWindowConfig( indicators, change_metrics; width_ind = 100, stride_ind = 1, @@ -56,7 +59,18 @@ function WindowedIndicatorConfig( whichtime = midpoint, T = Float64, ) - # Sanity checks + indicators, change_metrics = sanitycheck_metrics(indicators, change_metrics) + # Last step: precomputable functions, if any + indicators = map(f -> precompute(f, 1:T(width_ind)), indicators) + change_metrics = map(f -> precompute(f, 1:T(width_cha)), change_metrics) + + return SlidingWindowConfig( + indicators, change_metrics, + width_ind, stride_ind, width_cha, stride_cha, whichtime, + ) +end + +function sanitycheck_metrics(indicators, change_metrics) if !(indicators isa Tuple) indicators = (indicators,) end @@ -68,38 +82,10 @@ function WindowedIndicatorConfig( 
throw(ArgumentError("The amount of change metrics must be as many as the"* "indicators, or only 1.")) end - # Last step: precomputable functions, if any - indicators = map(f -> precompute(f, 1:T(width_ind)), indicators) - change_metrics = map(f -> precompute(f, 1:T(width_cha)), change_metrics) - - return WindowedIndicatorConfig( - indicators, change_metrics, - width_ind, stride_ind, width_cha, stride_cha, whichtime, - ) -end - -""" - estimate_indicator_changes(config::WindowedIndicatorConfig, x [,t]) → output - -Estimate possible transitions for input timeseries `x` using a sliding window approach -as described by `config`: - -1. Estimate the timeseries of an indicator by sliding a window over the input timeseries. -2. Estimate changes of an indicator by sliding a window of the change metric over - the indicator timeseries. - -If `t` (the time vector of `x`), is not provided, it is assumed `t = eachindex(x)`. - -Return the output as [`WindowedIndicatorResults`](@ref) which can be given to -[`significant_transitions`](@ref) to deduce which possible transitions are statistically -significant using a variety of significance tests. 
-""" -function estimate_indicator_changes(x, config::WindowedIndicatorConfig) - t = eachindex(x) - return estimate_indicator_changes(t, x, config) + return indicators, change_metrics end -function estimate_indicator_changes(config::WindowedIndicatorConfig, x, t = eachindex(x)) +function estimate_indicator_changes(config::SlidingWindowConfig, x, t = eachindex(x)) # initialize time vectors t_indicator = windowmap(config.whichtime, t; width = config.width_ind, stride = config.stride_ind) @@ -130,18 +116,27 @@ function estimate_indicator_changes(config::WindowedIndicatorConfig, x, t = each end # put everything together in the output type - return WindowedIndicatorResults( + return SlidingWindowResults( t, x, t_indicator, x_indicator, t_change, x_change, config ) end +""" + WindowResults + +Supertype used to gather results of [`estimate_indicator_changes`](@ref). +Valid subtypes are: + + - [`SlidingWindowResults`](@ref). + - [`SegmentWindowResults`](@ref). +""" +abstract type WindowResults end """ - WindowedIndicatorResults + SlidingWindowResults A struct containing the output of [`estimate_indicator_changes`](@ref) used with -[`WindowedIndicatorConfig`](@ref). -It can be used for further analysis, visualization, +[`SlidingWindowConfig`](@ref). It can be used for further analysis, visualization, or given to [`significant_transitions`](@ref). It has the following fields that the user may access @@ -155,29 +150,72 @@ It has the following fields that the user may access - `x_change`, the change metric timeseries (matrix with each column one change metric). - `t_change`, the time vector of the change metric timeseries. -- [`wim::WindowedIndicatorConfig`](@ref), used for the analysis. +- [`config::SlidingWindowConfig`](@ref), used for the analysis. 
""" -struct WindowedIndicatorResults{TT, T<:Real, X<:Real, XX<:AbstractVector{X}, W} +struct SlidingWindowResults{TT, T<:Real, X<:Real, XX<:AbstractVector{X}, + W} <: WindowResults t::TT # original time vector; most often it is `Base.OneTo`. x::XX t_indicator::Vector{T} x_indicator::Matrix{X} t_change::Vector{T} x_change::Matrix{X} - wim::W + config::W +end + +""" + SegmentWindowResults + +A struct containing the output of [`estimate_indicator_changes`](@ref) used with +[`SegmentedWindowConfig`](@ref). It can be used for further analysis, visualization, +or given to [`significant_transitions`](@ref). + +It has the following fields that the user may access + +- `x`: the input timeseries. +- `t`: the time vector of the input timeseries. + +- `x_indicator::Vector{Matrix}`, with `x_indicator[k]` the indicator timeseries (matrix + with each column one indicator) of the `k`-th segment. +- `t_indicator::Vector{Vector}`, with `t_indicator[k]` the time vector of the indicator + timeseries for the `k`-th segment. + +- `x_change::Matrix`, the change metric values with `x[k, i]` the change metric of the + `i`-th indicator for the `k`-th segment. +- `t_change`, the time vector of the change metric. + +- [`config::SegmentedWindowConfig`](@ref), used for the analysis. +""" +struct SegmentWindowResults{TT, T<:Real, X<:Real, XX<:AbstractVector{X}, + W} <: WindowResults + t::TT # original time vector; most often it is `Base.OneTo`. 
+ x::XX + t_indicator::Vector{Vector{T}} + x_indicator::Vector{Matrix{X}} + t_change::Vector{T} + x_change::Matrix{X} + config::W end -function Base.show(io::IO, ::MIME"text/plain", res::WindowedIndicatorResults) - println(io, "WindowedIndicatorResults") +function Base.show(io::IO, ::MIME"text/plain", res::WindowResults) + println(io, "WindowResults") descriptors = [ "input timeseries" => summary(res.x), - "indicators" => [nameof(i) for i in res.wim.indicators], - "indicator (window, stride)" => (res.wim.width_ind, res.wim.stride_ind), - "change metrics" => [nameof(c) for c in res.wim.change_metrics], - "change metric (window, stride)" => (res.wim.width_cha, res.wim.stride_cha), + "indicators" => [nameof(i) for i in res.config.indicators], + "indicator (window, stride)" => (res.config.width_ind, res.config.stride_ind), + "change metrics" => [nameof(c) for c in res.config.change_metrics], + show_changemetric(res), ] padlen = maximum(length(d[1]) for d in descriptors) + 2 for (desc, val) in descriptors println(io, rpad(" $(desc): ", padlen), val) end +end + +function show_changemetric(res::SlidingWindowResults) + return "change metric (window, stride)" => (res.config.width_cha, res.config.stride_cha) +end + +function show_changemetric(res::SegmentWindowResults) + return "change metric (window)" => ([length(t) for t in res.t_indicator]) end \ No newline at end of file diff --git a/src/change_metrics/slope.jl b/src/change_metrics/slope.jl index a9755ab8..625620c9 100644 --- a/src/change_metrics/slope.jl +++ b/src/change_metrics/slope.jl @@ -38,7 +38,7 @@ struct PrecomputedRidgeRegressionSlope{T} <: Function end function precompute(rr::RidgeRegressionSlope, t::AbstractVector{T}) where {T<:Real} - regression_matrix = precompute_ridgematrix(t, rr.lambda) + regression_matrix = ridgematrix(t, rr.lambda) return PrecomputedRidgeRegressionSlope(isequispaced(t), regression_matrix) end @@ -51,13 +51,18 @@ function (rr::PrecomputedRidgeRegressionSlope)(x::AbstractVector{<:Real}) 
if !(rr.equispaced) error("Time vector is not evenly spaced." * "So far, the API is only designed for evenly spaced time series!") - # For future something like: M .= precompute_ridgematrix(window_view(t)) + # For future something like: M .= ridgematrix(window_view(t)) end return view(rr.regression_matrix, 1, :)' * x # only return slope. # view(rr.regression_matrix, 2, :)' * x --> would return the bias. end -function precompute_ridgematrix(t::AbstractVector{T}, lambda::Real) where {T<:Real} +function ridgematrix(t::AbstractVector{T}, lambda::Real) where {T<:Real} TT = hcat(t, ones(T, length(t)))' return inv(TT * TT' + lambda .* LinearAlgebra.I(2) ) * TT +end + +function (rr::RidgeRegressionSlope)(x::AbstractVector{<:Real}) + regression_matrix = ridgematrix(eachindex(x), rr.lambda) + return view(regression_matrix, 1, :)' * x # only return slope. end \ No newline at end of file diff --git a/src/analysis/api_significance.jl b/src/significance/api_significance.jl similarity index 71% rename from src/analysis/api_significance.jl rename to src/significance/api_significance.jl index 6a35205c..e3066e1b 100644 --- a/src/analysis/api_significance.jl +++ b/src/significance/api_significance.jl @@ -10,10 +10,10 @@ abstract type TransitionsSignificance end """ - significant_transitions(res::WindowedIndicatorResults, signif::TransitionsSignificance) + significant_transitions(res::WindowResults, signif::TransitionsSignificance) Estimate significant transtions in `res` using the method described by `signif`. Return `flags`, a Boolean matrix with identical size as `res.x_change`. It contains trues wherever a change metric of `res` is deemed significant. 
""" -function significant_transitions(::WindowedIndicatorResults, ::TransitionsSignificance) end \ No newline at end of file +function significant_transitions(::WindowResults, ::TransitionsSignificance) end \ No newline at end of file diff --git a/src/analysis/quantile_significance.jl b/src/significance/quantile_significance.jl similarity index 88% rename from src/analysis/quantile_significance.jl rename to src/significance/quantile_significance.jl index c571f343..3dfa98bc 100644 --- a/src/analysis/quantile_significance.jl +++ b/src/significance/quantile_significance.jl @@ -2,7 +2,7 @@ QuantileSignificance(; p = 0.95, tail = :both) <: TransitionsSignificance A configuration struct for significance testing [`significant_transitions`](@ref). -When used with [`WindowedIndicatorResults`](@ref), significance is estimated +When used with [`WindowResults`](@ref), significance is estimated by comparing the value of each change metric with its `p`-quantile. Values that exceed the `p`-quantile (if `tail = :right`) or subseed the `1-p`-quantile (if `tail = :left`) @@ -20,7 +20,7 @@ end using Statistics: quantile -function significant_transitions(res::WindowedIndicatorResults, signif::QuantileSignificance) +function significant_transitions(res::WindowResults, signif::QuantileSignificance) flags = similar(res.x_change, Bool) for (i, x) in enumerate(eachcol(res.x_change)) qmin, qmax = quantile(x, (1 - signif.p, signif.p)) @@ -42,7 +42,7 @@ end SigmaSignificance(; factor = 3.0, tail = :both) <: TransitionsSignificance A configuration struct for significance testing [`significant_transitions`](@ref). -When used with [`WindowedIndicatorResults`](@ref), significance is estimated +When used with [`WindowResults`](@ref), significance is estimated by comparing how many standard deviations (`σ`) the value exceeds the mean value (`μ`). Values that exceed (if `tail = :right`) `μ + factor*σ`, or subseed (if `tail = :left`) `μ - factor*σ` are deemed significant. 
@@ -60,7 +60,7 @@ end using Statistics: std, mean -function significant_transitions(res::WindowedIndicatorResults, signif::SigmaSignificance) +function significant_transitions(res::WindowResults, signif::SigmaSignificance) flags = similar(res.x_change, Bool) for (i, x) in enumerate(eachcol(res.x_change)) μ = mean(x) diff --git a/src/analysis/surrogates_significance.jl b/src/significance/surrogates_significance.jl similarity index 55% rename from src/analysis/surrogates_significance.jl rename to src/significance/surrogates_significance.jl index 5b0a3136..8533bd38 100644 --- a/src/analysis/surrogates_significance.jl +++ b/src/significance/surrogates_significance.jl @@ -15,7 +15,7 @@ using timeseries surrogates. ## Description -When used with [`WindowedIndicatorResults`](@ref), significance is estimated as follows: +When used with [`WindowResults`](@ref), significance is estimated as follows: `n` surrogates from the input timeseries are generated using `surromethod`, which is any `Surrogate` subtype provided by [TimeseriesSurrogates.jl](https://juliadynamics.github.io/TimeseriesSurrogates.jl/dev/api/). @@ -34,7 +34,7 @@ higher change metric, discriminatory statistic values. This is the case for stat that quantify entropy. For statistics that quantify autocorrelation, use `tail = :right` instead. For anything else, use the default `tail = :both`. An iterable of `tail` values can also be given, in which case a specific `tail` -is used for each change metric in [`WindowedIndicatorResults`](@ref). +is used for each change metric in [`WindowResults`](@ref). 
Note that the raw p-values can be accessed in the field `.pvalues`, after calling the [`significant_transitions`](@ref) function with `SurrogatesSignificance`, in case you wish @@ -57,8 +57,8 @@ function SurrogatesSignificance(; return SurrogatesSignificance(surromethod, n, tail, rng, p, zeros(1,1)) end -function significant_transitions(res::WindowedIndicatorResults, signif::SurrogatesSignificance) - (; indicators, change_metrics) = res.wim +function significant_transitions(res::SlidingWindowResults, signif::SurrogatesSignificance) + (; indicators, change_metrics) = res.config tail = signif.tail if !(tail isa Symbol) && length(tail) ≠ length(indicators) throw(ArgumentError("Given `tail` must be a symbol or an iterable of same length "* @@ -72,7 +72,6 @@ function significant_transitions(res::WindowedIndicatorResults, signif::Surrogat # Dummy vals for surrogate parallelization indicator_dummys = [res.x_indicator[:, 1] for _ in 1:Threads.nthreads()] change_dummys = [res.x_change[:, 1] for _ in 1:Threads.nthreads()] - for i in 1:size(pvalues, 2) # loop over change metrics indicator = indicators[i] i_metric = length(change_metrics) == length(indicators) ? 
i : 1 @@ -81,17 +80,67 @@ function significant_transitions(res::WindowedIndicatorResults, signif::Surrogat c = view(res.x_change, :, i) # change metric timeseries # p values for current i pval = view(pvalues, :, i) - indicator_metric_surrogates_loop!( + sliding_surrogates_loop!( indicator, chametric, c, pval, signif.n, sgens, indicator_dummys, change_dummys, - res.wim.width_ind, res.wim.stride_ind, res.wim.width_cha, res.wim.stride_cha, tai + res.config.width_ind, res.config.stride_ind, res.config.width_cha, res.config.stride_cha, tai ) end pvalues ./= signif.n return pvalues .< signif.p end -function indicator_metric_surrogates_loop!( +function significant_transitions(res::SegmentWindowResults, signif::SurrogatesSignificance) + # Unpack and sanity checks + X = eltype(res.x_change) + (; indicators, change_metrics, tseg_start, tseg_end) = res.config + tail = signif.tail + if !(tail isa Symbol) && length(tail) ≠ length(indicators) + throw(ArgumentError("Given `tail` must be a symbol or an iterable of same length "* + "as the input indicators. Got length $(length(tail)) instead of $(length(indicators))." + )) + end + # Multi-threaded surrogate realization + seeds = rand(signif.rng, 1:typemax(Int), Threads.nthreads()) + change_dummys = [X(0) for _ in 1:Threads.nthreads()] + pvalues = signif.pvalues = zeros(X, size(res.x_change)...) + + for k in eachindex(tseg_start) # loop over segments + # If segment too short, return inf p-value + if length(res.x_indicator[k][:, 1]) < res.config.min_width_cha + pvalues[k, :] .= Inf + else + tseg, xseg = segment(res.t, res.x, tseg_start[k], tseg_end[k]) + sgens = [surrogenerator(xseg, signif.surrogate, Random.Xoshiro(seed)) + for seed in seeds] + # Dummy vals for surrogate parallelization + indicator_dummys = [res.x_indicator[k][:, 1] for _ in 1:Threads.nthreads()] + for i in eachindex(indicators) + indicator = indicators[i] + i_metric = length(change_metrics) == length(indicators) ? 
i : 1 + chametric = change_metrics[i_metric] + + # Precomputation: include time vector for metrics that require it! + if chametric isa PrecomputableFunction + chametric = precompute(chametric, res.t_indicator[k]) + end + + tai = tail isa Symbol ? tail : tail[i] + c = res.x_change[k, i] # change metric + pval = view(pvalues, k, i) + # p values for current i + segmented_surrogates_loop!( + indicator, chametric, c, pval, signif.n, sgens, + indicator_dummys, change_dummys, + res.config.width_ind, res.config.stride_ind, tai) + end + end + end + pvalues ./= signif.n + return pvalues .< signif.p +end + +function sliding_surrogates_loop!( indicator, chametric, c, pval, n_surrogates, sgens, indicator_dummys, change_dummys, width_ind, stride_ind, width_cha, stride_cha, tail @@ -101,7 +150,6 @@ function indicator_metric_surrogates_loop!( pval_right = zeros(length(pval)) pval_left = copy(pval_right) end - # parallelized surrogate loop Threads.@threads for _ in 1:n_surrogates id = Threads.threadid() @@ -110,11 +158,41 @@ function indicator_metric_surrogates_loop!( windowmap!(indicator, indicator_dummys[id], s; width = width_ind, stride = stride_ind) windowmap!(chametric, change_dummy, indicator_dummys[id]; - width = width_cha, stride = stride_cha - ) - # accumulate for p-value + width = width_cha, stride = stride_cha) + if tail == :right + pval .+= c .< change_dummy + elseif tail == :left + pval .+= c .> change_dummy + elseif tail == :both + pval_right .+= c .< change_dummy + pval_left .+= c .> change_dummy + end + end + if tail == :both + pval .= 2min.(pval_right, pval_left) + end +end + +function segmented_surrogates_loop!( + indicator, chametric, c, pval, n_surrogates, sgens, + indicator_dummys, change_dummys, width_ind, stride_ind, tail +) + + if tail == :both + pval_right = zeros(length(pval)) + pval_left = copy(pval_right) + end + + # parallelized surrogate loop + Threads.@threads for _ in 1:n_surrogates + id = Threads.threadid() + s = sgens[id]() + change_dummy = 
change_dummys[id] + windowmap!(indicator, indicator_dummys[id], s; + width = width_ind, stride = stride_ind) + change_dummy = chametric(indicator_dummys[id]) if tail == :right - pval .+= c .< change_dummys[id] + pval .+= c .< change_dummy elseif tail == :left pval .+= c .> change_dummy elseif tail == :both diff --git a/test/full_analysis.jl b/test/full_analysis.jl index 8778d60d..ace87c44 100644 --- a/test/full_analysis.jl +++ b/test/full_analysis.jl @@ -8,7 +8,7 @@ using TransitionsInTimeseries, Test indicators = (mean, var) change_metric = RidgeRegressionSlope() - config = WindowedIndicatorConfig(indicators, change_metric; + config = SlidingWindowConfig(indicators, change_metric; width_ind = 100, stride_ind = 1, width_cha = 100, stride_cha = 1, whichtime = last, )