Merge pull request #73 from JuliaDynamics/slopechange

New pipeline for identifying a "change" in a timeseries: SlopeChange
JuliaDynamics · Feb 23, 2024 · b7719f9 · b7719f9
2 parents b4fc84c + ba153d4
commit b7719f9
Show file tree

Hide file tree

Showing 15 changed files with 233 additions and 24 deletions.
diff --git a/Project.toml b/Project.toml
@@ -10,6 +10,7 @@ FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -29,6 +30,7 @@ FFTW = "^1.6"
 HypothesisTests = "0.11"
 InteractiveUtils = "1"
 LinearAlgebra = "1"
+LsqFit = "0.15"
 Makie = "≥ 0.19"
 Random = "1"
 Reexport = "1.2"

diff --git a/docs/make.jl b/docs/make.jl
@@ -34,4 +34,5 @@ bib = CitationBibliography(joinpath(@__DIR__, "src", "refs.bib"); style=:authory
 
 build_docs_with_style(pages, TransitionsInTimeseries, StatsBase;
     authors = "Jan Swierczek-Jereczek <[email protected]>, "*
-    "George Datseris <[email protected]>", bib)
+    "George Datseris <[email protected]>", bib
+)
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -3,15 +3,33 @@
 ## Main analysis functions
 
 ```@docs
-ChangesConfig
-SlidingWindowConfig
-SegmentedWindowConfig
 estimate_changes
+ChangesConfig
 ChangesResults
+```
+
+### Sliding window
+
+```@docs
+SlidingWindowConfig
 SlidingWindowResults
+```
+
+### Segmented window
+
+```@docs
+SegmentedWindowConfig
 SegmentedWindowResults
 ```
 
+
+### Slope change
+
+```@docs
+SlopeChangeConfig
+SlopeChangeResults
+```
+
 ## Significance testing
 
 ```@docs
@@ -21,10 +39,14 @@ SurrogatesSignificance
 ThresholdSignificance
 SigmaSignificance
 QuantileSignificance
+SlopeChangeSignificance
 ```
 
 ## [Indicators](@id indicators)
 
+Note that any Julia function can be used as an indicator or change metric,
+so the list here covers only the indicators that are implemented directly in this package.
+
 ### Value distribution
 
 ```@docs

diff --git a/docs/src/devdocs.md b/docs/src/devdocs.md
@@ -5,7 +5,7 @@ All contributions come in the form of Pull Requests, for which we strongly advis
 
 ## New indicators or change metrics
 
-As explained already in e.g., [`SlidingIndicatorConfig`](@ref), new indicators or change metrics are standard Julia functions, so you only need to define such a function (and document it, test it, etc.).
+As explained already in e.g., [`SlidingWindowConfig`](@ref), new indicators or change metrics are standard Julia functions, so you only need to define such a function (and document it, test it, etc.).
 
 ## New pipeline for estimating changes
 

diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
@@ -231,8 +231,7 @@ We can conveniently plot the information contained in `results` by using
 `plot_indicator_changes`:
 
 ````@example tutorial
-tv = plot_indicator_changes(results, additional_timeseries = x_nlinear[2:end])
-tv.fig
+fig = plot_indicator_changes(results)
 ````
 
 Step 2 is to estimate significance using [`SurrogatesSignificance`](@ref)
@@ -243,8 +242,8 @@ above with `plot_significance!`:
 ````@example tutorial
 signif = SurrogatesSignificance(n = 1000, tail = [:right, :right])
 flags = significant_transitions(results, signif)
-plot_significance!(tv, signif, flags = flags)
-tv.fig
+plot_significance!(fig, results, signif, flags = flags)
+fig
 ````
 
 ### [Segmented windows] (@id segmented_windows)
@@ -263,8 +262,6 @@ config = SegmentedWindowConfig(indicators, change_metrics,
 results = estimate_changes(config, input, t)
 signif = SurrogatesSignificance(n = 1000, tail = [:right, :right])
 flags = significant_transitions(results, signif)
-tv = plot_changes_significance(results, signif,
-    additional_timeseries = x_nlinear[2:end])
-tv.fig
+fig = plot_changes_significance(results, signif)
 ````
 
diff --git a/paper/code/figure1.jl b/paper/code/figure1.jl
@@ -8,7 +8,7 @@ indicators = (var, ar1_whitenoise)
 change_metrics = (kendalltau, kendalltau)
 config = SegmentedWindowConfig(indicators, change_metrics, [t[1]], [t[end]];
     width_ind = length(x) ÷ 2, whichtime = last, min_width_cha = 50)
-results = estimate_indicator_changes(config, x, t)
+results = estimate_changes(config, x, t)
 signif = SurrogatesSignificance(n = 1000, tail = [:right, :right], rng = Xoshiro(1995))
 flags = significant_transitions(results, signif)
 fig = plot_changes_significance(results, signif)

diff --git a/paper/code/figure2.jl b/paper/code/figure2.jl
@@ -23,7 +23,7 @@ function main()
         t0 = time()
         for i in 1:n
             # Compute the metrics over sliding windows and their significance
-            results = estimate_indicator_changes(config, x, t)
+            results = estimate_changes(config, x, t)
         end
         t_elapsed[i] = (time() - t0) / 2
     end
@@ -32,7 +32,7 @@ function main()
         width_ind = length(x) ÷ 2, stride_ind = 1, whichtime = last, min_width_cha = 1)
     t0 = time()
     for i in 1:n
-        results = estimate_indicator_changes(config, x, t)
+        results = estimate_changes(config, x, t)
     end
     t_elapsed[m+1] = (time() - t0) / 2
 

diff --git a/paper/paper.md b/paper/paper.md
@@ -48,7 +48,7 @@ largely because of the substantial implications of such a collapse for human soc
 A common concern in the scientific community is that published work on the topic is difficult
 to reproduce, despite the impact it implies for humanity.
 This can be largely addressed by a unifying software that is accessible, performant,
-reproducible, reliable and extensible. Such a software does not exist yet, but here 
+reproducible, reliable and extensible. Such a software does not exist yet, but here
 we propose TransitionsInTimeseries.jl to fill this gap.
 We believe this is a major step towards establishing a software as standard, widely used
 by academics working on transitions in timeseries.
@@ -91,7 +91,7 @@ config = SegmentedWindowConfig(indicators, change_metrics, [time[1]], [time[end]
     width_ind = length(residual) ÷ 2, whichtime = last, min_width_cha = 100)
 
 # Compute the metrics over sliding windows and their significance
-results = estimate_indicator_changes(config, data, time)
+results = estimate_changes(config, data, time)
 signif = SurrogatesSignificance(n = 1000, tail = :right, rng = Xoshiro(1995))
 flags = significant_transitions(results, signif)
 
@@ -176,7 +176,7 @@ thus offering optimized routines with numerous surrogate types.
 
 TransitionsInTimeseries.jl covers methods for prediction as well as detection of transitions,
 which is unprecedented to our knowledge. This relies on the definition of different analysis pipelines, which
-consist in a `ChangesConfig` determining the behavior of `estimate_indicator_changes` via
+consist in a `ChangesConfig` determining the behavior of `estimate_changes` via
 multiple dispatch. For instance, a detection
 task can be performed by replacing the `SegmentedWindowConfig` by a `SlidingWindowConfig`
 in the code above:
@@ -189,7 +189,7 @@ indicators = (nothing, nothing)
 change_metrics = (difference_of_mean(), difference_of_max())
 config = SlidingWindowConfig(indicators, change_metrics;
    width_cha = 50, whichtime = midpoint)
-results = estimate_indicator_changes(config, data, time)
+results = estimate_changes(config, data, time)
 ```
 
 We here skip the computation of indicators and compare the difference in mean and maximum
@@ -203,7 +203,7 @@ ways of testing for significance are provided and can be interchangeably used.
 
 Besides choosing among the already provided analysis pipelines,
 the user can implement their own one by defining a new `ChangesConfig` and
-the corresponding behavior of `estimate_indicator_changes`. This makes it particularly
+the corresponding behavior of `estimate_changes`. This makes it particularly
 easy to leverage pre-existing functionalities of TransitionsInTimeseries.jl
 with a minimal restriction on the structure. As explained in the devdocs, the latter eases
 the integration of new methods into a unified framework. This also holds for the

diff --git a/src/TransitionsInTimeseries.jl b/src/TransitionsInTimeseries.jl
@@ -26,9 +26,11 @@ include("misc/precomputation.jl")
 include("analysis/api.jl")
 include("analysis/sliding_window.jl")
 include("analysis/segmented_window.jl")
+include("analysis/slope_change.jl")
 include("significance/api_significance.jl")
 include("significance/surrogates_significance.jl")
 include("significance/basic_stat_significance.jl")
+include("significance/slope_significance.jl")
 
 include("indicators/critical_slowing_down.jl")
 include("indicators/distribution_distance.jl")
@@ -59,6 +61,7 @@ export SlidingWindowResults, SegmentedWindowResults
 export estimate_changes, ChangesResults
 export Significance, significant_transitions, segmented_significance
 export ThresholdSignificance, QuantileSignificance, SigmaSignificance, SurrogatesSignificance
+export SlopeChangeConfig, SlopeChangeResults, SlopeChangeSignificance
 
 # timeseries
 export isequispaced, equispaced_step

diff --git a/src/analysis/slope_change.jl b/src/analysis/slope_change.jl
@@ -0,0 +1,103 @@
+import LsqFit
+
+"""
+    SlopeChangeConfig <: ChangesConfig
+    SlopeChangeConfig(; indicators = nothing, kw...)
+
+A configuration that can be given to [`estimate_changes`](@ref).
+It estimates a change of slope in the timeseries by fitting two
+connected linear segments to the timeseries,
+returning the results (i.e., the two-linear fits) as [`SlopeChangeResults`](@ref).
+
+## Keyword arguments
+- `indicators = nothing`: if `nothing`, the slope fitting is done directly over the
+  input timeseries. Otherwise it should be a function `f(x) -> Real`.
+  The slope fitting is then done over an indicator of the timeseries, which itself
+  is estimated via a sliding window exactly as in [`SlidingWindowConfig`](@ref).
+- `width_ind = 100, stride_ind = 1, whichtime = midpoint`: exactly as in
+  [`SlidingWindowConfig`](@ref); only used if `indicators` is not `nothing`.
+"""
+@kwdef struct SlopeChangeConfig{I, W} <: ChangesConfig
+    indicators::I = nothing
+    width_ind::Int = 100
+    stride_ind::Int = 1
+    whichtime::W = midpoint
+end
+
+# Fit two connected linear segments to `x` (or to its sliding-window indicator
+# when `config.indicators` is not `nothing`) and return a `SlopeChangeResults`.
+# `t` is the time vector of `x` and defaults to the indices of `x`.
+function estimate_changes(config::SlopeChangeConfig, x, t = eachindex(x))
+    indicators = config.indicators
+    if isnothing(indicators)
+        # Skip indicators if they are nothing: the fit runs on the input itself
+        t_indicator = t
+        x_indicator = x
+    else
+        t_indicator = windowmap(config.whichtime, t;
+            width = config.width_ind, stride = config.stride_ind
+        )
+        x_indicator = windowmap(indicators, x;
+            width = config.width_ind, stride = config.stride_ind
+        )
+    end
+    # The initial guess must be derived from the same series that is fitted below;
+    # guessing from the raw `x, t` would seed the optimizer with coefficients of a
+    # different quantity whenever an indicator is used.
+    p0 = guess_initial_p(x_indicator, t_indicator)
+    fit = LsqFit.curve_fit(twolinear, t_indicator, x_indicator, p0)
+    pbest = LsqFit.coef(fit)
+    a, b, c, d = pbest
+    # The change point is where the lines `a + b*t` and `c + d*t` intersect
+    t_change = (c - a)/(b - d)
+    return SlopeChangeResults(t, x, t_indicator, x_indicator, [t_change], pbest, config, fit)
+end
+
+# Initial parameter guess `[a, b, c, d]` for the two-segment fit: one ordinary
+# linear regression over the first half of the data and one over the second half.
+# Both halves include the middle sample.
+function guess_initial_p(x, t)
+    lo, hi = firstindex(x), lastindex(x)
+    mid = (lo + hi)÷2
+    a, b = linreg(t[lo:mid], x[lo:mid])
+    c, d = linreg(t[mid:hi], x[mid:hi])
+    return [a, b, c, d]
+end
+
+import Statistics
+# Least-squares line through the points `(x, y)`.
+# Returns `(a, b)` such that `y ≈ a + b*x` (offset first, slope second).
+function linreg(x, y)
+    xmean = Statistics.mean(x)
+    ymean = Statistics.mean(y)
+    # slope = cov(x, y) / var(x), reusing the means computed above
+    slope = Statistics.covm(x, xmean, y, ymean)/Statistics.varm(x, xmean)
+    offset = ymean - slope*xmean
+    return offset, slope
+end
+
+# Model of two connected linear segments with parameters `p = (a, b, c, d)`:
+# `a + b*t` before the intersection time of the two lines and `c + d*t` from
+# the intersection onwards. Works elementwise on `t`.
+function twolinear(t, p)
+    offset1, slope1, offset2, slope2 = p
+    # the two lines cross where offset1 + slope1*t == offset2 + slope2*t
+    t_cross = (offset2 - offset1)/(slope1 - slope2)
+    return ifelse.(t .< t_cross, offset1 .+ slope1 .* t, offset2 .+ slope2 .* t)
+end
+
+"""
+    SlopeChangeResults <: ChangesResults
+
+A struct containing the output of [`estimate_changes`](@ref) used with
+[`SlopeChangeConfig`](@ref). It can be used for further analysis, visualization,
+or given to [`significant_transitions`](@ref). The only significance type
+that can be used with it in [`significant_transitions`](@ref) is
+[`SlopeChangeSignificance`](@ref).
+
+It has the following fields that the user may access:
+
+- `x`: the input timeseries.
+- `t`: the time vector of the input timeseries.
+- `x_indicator`: the indicator timeseries.
+- `t_indicator`: the time vector of the indicator timeseries.
+- `t_change`: the time the slope changes, stored as a one-element vector.
+- `fitparams = a, b, c, d`: the fitted linear coefficients, `a + b*t` before
+  `t_change` and `c + d*t` after `t_change`.
+"""
+struct SlopeChangeResults{T, X, W, L} <: ChangesResults
+    t # we don't parameterize these; they are only used for plotting
+    x
+    t_indicator::T
+    x_indicator::X
+    t_change::Vector{Float64}
+    fitparams::Vector{Float64}
+    config::W # the configuration that was given to `estimate_changes`
+    lsqfit::L # the fit object returned by `LsqFit.curve_fit`
+end
diff --git a/src/significance/slope_significance.jl b/src/significance/slope_significance.jl
@@ -0,0 +1,33 @@
+"""
+    SlopeChangeSignificance(; moe_slope, moe_offset, slope_diff = moe_slope, pvalue = 0.05)
+
+Test whether the result of [`SlopeChangeResults`](@ref) is statistically significant.
+
+Two tests are done:
+1. Check whether the _margin of error_ of the fitted parameters `a, b, c, d`
+   of the two linear segments `a + b*t, c + d*t`
+   is less than or equal to the specified margins of error, for a chosen `pvalue`:
+   `moe_offset` bounds the offsets `a, c` and `moe_slope` bounds the slopes `b, d`.
+2. Test that the two slopes `b, d` have difference greater than `slope_diff`.
+
+The Boolean `&` of the above two is the final test.
+
+The margin of error is simply half the size of the confidence interval,
+also known as radius of the confidence interval.
+"""
+@kwdef struct SlopeChangeSignificance <: Significance
+    moe_slope::Float64
+    moe_offset::Float64
+    slope_diff::Float64 = moe_slope
+    pvalue::Float64 = 0.05
+end
+
+# Flag the fitted slope change in `res` as significant when (1) the margins of
+# error of all four fitted coefficients are within the bounds given in `signif`
+# and (2) the two fitted slopes differ by more than `signif.slope_diff`.
+# Returns a one-element `Vector{Bool}`.
+function significant_transitions(res::SlopeChangeResults, signif::SlopeChangeSignificance)
+    # margins of error of the fitted coefficients, ordered as `a, b, c, d`
+    moe = LsqFit.margin_error(res.lsqfit, signif.pvalue)
+    moeflag = (moe[1] ≤ signif.moe_offset &&
+        moe[3] ≤ signif.moe_offset &&
+        moe[2] ≤ signif.moe_slope &&
+        moe[4] ≤ signif.moe_slope)
+
+    # The threshold is the `slope_diff` field of `signif`; a bare `moe_slope`
+    # is not defined in this scope and would throw an `UndefVarError`.
+    slopeflag = abs(res.fitparams[2] - res.fitparams[4]) > signif.slope_diff
+    return [moeflag && slopeflag]
+end
diff --git a/test/Project.toml b/test/Project.toml
@@ -8,3 +8,4 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
diff --git a/test/full_analysis.jl b/test/full_analysis.jl
@@ -23,7 +23,7 @@ end
         width_cha = w, stride_cha = s, whichtime = last,
     )
 
-    res = estimate_indicator_changes(config, x, t)
+    res = estimate_changes(config, x, t)
     ni, nc = length(res.t_indicator), length(res.t_change)
 
     # Mean of identity is identity with offset; its trend is the stride for x=t
@@ -83,7 +83,7 @@ end
         width_ind = w, stride_ind = s, min_width_cha = 30, whichtime = last,
     )
 
-    res = estimate_indicator_changes(config, x, t)
+    res = estimate_changes(config, x, t)
 
     # Mean of identity is identity with offset; its trend is the stride for x=t
     mean_ground_truth = range((w+1)/2, step = s, length = length(t) - w + 1)

diff --git a/test/indicators.jl b/test/indicators.jl
@@ -1,4 +1,5 @@
 using TransitionsInTimeseries, Test, Random, TimeseriesSurrogates, Distributions
+using StableRNGs
 
 # Check if AR1 regression parameter from a known AR1 process with white noise
 # is successfully estimated.
@@ -24,11 +25,11 @@ end
 # Test kolmogorov_smirnov by sampling different distributions
 @testset "kolmogorov_smirnov" begin
     n = 1000
-
+    rng = StableRNG(1234)
     distributions = [Uniform(), Normal(), Binomial()]
     for (i, d1) in enumerate(distributions)
         for (j, d2) in enumerate(distributions)
-            x = vcat(rand(d1, n), rand(d2, n))
+            x = vcat(rand(rng, d1, n), rand(rng, d2, n))
             if i == j
                 @test kolmogorov_smirnov(x) > 0.1
             else