diff --git a/Project.toml b/Project.toml index 3ab4124f..71474f32 100644 --- a/Project.toml +++ b/Project.toml @@ -2,7 +2,7 @@ name = "Associations" uuid = "614afb3a-e278-4863-8805-9959372b9ec2" authors = ["Kristian Agasøster Haaga ", "Tor Einar Møller ", "George Datseris "] repo = "https://github.com/kahaaga/Associations.jl.git" -version = "4.2.0" +version = "4.3.0" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" @@ -32,23 +32,23 @@ TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" [compat] Accessors = "^0.1.28" Combinatorics = "1" -ComplexityMeasures = "3.6.5" +ComplexityMeasures = "3.7.3" DSP = "^0.7" -DelayEmbeddings = "2.7" +DelayEmbeddings = "2.8" Distances = "^0.10" Distributions = "^0.25" Graphs = "^1.11" HypothesisTests = "^0.11" Neighborhood = "^0.2.4" ProgressMeter = "1.10" -RecurrenceAnalysis = "2" +RecurrenceAnalysis = "2.1" Reexport = "1" Scratch = "1" SpecialFunctions = "2" -StateSpaceSets = "^1.5" +StateSpaceSets = "2.1" StaticArrays = "^1" Statistics = "1" StatsBase = "^0.34" StyledStrings = "1" -TimeseriesSurrogates = "2.6" -julia = "^1.10" +TimeseriesSurrogates = "2.7" +julia = "^1.10.6" diff --git a/changelog.md b/changelog.md index c0c4d868..49225b64 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,11 @@ From version v4.0 onwards, this package has been renamed to to Associations.jl. +# 4.3 + +- Compatibility with StateSpaceSets.jl v2.X +- Improved documentation strings for `GaoOhViswanath` and `GaoKannanOhViswanath` + # 4.2 - New association measure: `AzadkiaChatterjeeCoefficient`.
diff --git a/docs/Project.toml b/docs/Project.toml index 8a97c901..c3872cdf 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -26,4 +26,4 @@ TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" [compat] DynamicalSystemsBase = "3" -julia = "^1.6" +julia = "^1.10.6" diff --git a/src/Associations.jl b/src/Associations.jl index 58ecf3a4..dfd99233 100644 --- a/src/Associations.jl +++ b/src/Associations.jl @@ -10,17 +10,16 @@ module Associations using Reexport using StateSpaceSets + using DelayEmbeddings: embed, genembed export embed, genembed import HypothesisTests: pvalue export trajectory - @reexport using StateSpaceSets - @reexport using ComplexityMeasures - @reexport using TimeseriesSurrogates - include("utils/utils.jl") + include("core.jl") + include("utils/utils.jl") include("methods/information/information.jl") include("methods/crossmappings/crossmappings.jl") @@ -37,6 +36,9 @@ module Associations include("deprecations/deprecations.jl") + @reexport using StateSpaceSets + @reexport using ComplexityMeasures + @reexport using TimeseriesSurrogates # Update messages: using Scratch display_update = true diff --git a/src/causal_graphs/oce/OCE.jl b/src/causal_graphs/oce/OCE.jl index ad00084b..e1b6364c 100644 --- a/src/causal_graphs/oce/OCE.jl +++ b/src/causal_graphs/oce/OCE.jl @@ -89,7 +89,7 @@ function infer_graph(alg::OCE, x; verbose = true) return select_parents(alg, x; verbose) end -function infer_graph(alg::OCE, x::AbstractDataset; verbose = true) +function infer_graph(alg::OCE, x::AbstractStateSpaceSet; verbose = true) return infer_graph(alg, columns(x); verbose) end diff --git a/src/methods/information/definitions/mutual_informations/mutual_informations.jl b/src/methods/information/definitions/mutual_informations/mutual_informations.jl index 9b7344c7..1f23e59c 100644 --- a/src/methods/information/definitions/mutual_informations/mutual_informations.jl +++ b/src/methods/information/definitions/mutual_informations/mutual_informations.jl @@ -32,13 
+32,12 @@ end function marginal_entropies_mi3h_discrete(est::EntropyDecomposition{<:MutualInformation, <:DiscreteInfoEstimator}, x, y) # Encode marginals to integers based on the outcome space. - eX, eY = codified_marginals(est.discretization, x, y) - eXY = StateSpaceSet(eX, eY) + eX::StateSpaceSet, eY::StateSpaceSet = StateSpaceSet.(codified_marginals(est.discretization, x, y)) + eXY::StateSpaceSet = StateSpaceSet(eX, eY) # The outcome space is no longer relevant from this point on. We're done discretizing, # so now we can just count (i.e. use `UniqueElements` as the outcome space). o = UniqueElements() - modified_est = estimator_with_overridden_parameters(est.definition, est.est) HX = information(modified_est, est.pest, o, eX) # estimates entropy in the X marginal HY = information(modified_est, est.pest, o, eY) # estimates entropy in the Y marginal diff --git a/src/methods/information/definitions/transferentropy/transferoperator.jl b/src/methods/information/definitions/transferentropy/transferoperator.jl index 48220bad..e5d30307 100644 --- a/src/methods/information/definitions/transferentropy/transferoperator.jl +++ b/src/methods/information/definitions/transferentropy/transferoperator.jl @@ -1,4 +1,3 @@ - import ComplexityMeasures: TransferOperator, invariantmeasure, InvariantMeasure, Probabilities using ComplexityMeasures.GroupSlices export TransferOperator @@ -47,19 +46,9 @@ end function _marginal_encodings(encoder::RectangularBinEncoding, x::VectorOrStateSpaceSet...) X = StateSpaceSet(StateSpaceSet.(x)...) 
- bins = [vec(encode_as_tuple(encoder, pt))' for pt in X] + bins = [vec(encode_as_tuple(encoder, pt))' for pt in unique(X.data)] joint_bins = reduce(vcat, bins) - idxs = size.(x, 2) #each input can have different dimensions - s = 1 - encodings = Vector{StateSpaceSet}(undef, length(idxs)) - for (i, cidx) in enumerate(idxs) - variable_subset = s:(s + cidx - 1) - s += cidx - y = @views joint_bins[:, variable_subset] - encodings[i] = StateSpaceSet(y) - end - - return encodings + return StateSpaceSet(joint_bins) end # Only works for `RelativeAmount`, because probabilities are obtained from the @@ -88,9 +77,11 @@ function h4_marginal_probs( # marginals, not a single encoding integer for each bin. Otherwise, we can't # properly subset marginals here and relate them to the approximated invariant measure. encoding = iv.to.encoding + # Visited bins (absolute coordinates) visited_bins_coordinates = StateSpaceSet(decode.(Ref(encoding), iv.to.bins)) - unique_visited_bins = _marginal_encodings(iv.to.encoding, visited_bins_coordinates)[1] + # Visited bins (coordinates encoded to integers using rectangular encoding) + unique_visited_bins = _marginal_encodings(iv.to.encoding, visited_bins_coordinates) # # The subset of visited bins with nonzero measure inds_non0measure = findall(iv.ρ .> 0) positive_measure_bins = unique_visited_bins[inds_non0measure] diff --git a/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl b/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl index 55ca8b36..fe56ca9f 100644 --- a/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl +++ b/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl @@ -4,7 +4,7 @@ export GaoKannanOhViswanath """ GaoKannanOhViswanath <: MutualInformationEstimator - GaoKannanOhViswanath(; k = 1, w = 0) + GaoKannanOhViswanath(definition = MIShannon(); k = 1, w = 0) The `GaoKannanOhViswanath` (Shannon) estimator is 
designed for estimating Shannon mutual information between variables that may be either discrete, continuous or @@ -14,6 +14,14 @@ a mixture of both [GaoKannanOhViswanath2017](@cite). - [`MIShannon`](@ref) +## Keyword arguments + +- **`k::Int`**: The number of nearest neighbors to consider. Only information about the + `k`-th nearest neighbor is actually used. +- **`w::Int`**: The Theiler window, which determines if temporal neighbors are excluded + during neighbor searches in the joint space. Defaults to `0`, meaning that only the + point itself is excluded. + ## Usage - Use with [`association`](@ref) to compute Shannon mutual information from input data. diff --git a/src/methods/information/estimators/mutual_info_estimators/GaoOhViswanath.jl b/src/methods/information/estimators/mutual_info_estimators/GaoOhViswanath.jl index 4ecdfd6e..4dc4e052 100644 --- a/src/methods/information/estimators/mutual_info_estimators/GaoOhViswanath.jl +++ b/src/methods/information/estimators/mutual_info_estimators/GaoOhViswanath.jl @@ -3,6 +3,7 @@ export GaoOhViswanath """ GaoOhViswanath <: MutualInformationEstimator + GaoOhViswanath(definition = MIShannon(); k = 1, w = 0) The `GaoOhViswanath` is a mutual information estimator based on nearest neighbors, and is also called the bias-improved-KSG estimator, or BI-KSG, by [Gao2018](@cite). @@ -11,6 +12,14 @@ and is also called the bias-improved-KSG estimator, or BI-KSG, by [Gao2018](@cit - [`MIShannon`](@ref) +## Keyword arguments + +- **`k::Int`**: The number of nearest neighbors to consider. Only information about the + `k`-th nearest neighbor is actually used. +- **`w::Int`**: The Theiler window, which determines if temporal neighbors are excluded + during neighbor searches in the joint space. Defaults to `0`, meaning that only the + point itself is excluded. + ## Usage - Use with [`association`](@ref) to compute Shannon mutual information from input data. 
diff --git a/src/methods/information/estimators/mutual_info_estimators/KSG1.jl b/src/methods/information/estimators/mutual_info_estimators/KSG1.jl index 71adb71b..926bd53b 100644 --- a/src/methods/information/estimators/mutual_info_estimators/KSG1.jl +++ b/src/methods/information/estimators/mutual_info_estimators/KSG1.jl @@ -68,10 +68,13 @@ const KSG1 = KraskovStögbauerGrassberger1 function association(est::KSG1{<:MIShannon}, x::VectorOrStateSpaceSet...) verify_number_of_inputs_vars(est.definition, length(x)) - (; definition, k, w, metric_joint, metric_marginals) = est - joint = StateSpaceSet(x...) + marginals = map(xᵢ -> StateSpaceSet(xᵢ), x) + # Note: this uses a StateSpaceSet constructor that is overloaded from StateSpaceSets.jl, because the native + # one is extremely slow. + joint::StateSpaceSet = StateSpaceSet(marginals...) + M = length(x) N = length(joint) diff --git a/src/methods/information/estimators/mutual_info_estimators/KSG2.jl b/src/methods/information/estimators/mutual_info_estimators/KSG2.jl index b05ca6aa..8b5dbd44 100644 --- a/src/methods/information/estimators/mutual_info_estimators/KSG2.jl +++ b/src/methods/information/estimators/mutual_info_estimators/KSG2.jl @@ -97,8 +97,10 @@ function association(est::KSG2{<:MIShannon}, x::VectorOrStateSpaceSet...) error("Need at leats two input StateSpaceSets to compute mutual information between them.") (; definition, k, w, metric_joint, metric_marginals) = est - joint = StateSpaceSet(x...) marginals = map(xᵢ -> StateSpaceSet(xᵢ), x) + # Note: this uses a StateSpaceSet constructor that is overloaded from StateSpaceSets.jl, because the native + # one is extremely slow. + joint::StateSpaceSet = StateSpaceSet(marginals...) 
M = length(x) N = length(joint) diff --git a/src/utils/statespaceset_concat.jl b/src/utils/statespaceset_concat.jl new file mode 100644 index 00000000..b5826165 --- /dev/null +++ b/src/utils/statespaceset_concat.jl @@ -0,0 +1,9 @@ +# This is a workaround until we can make faster horizontal concatenation in StateSpaceSets.jl +import StateSpaceSets: StateSpaceSet +using StaticArrays: SVector +export StateSpaceSet + +function StateSpaceSet(x::VectorOrStateSpaceSet...; container = SVector) + xs = (xᵢ isa AbstractStateSpaceSet ? Matrix(xᵢ) : reshape(xᵢ, length(xᵢ), 1) for xᵢ in x) + StateSpaceSet(hcat(xs...); container) +end \ No newline at end of file diff --git a/src/utils/utils.jl b/src/utils/utils.jl index 5d53c19f..29216b97 100644 --- a/src/utils/utils.jl +++ b/src/utils/utils.jl @@ -1,2 +1,3 @@ include("logs.jl") include("multidimensional_surrogates.jl") +include("statespaceset_concat.jl") \ No newline at end of file diff --git a/test/causal_graphs/oce.jl b/test/causal_graphs/oce.jl index 1f9f7d40..c8a13983 100644 --- a/test/causal_graphs/oce.jl +++ b/test/causal_graphs/oce.jl @@ -5,6 +5,45 @@ using StableRNGs using Graphs.SimpleGraphs: SimpleEdge using DynamicalSystemsBase + +# ---------------------------------------------------------------- +# Check most possible combinations +# ---------------------------------------------------------------- +@testset "OCE: SurrogateAssociationTest with MI/CMI" begin + rng = StableRNG(123) + sys = system(Logistic4Chain(; rng)) + X = columns(first(trajectory(sys, 15, Ttr = 10000))) + + mi_ests = [ + KSG1(MIShannon(), k = 2, w = 1), + KSG2(MIShannon(), k = 2, w = 1), + GaussianMI(), + GaoOhViswanath(), + ChatterjeeCorrelation(), + PearsonCorrelation(), + ] + cmi_ests = [ + FPVP(CMIShannon(), k = 2, w = 1), + MesnerShalizi(CMIShannon(), k = 2, w = 1), + Rahimzamani(CMIShannon(); k = 2, w = 1), + GaussianCMI(), + AzadkiaChatterjeeCoefficient(), + PartialCorrelation(), + ] + for mi_est in mi_ests + utest = 
SurrogateAssociationTest(mi_est; rng, nshuffles = 2) + for cmi_est in cmi_ests + ctest = LocalPermutationTest(cmi_est; rng, nshuffles = 2) + alg = OCE(; utest, ctest, τmax = 1) + parents = infer_graph(alg, X; verbose = false) + @test parents isa Vector{<:OCESelectedParents} + end + end +end +# ---------------------------------------------------------------- +# A few examples with more data and more iterations +# ---------------------------------------------------------------- + rng = StableRNG(123) sys = system(Logistic4Chain(; rng)) X = columns(first(trajectory(sys, 50, Ttr = 10000))) @@ -20,7 +59,7 @@ parents = infer_graph(alg, X; verbose = true) # Convenience method for `StateSpaceSet`s. d = first(trajectory(sys, 50, Ttr = 10000)) -parents = infer_graph(alg, d; verbose = true) +parents = infer_graph(alg, d; verbose = false) @test parents isa Vector{<:OCESelectedParents} rng = StableRNG(123) @@ -31,7 +70,7 @@ uest = KSG1(MIShannon(); k = 5, w = 1) cest = MesnerShalizi(CMIShannon(); k = 5, w = 1) utest = SurrogateAssociationTest(uest; rng, nshuffles = 19) ctest = LocalPermutationTest(cest; rng, nshuffles = 19) -parents = infer_graph(OCE(; utest, ctest, τmax = 1), X; verbose = true) +parents = infer_graph(OCE(; utest, ctest, τmax = 1), X; verbose = false) @test parents isa Vector{<:OCESelectedParents} g = SimpleDiGraph(parents) @test g isa SimpleDiGraph diff --git a/test/independence/LocalPermutationTest/part_mutual_information.jl b/test/independence/LocalPermutationTest/part_mutual_information.jl index a808677b..a4902c46 100644 --- a/test/independence/LocalPermutationTest/part_mutual_information.jl +++ b/test/independence/LocalPermutationTest/part_mutual_information.jl @@ -10,9 +10,9 @@ Y = StateSpaceSet(y) Z = StateSpaceSet(z) nshuffles = 2 -est_ord = JointProbabilities(PMI(), CodifyVariables(OrdinalPatterns())) -est_vh = JointProbabilities(PMI(), CodifyVariables(ValueHistogram(3))) -est_dp = JointProbabilities(PMI(), CodifyVariables( Dispersion(m = 2))) +est_ord 
= JointProbabilities(PartialMutualInformation(), CodifyVariables(OrdinalPatterns())) +est_vh = JointProbabilities(PartialMutualInformation(), CodifyVariables(ValueHistogram(3))) +est_dp = JointProbabilities(PartialMutualInformation(), CodifyVariables( Dispersion(m = 2))) lptest_sp = LocalPermutationTest(est_ord; nshuffles, rng) lptest_vh = LocalPermutationTest(est_vh; nshuffles, rng) diff --git a/test/methods/correlation/azadkia_chatterjee_coefficient.jl b/test/methods/correlation/azadkia_chatterjee_coefficient.jl index a016efd2..c8f5b5ce 100644 --- a/test/methods/correlation/azadkia_chatterjee_coefficient.jl +++ b/test/methods/correlation/azadkia_chatterjee_coefficient.jl @@ -18,7 +18,10 @@ Tₙs_pairwise = zeros(10) m = AzadkiaChatterjeeCoefficient() for i = 1:10 - n = 1000; x1, x2 = randn(n), randn(n); y = x1 .^2 .+ x2 .^ 2; z = atan.(x1 ./ x2) + n = 1000; + local x1 = randn(n) + local x2 = randn(n); y = x1 .^2 .+ x2 .^ 2; + local z = atan.(x1 ./ x2) Tₙs_cond[i] = association(m, y, z, x1) Tₙs_pairwise[i] = association(m, y, z) end diff --git a/test/methods/correlation/chatterjee_correlation.jl b/test/methods/correlation/chatterjee_correlation.jl index 8755d678..fcd15cfe 100644 --- a/test/methods/correlation/chatterjee_correlation.jl +++ b/test/methods/correlation/chatterjee_correlation.jl @@ -14,8 +14,8 @@ m = ChatterjeeCorrelation() # We should get exactly the same results for preallocated measure # as for non-preallocated measure. 
for i = 1:10 - x = rand(rng, 15) - y = rand(rng, 15) + local x = rand(rng, 15) + local y = rand(rng, 15) # We must initialize identical seeds to ensure reproducible results rng_seed = rand(rng, 1:100) diff --git a/test/methods/information/mutual_informations/mi_shannon.jl b/test/methods/information/mutual_informations/mi_shannon.jl index fbd1686b..4d3419bf 100644 --- a/test/methods/information/mutual_informations/mi_shannon.jl +++ b/test/methods/information/mutual_informations/mi_shannon.jl @@ -70,6 +70,16 @@ def = MIShannon() @test association(GaussianMI(def, normalize = false), x, y) isa Real @test association(GaussianMI(def; normalize = true), x, y) isa Real +# input is vector + dataset +x = rand(rng, 30) +Y = StateSpaceSet(rand(rng, 30)) +@test association(KSG1(def, k = 2), x, Y) isa Real +@test association(KSG2(def, k = 2), x, Y) isa Real +@test association(GaoOhViswanath(def, k = 2), x, Y) isa Real +@test association(GaoKannanOhViswanath(def, k = 2), x, Y) isa Real +@test association(GaussianMI(def, normalize = false), x, Y) isa Real +@test association(GaussianMI(def; normalize = true), x, Y) isa Real + # --------------- # Pretty printing # --------------- diff --git a/test/methods/information/transfer_entropies/te_renyi_jizba.jl b/test/methods/information/transfer_entropies/te_renyi_jizba.jl index 58714e0e..d5be3564 100644 --- a/test/methods/information/transfer_entropies/te_renyi_jizba.jl +++ b/test/methods/information/transfer_entropies/te_renyi_jizba.jl @@ -1,13 +1,11 @@ using Test using Associations using Random -rng = MersenneTwister(1234) - -# Double-sum estimation. -x = randn(rng, 100) -y = randn(rng, 100) -z = randn(rng, 100) +using StableRNGs +rng = StableRNG(123) +sys = system(Logistic4Chain(; rng)) +x, y, z, w = columns(first(trajectory(sys, 30, Ttr = 10000))) def = TERenyiJizba(base = 3, q = 0.5) # Here we test all the possible "generic" ways of estimating `TERenyiJizba`. 
@@ -26,13 +24,29 @@ est_disc = EntropyDecomposition(TERenyiJizba(), PlugIn(Renyi()), discretization) @test association(est_disc, x, z) isa Real @test association(est_disc, x, z, y) isa Real +# Check that in the limit of a lot of points, we roughly get the same answer for transfer +# operator and regular value binning. +x, y, z, w = columns(first(trajectory(sys, 1000, Ttr = 10000))) + +te_def = TERenyiJizba(base = 3, q = 0.5) +def_renyi = Renyi() +disc_vf = CodifyVariables(ValueBinning(2)) +disc_to = CodifyVariables(TransferOperator(RectangularBinning(2, precise))) # + +est_disc_vf = EntropyDecomposition(te_def, PlugIn(def_renyi), disc_vf); +est_disc_to = EntropyDecomposition(te_def, PlugIn(def_renyi), disc_to); +te_vf = association(est_disc_vf, x, z) +te_to = association(est_disc_to, x, z) +@test in_agreement(te_vf, te_to; agreement_threshold = 0.005) # --------------- # Pretty printing # --------------- -out_hdiff = repr(EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi()))) -out_hdisc = repr(EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2)))) +te_def = TERenyiJizba(base = 3, q = 0.5) +out_hdiff = repr(EntropyDecomposition(te_def, LeonenkoProzantoSavani(Renyi()))) +out_hdisc = repr(EntropyDecomposition(te_def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2)))) @test occursin("TEᵣⱼ(s → t | c) = hᵣ(t⁺, t⁻,c⁻) - hᵣ(t⁻,c⁻) - hᵣ(t⁺,s⁻,t⁻,c⁻) + hᵣ(s⁻,t⁻,c⁻)", out_hdiff) -@test occursin("TEᵣⱼ(s → t | c) = Hᵣ(t⁺, t⁻,c⁻) - Hᵣ(t⁻,c⁻) - Hᵣ(t⁺,s⁻,t⁻,c⁻) + Hᵣ(s⁻,t⁻,c⁻)", out_hdisc) \ No newline at end of file +@test occursin("TEᵣⱼ(s → t | c) = Hᵣ(t⁺, t⁻,c⁻) - Hᵣ(t⁻,c⁻) - Hᵣ(t⁺,s⁻,t⁻,c⁻) + Hᵣ(s⁻,t⁻,c⁻)", out_hdisc) + diff --git a/test/runtests.jl b/test/runtests.jl index aaafd1ee..82a48830 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ defaultname(file) = uppercasefirst(replace(splitext(basename(file))[1], '_' => ' testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end @testset
"Associations.jl" begin + include("test_utils.jl") include("test_systems.jl") testfile("deprecations.jl") testfile("methods/methods.jl") diff --git a/test/test_utils.jl b/test/test_utils.jl new file mode 100644 index 00000000..0e5a811e --- /dev/null +++ b/test/test_utils.jl @@ -0,0 +1,10 @@ +# Return `true` if `val1` and `val2` agree to within the relative tolerance +# `agreement_threshold`, a fraction of the larger magnitude (default `0.02`, i.e. 2%). +# Used to check agreement between `ValueBinning` and `TransferOperator` estimation. +function in_agreement(val1, val2; agreement_threshold = 0.02) + # Equal values always agree. Handling them first also avoids the 0/0 (both zero) + # and Inf - Inf (equal infinities) cases, which would otherwise yield `NaN`. + val1 == val2 && return true + largest_magnitude = max(abs(val1), abs(val2)) + return abs(val1 - val2) / largest_magnitude <= agreement_threshold +end \ No newline at end of file