diff --git a/Project.toml b/Project.toml index 076f04f..539516e 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.3.7-DEV" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Requires = "ae029012-a4dd-5104-9daa-d747884805df" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" @@ -28,6 +29,7 @@ Requires = "1.2" StaticArrays = "1.2" Statistics = "1" StatsAPI = "1.6" +LinearAlgebra = "1.6" StatsFuns = "0.9.15, 1" julia = "1.6" diff --git a/docs/Project.toml b/docs/Project.toml index c568697..17599b5 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,5 +3,6 @@ DistributionFits = "45214091-1ed4-4409-9bcf-fdb48a05e921" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" diff --git a/docs/make.jl b/docs/make.jl index cd6cde8..6494efe 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -29,6 +29,7 @@ makedocs(; "LogitNormal" => "logitnormal.md", "Weibull" => "weibull.md", "Gamma" => "gamma.md", + "MvLogNormal" => "mvlognormal.md", ], "Dependencies" => "set_optimize.md", "API" => "api.md", diff --git a/docs/src/lognormal.md b/docs/src/lognormal.md index 006a8d8..fde54c9 100644 --- a/docs/src/lognormal.md +++ b/docs/src/lognormal.md @@ -21,7 +21,10 @@ d = LogNormal(log(2), log(1.2)) true ``` -Alternatively the distribution can be specified by its mean and ``\sigma^*`` using type [`AbstractΣstar`](@ref) +Alternatively the distribution can be specified by its mean and either +- Multiplicative standard deviation,``\sigma^*``, using type [`AbstractΣstar`](@ref) +- Standard deviation at log-scale, ``\sigma``, or +- relative error, 
``cv``. ```jldoctest; output = false, setup = :(using DistributionFits,Optim) d = fit(LogNormal, 2, Σstar(1.2)) @@ -29,6 +32,18 @@ d = fit(LogNormal, 2, Σstar(1.2)) # output true ``` +```jldoctest; output = false, setup = :(using DistributionFits,Optim) +d = fit_mean_Σ(LogNormal, 2, 1.2) +(mean(d), d.σ) == (2, 1.2) +# output +true +``` +```jldoctest; output = false, setup = :(using DistributionFits,Optim) +d = fit_mean_relerror(LogNormal, 2, 0.2) +(mean(d), std(d)/mean(d)) .≈ (2, 0.2) +# output +(true, true) +``` ## Detailed API @@ -37,7 +52,7 @@ true ``` ```@docs -fit(::Type{LogNormal}, ::T, ::AbstractΣstar) where T<:Real +fit(d::Type{LogNormal}, mean, σstar::AbstractΣstar) ``` ```@docs diff --git a/docs/src/mvlognormal.md b/docs/src/mvlognormal.md new file mode 100644 index 0000000..19e79fc --- /dev/null +++ b/docs/src/mvlognormal.md @@ -0,0 +1,22 @@ +```@meta +CurrentModule = DistributionFits +``` + +# Multivariate LogNormal distribution + +Can be fitted to a given mean, provided the Covariance of the underlying +normal distribution. + +```@docs +fit_mean_Σ(::Type{MvLogNormal}, mean::AbstractVector{T1}, Σ::AbstractMatrix{T2}) where {T1 <:Real,T2 <:Real} +``` + +```jldoctest; output = false, setup = :(using DistributionFits) +Σ = hcat([0.6,0.02],[0.02,0.7]) +μ = [1.2,1.3] +d = MvLogNormal(μ, Σ) +d2 = fit_mean_Σ(MvLogNormal, mean(d), Σ) +isapprox(d2, d, rtol = 1e-6) +# output +true +``` diff --git a/src/DistributionFits.jl b/src/DistributionFits.jl index 066fd27..91b20f0 100644 --- a/src/DistributionFits.jl +++ b/src/DistributionFits.jl @@ -5,6 +5,7 @@ using Reexport using FillArrays, StaticArrays using StatsFuns: logit, logistic, normcdf +using LinearAlgebra #using Infiltrator if !isdefined(Base, :get_extension) @@ -26,7 +27,8 @@ export @qs_cf90, @qs_cf95, qp, qp_ll, qp_l, qp_m, qp_u, qp_uu, qs_cf90, qs_cf95, - fit_mean_relerror + fit_mean_relerror, + fit_mean_Σ # document but do not export - need to qualify by 'DistributionFits.' 
# export @@ -53,5 +55,6 @@ end # fitting distributions to stats include("fitstats.jl") include("univariates.jl") +include("multivariates.jl") end diff --git a/src/multivariate/mvlognormal.jl b/src/multivariate/mvlognormal.jl new file mode 100644 index 0000000..a0f74fc --- /dev/null +++ b/src/multivariate/mvlognormal.jl @@ -0,0 +1,30 @@ +""" + fit_mean_Σ(::Type{<:Distribution}, mean, Σ) + +Fit a distribution to a given mean and the uncertainty parameter `Σ`. + +The meaning of `Σ` depends on the type of distribution: +- `MvLogNormal`: the covariance matrix of the associated normal distribution +- `LogNormal`: the scale parameter, i.e. the standard deviation at log-scale, `σ` +""" +function fit_mean_Σ(::Type{MvLogNormal}, mean::AbstractVector{T1}, Σ::AbstractMatrix{T2}) where {T1 <:Real,T2 <:Real} + _T = promote_type(T1, T2) + fit_mean_Σ(MvLogNormal{_T}, mean, Σ) +end +function fit_mean_Σ(::Type{MvLogNormal{T}}, mean::AbstractVector{T1}, Σ::AbstractMatrix{T2}) where {T, T1 <:Real,T2 <:Real} + # guard against silent broadcasting of a length-1 mean against diag(Σ) + length(mean) == size(Σ, 1) == size(Σ, 2) || throw(DimensionMismatch( + "length of mean must match both dimensions of Σ")) + meanT = T1 == T ? mean : begin + meanT = similar(mean, T) + meanT .= mean + end + ΣT = T2 == T ? 
Σ : begin + ΣT = similar(Σ, T) + ΣT .= Σ + end + σ2 = diag(ΣT) + # the mean of a lognormal marginal is exp(μ + σ²/2); solve for μ + μ = log.(meanT) .- σ2 ./ 2 + MvLogNormal(μ, ΣT) +end \ No newline at end of file diff --git a/src/multivariates.jl b/src/multivariates.jl new file mode 100644 index 0000000..ba9228d --- /dev/null +++ b/src/multivariates.jl @@ -0,0 +1,16 @@ +##### Specific distributions ##### + +for fname in [ + # "dirichlet.jl", + # "multinomial.jl", + # "dirichletmultinomial.jl", + # "jointorderstatistics.jl", + # "mvnormal.jl", + # "mvnormalcanon.jl", + # "mvlogitnormal.jl", + "mvlognormal.jl", + # "mvtdist.jl", + # "vonmisesfisher.jl" + ] +include(joinpath("multivariate", fname)) +end diff --git a/src/univariate/continuous/lognormal.jl b/src/univariate/continuous/lognormal.jl index ad8fcd3..b960f63 100644 --- a/src/univariate/continuous/lognormal.jl +++ b/src/univariate/continuous/lognormal.jl @@ -101,17 +101,20 @@ true σstar(d::LogNormal) = exp(params(d)[2]) """ - fit(D, mean, σstar) + fit(D, mean, σstar::AbstractΣstar) + fit_mean_Σ(D, mean, σ::Real) Fit a statistical distribution of type `D` to mean and multiplicative -standard deviation. +standard deviation, `σstar`, or scale parameter at log-scale: `σ`. # Arguments - `D`: The type of distribution to fit - `mean`: The moments of the distribution - `σstar::AbstractΣstar`: The multiplicative standard deviation +- `σ`: The standard-deviation parameter at log-scale -See also [`σstar`](@ref), [`AbstractΣstar`](@ref). +The first version uses type [`AbstractΣstar`](@ref) to distinguish from +other methods of function fit. 
# Examples ```jldoctest fm1; output = false, setup = :(using DistributionFits) @@ -121,17 +124,23 @@ d = fit(LogNormal, 2, Σstar(1.1)); true ``` """ -function fit(::Type{LogNormal}, mean::T, σstar::AbstractΣstar) where {T <: Real} - _T = promote_type(T, eltype(σstar)) - fit(LogNormal{_T}, mean, σstar) +function fit(d::Type{LogNormal}, mean, σstar::AbstractΣstar) + fit_mean_Σ(d, mean, log(σstar())) end - -function fit(::Type{LogNormal{T}}, mean::Real, σstar::AbstractΣstar) where {T} - σ = log(σstar()) +function fit(d::Type{LogNormal{T}}, mean::Real, σstar::AbstractΣstar) where {T} + fit_mean_Σ(d, mean, log(σstar())) +end +function fit_mean_Σ(::Type{LogNormal}, mean::T1, σ::T2) where {T1 <: Real,T2 <: Real} + _T = promote_type(T1, T2) + fit_mean_Σ(LogNormal{_T}, mean, σ) +end +function fit_mean_Σ(::Type{LogNormal{T}}, mean::Real, σ::Real) where {T} + #σ = log(σstar()) μ = log(mean) - σ * σ / 2 LogNormal(T(μ), T(σ)) end + """ fit_mean_relerror(D, mean, relerror) diff --git a/test/Project.toml b/test/Project.toml index 0037303..a12d82e 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,8 +2,10 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" diff --git a/test/multivariate/mvlognormal.jl b/test/multivariate/mvlognormal.jl new file mode 100644 index 0000000..d8c20aa --- /dev/null +++ b/test/multivariate/mvlognormal.jl @@ -0,0 +1,17 @@ +using PDMats +using DistributionFits +using Test + +@testset "fit_mean_Σ" begin + Σ = PDiagMat([0.6,0.7]) + μ = [1.2,1.2] + d = MvLogNormal(μ, Σ) + mean(d) + d2 = fit_mean_Σ(MvLogNormal, 
mean(d), params(d)[2]) + @test d2 ≈ d rtol = 1e-6 + # + # Float32 + d2_f32 = fit_mean_Σ(MvLogNormal, Float32.(mean(d)), Float32.(params(d)[2])) + @test mean(d2_f32) ≈ mean(d) rtol = 1e-5 + @test partype(d2_f32) === Float32 +end; diff --git a/test/multivariate/test_multivariate.jl b/test/multivariate/test_multivariate.jl new file mode 100644 index 0000000..2dc9f7d --- /dev/null +++ b/test/multivariate/test_multivariate.jl @@ -0,0 +1,134 @@ +using DistributionFits +using Test +using Random: Random +using LoggingExtras +using Optim + +pkgdir = dirname(dirname(pathof(DistributionFits))) +testdir = joinpath(pkgdir, "test") +include(joinpath(testdir,"testutils.jl")) + +function test_univariate_fits(d, D = typeof(d)) + @testset "fit moments" begin + if !occursin("fit(::Type{D}", + string(first(methods(fit, (Type{typeof(d)}, AbstractMoments))))) + m = Moments(mean(d), var(d)) + d_fit = fit(D, m) + @test d ≈ d_fit + @test partype(d_fit) == partype(d) + end + end + @testset "fit two quantiles" begin + qpl = @qp_l(quantile(d, 0.05)) + qpu = @qp_u(quantile(d, 0.95)) + d_fit = fit(D, qpl, qpu) + @test quantile.(d, [qpl.p, qpu.p]) ≈ [qpl.q, qpu.q] + d_fit = fit(D, qpl, qpu) + @test quantile.(d, [qpl.p, qpu.p]) ≈ [qpl.q, qpu.q] + d_fit = fit(D, qpu, qpl) # sort + @test quantile.(d, [qpl.p, qpu.p]) ≈ [qpl.q, qpu.q] + @test partype(d_fit) == partype(d) + end + @testset "fit two quantiles, function version" begin + P = partype(d) + qpl = qp_l(P(quantile(d, 0.05))) + qpu = qp_u(P(quantile(d, 0.95))) + d_fit = fit(D, qpl, qpu) + @test quantile.(d, [qpl.p, qpu.p]) ≈ [qpl.q, qpu.q] + d_fit = fit(D, qpl, qpu) + @test quantile.(d, [qpl.p, qpu.p]) ≈ [qpl.q, qpu.q] + d_fit = fit(D, qpu, qpl) # sort + @test quantile.(d, [qpl.p, qpu.p]) ≈ [qpl.q, qpu.q] + @test partype(d_fit) == partype(d) + end + @testset "typeof mean, mode equals partype" begin + if !(d isa Gamma && first(params(d)) < 1) + @test mean(d) isa partype(d) + @test mode(d) isa partype(d) + end + end + @testset "quantile is of eltype" begin + # 
quantile still Float64 for Normal of eltype Float32 + if d isa Normal && eltype(d) != Float64 + @test_broken quantile(d, 0.1) isa eltype(d) + else + @test quantile(d, 0.1) isa eltype(d) + end + # quantile is sample-like: stick to eltype - special of normal + # broken, because quantile Normal{Float32} returns Float32 + # but eltype(D{Float32}) is Float64 + if d isa Union{LogNormal, LogitNormal, Exponential, Laplace, Weibull} && + partype(d) != eltype(d) + @test_broken quantile(d, 0.1f0) isa eltype(d) + else + @test quantile(d, 0.1f0) isa eltype(d) + end + end + @testset "fit to quantilepoint and mean" begin + if !occursin("fit_mean_quantile(::Type{D}", + string(first(methods(fit_mean_quantile, + (Type{typeof(d)}, partype(d), QuantilePoint))))) + m = log(mean(d)) + qp = @qp_u(quantile(d, 0.95)) + logger = d isa Exponential ? MinLevelLogger(current_logger(), Logging.Error) : + current_logger() + with_logger(logger) do + d_fit = fit_mean_quantile(D, mean(d), qp) + @test d_fit ≈ d + @test partype(d_fit) == partype(d) + d_fit = fit(D, mean(d), qp, Val(:mean)) + @test d_fit ≈ d + @test partype(d_fit) == partype(d) + # with lower quantile + qp = @qp_l(quantile(d, 0.05)) + d_fit = fit_mean_quantile(D, mean(d), qp) + @test d_fit ≈ d + @test partype(d_fit) == partype(d) + end + # very close to mean can give very different results: + # qp = @qp(mean(d)-1e-4,0.95) + # d_fit = fit_mean_quantile(D, mean(d), qp) + # @test mean(d_fit) ≈ mean(d) && quantile(d_fit, qp.p) ≈ qp.q + end + end + @testset "fit to quantilepoint and mode" begin + if !(d isa Gamma && first(params(d)) < 1) && + !(d isa Weibull) + qp = qp_u(quantile(d, 0.95)) + d_fit = fit_mode_quantile(D, mode(d), qp) + @test d_fit≈d atol=0.1 + d_fit = fit(D, mode(d), qp, Val(:mode)) + @test d_fit≈d atol=0.1 + @test partype(d_fit) == partype(d) + # with lower quantile + qp = qp_ll(quantile(d, 0.025)) + d_fit = fit(D, mode(d), qp, Val(:mode)) + @test mode(d_fit) ≈ mode(d) + @test quantile(d_fit, qp.p)≈qp.q atol=0.01 + @test 
partype(d_fit) == partype(d) + end + end + @testset "fit to quantilepoint and median" begin + qp = @qp_u(quantile(d, 0.95)) + logger = d isa Exponential ? MinLevelLogger(current_logger(), Logging.Error) : + current_logger() + with_logger(logger) do + d_fit = fit(D, median(d), qp, Val(:median)) + @test d_fit ≈ d + @test partype(d_fit) == partype(d) + end + end +end + + +const tests = [ + "mvlognormal", +] +#tests = ["mvlognormal"] + +for t in tests + @testset "Test $t" begin + Random.seed!(345679) + include(joinpath(testdir,"multivariate","$t.jl")) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 75826fc..4dfe22e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ tmpf = () -> begin - push!(LOAD_PATH, expanduser("~/julia/devtools/")) # access local pack - push!(LOAD_PATH, joinpath(pwd(), "test/")) # access local pack + pop!(LOAD_PATH) + push!(LOAD_PATH, joinpath(pwd(), "test/")) + push!(LOAD_PATH, expanduser("~/julia/devtools_$(VERSION.major).$(VERSION.minor)")) end using Test, SafeTestsets @@ -15,6 +16,8 @@ const GROUP = get(ENV, "GROUP", "All") # defined in in CI.yml @time @safetestset "fitstats" include("fitstats.jl") #@safetestset "Tests" include("test/univariate/test_univariate.jl") @time @safetestset "test_univariate" include("univariate/test_univariate.jl") + #@safetestset "Tests" include("test/multivariate/test_multivariate.jl") + @time @safetestset "test_multivariate" include("multivariate/test_multivariate.jl") end if GROUP == "All" || GROUP == "JET" #@safetestset "Tests" include("test/test_JET.jl") diff --git a/test/test_JET.jl b/test/test_JET.jl index f54ee5d..c81294a 100644 --- a/test/test_JET.jl +++ b/test/test_JET.jl @@ -1,10 +1,18 @@ using DistributionFits using Test using JET: JET +using Logging, LoggingExtras @testset "JET" begin @static if VERSION ≥ v"1.9.2" - JET.test_package(DistributionFits; target_modules = (@__MODULE__,)) + logger = ActiveFilteredLogger(current_logger()) do args + is_filtered = 
!isnothing(match(r"overwritten in module .+ on the same line", string(args.message))) + # ActiveFilteredLogger keeps a record when the predicate returns true: drop the matches + !is_filtered + end + with_logger(logger) do + JET.test_package(DistributionFits; target_modules = (@__MODULE__,)) + end end end; # JET.report_package(DistributionFits) # to debug the errors diff --git a/test/univariate/continuous/lognormal.jl b/test/univariate/continuous/lognormal.jl index 7700af0..e0347db 100644 --- a/test/univariate/continuous/lognormal.jl +++ b/test/univariate/continuous/lognormal.jl @@ -24,6 +24,11 @@ end; # dfit32 = fit(LogNormal, Float32(mean(d)), Σstar(1.2f0)) @test partype(dfit32) === Float32 + # + # former fit without type was type piracy + # d_fit = fit(LogNormal, mean(d), log(1.2)) + # @test d == d_fit + # @test σstar(d) ≈ 1.2 end; @testset "fit_mean_relerror" begin