diff --git a/Project.toml b/Project.toml index 5b290a67..9dbd3f90 100644 --- a/Project.toml +++ b/Project.toml @@ -12,6 +12,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +GaussianMixtures = "cc18c42c-b769-54ff-9e2a-b28141a64aae" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6" @@ -19,6 +20,7 @@ KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearRegression = "92481ed7-9fb7-40fd-80f2-46fd0f076581" +LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" @@ -43,8 +45,8 @@ Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" ValueShapes = "136a8f8c-c49b-4edb-8b98-f3d64d48be8f" [weakdeps] -RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" [extensions] LegendSpecFitsRecipesBaseExt = ["RecipesBase", "Plots"] @@ -59,11 +61,13 @@ Distributions = "0.24, 0.25" FillArrays = "0.7, 0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1" Format = "1.2, 1.3" ForwardDiff = "0.10" +GaussianMixtures = "0.3" IntervalSets = "0.7" InverseFunctions = "0.1" IrrationalConstants = "0.1, 0.2" KernelDensity = "0.5, 0.6" LaTeXStrings = "1.3" +LogExpFunctions = "0.3" LinearAlgebra = "1" LinearRegression = "0.2" LsqFit = "0.14, 0.15" diff --git a/ext/LegendSpecFitsRecipesBaseExt.jl b/ext/LegendSpecFitsRecipesBaseExt.jl index 888530ab..246dd6fd 100644 --- a/ext/LegendSpecFitsRecipesBaseExt.jl +++ b/ext/LegendSpecFitsRecipesBaseExt.jl @@ -490,6 +490,198 @@ end end end +@recipe function f(report::NamedTuple{(:peakpos, :peakpos_cal, :h_uncal, :h_calsimple)}; cal=true) + legend := :topright + size := (1000, 600) + thickness_scaling := 1.5 + framestyle := :box + yformatter := :plain + if cal + h = LinearAlgebra.normalize(report.h_calsimple, mode = :density) + xlabel := "Peak Amplitudes (P.E.)" + ylabel := "Counts / $(round_wo_units(step(first(h.edges)), digits=2)) P.E." + xticks := (0:0.5:last(first(h.edges))) + pps = report.peakpos_cal + else + h = LinearAlgebra.normalize(report.h_uncal, mode = :density) + xlabel := "Peak Amplitudes (ADC)" + ylabel := "Counts / $(round_wo_units(step(first(h.edges)), digits=2)) ADC" + pps = report.peakpos + end + xlims := (0, last(first(h.edges))) + min_y = minimum(h.weights) == 0.0 ? 1e-3*maximum(h.weights) : 0.8*minimum(h.weights) + ylims --> (min_y, maximum(h.weights)*1.1) + @series begin + seriestype := :stepbins + label := "amps" + h + end + y_vline = min_y:1:maximum(h.weights)*1.1 + for (i, p) in enumerate(pps) + @series begin + seriestype := :line + if i == 1 + label := "Peak Pos. Guess" + else + label := "" + end + color := :red + linewidth := 1.5 + fill(p, length(y_vline)), y_vline + end + end +end + +@recipe function f(report_sipm::NamedTuple{(:h_cal, :f_fit, :f_fit_components, :min_pe, :max_pe, :bin_width, :n_mixtures, :n_pos_mixtures, :peaks, :positions, :μ , :gof)}; xerrscaling=1, show_residuals=true, show_peaks=true, show_components=false) + legend := :topright + size := (1000, 600) + margins := (4, :mm) + thickness_scaling := 1.5 + framestyle := :box + yformatter := :plain + foreground_color_legend := :silver + background_color_legend := :white + ylabel := "Counts / $(round_wo_units(report_sipm.bin_width * 1e3, digits=2))E-3 P.E." + xlims := (first(first(report_sipm.h_cal.edges)), last(first(report_sipm.h_cal.edges))) + xticks := (ceil(first(first(report_sipm.h_cal.edges)))-0.5:0.5:last(first(report_sipm.h_cal.edges))) + min_y = minimum(report_sipm.h_cal.weights) == 0.0 ? 1e-3*maximum(report_sipm.h_cal.weights) : 0.8*minimum(report_sipm.h_cal.weights) + ylims := (min_y, maximum(report_sipm.h_cal.weights)*1.1) + bin_centers = collect(report_sipm.h_cal.edges[1])[1:end-1] .+ diff(collect(report_sipm.h_cal.edges[1]))[1]/2 + @series begin + yscale --> :log10 + label := "Amplitudes" + subplot --> 1 + seriestype := :bar + alpha --> 1.0 + fillalpha --> 0.85 + fillcolor --> :lightgrey + linecolor --> :lightgrey + fillrange := 1e-1 + bins --> :sqrt + bar_width := diff(report_sipm.h_cal.edges[1])[1] + bin_centers, report_sipm.h_cal.weights + end + @series begin + seriestype := :line + if !isempty(report_sipm.gof) + label := "Best Fit (p = $(round(report_sipm.gof.pvalue, digits=2)))" + else + label := "Best Fit" + end + if show_residuals && !isempty(report_sipm.gof) + xlabel := "" + xticks := [] + else + xlabel := "Peak Amplitudes (P.E.)" + end + subplot --> 1 + color := :black + linewidth := 1.5 + report_sipm.min_pe:report_sipm.bin_width/100:report_sipm.max_pe, report_sipm.f_fit + end + if show_components + for (i, μ) in enumerate(report_sipm.μ) + @series begin + seriestype := :line + if i == 1 + label := "Mixture Components" + else + label := "" + end + if show_residuals && !isempty(report_sipm.gof) + xlabel := "" + xticks := [] + else + xlabel := "Peak Amplitudes (P.E.)" + end + subplot --> 1 + color := i + 1 + length(report_sipm.positions) + linestyle := :dash + linewidth := 1.3 + # fillalpha := 1 + alpha := 0.4 + xi = report_sipm.min_pe:report_sipm.bin_width/100:report_sipm.max_pe + yi = Base.Fix2(report_sipm.f_fit_components, i).(xi) + # ribbon := (yi .- 1, zeros(length(xi))) + xi, yi + end + end + end + if show_peaks + y_vline = [min_y, maximum(report_sipm.h_cal.weights)*1.1] + for (i, p) in enumerate(report_sipm.positions) + @series begin + seriestype := :line + if xerrscaling == 1 + label := "$(report_sipm.peaks[i]) P.E. [$(report_sipm.n_pos_mixtures[i]) Mix.]" + else + label := "$(report_sipm.peaks[i]) P.E. [$(report_sipm.n_pos_mixtures[i]) Mix.] (error x$xerrscaling)" + end + subplot --> 1 + color := i + 1 + linewidth := 1.5 + fill(value(p), length(y_vline)), y_vline + end + @series begin + seriestype := :vspan + label := "" + fillalpha := 0.1 + subplot --> 1 + if show_residuals && !isempty(report_sipm.gof) + xlabel := "" + xticks := [] + else + xlabel := "Peak Amplitudes (P.E.)" + end + color := i + 1 + [value(p) - xerrscaling * uncertainty(p), value(p) + xerrscaling * uncertainty(p)] + end + end + end + if show_residuals && !isempty(report_sipm.gof) + link --> :x + layout --> @layout([a{0.7h}; b{0.3h}]) + @series begin + seriestype := :hline + ribbon := 3 + subplot --> 2 + fillalpha := 0.5 + label := "" + fillcolor := :lightgrey + linecolor := :darkgrey + [0.0] + end + @series begin + seriestype := :hline + ribbon := 1 + subplot --> 2 + fillalpha := 0.5 + label := "" + fillcolor := :grey + linecolor := :darkgrey + [0.0] + end + @series begin + seriestype := :scatter + subplot --> 2 + label := "" + title := "" + markercolor --> :darkgrey + markersize --> 3.0 + markerstrokewidth := 0.1 + ylabel := "Residuals (σ)" + xlabel := "Peak Amplitudes (P.E.)" + link --> :x + top_margin --> (-8, :mm) + ylims := (-6, 6) + xlims := (first(first(report_sipm.h_cal.edges)), last(first(report_sipm.h_cal.edges))) + yscale --> :identity + yticks := ([-3, 0, 3]) + report_sipm.gof.bin_centers, [ifelse(abs(r) < 1e-6, 0.0, r) for r in report_sipm.gof.residuals_norm] + end + end +end + @recipe function f(report_ctc::NamedTuple{(:peak, :window, :fct, :bin_width, :bin_width_qdrift, :e_peak, :e_ctc, :qdrift_peak, :h_before, :h_after, :fwhm_before, :fwhm_after, :report_before, :report_after)}) if !("StatsPlots" in string.(Base.loaded_modules_array())) throw(ErrorException("StatsPlots not loaded. Please load StatsPlots before using this recipe.")) @@ -608,9 +800,9 @@ end framestyle := :box xformatter := :plain yformatter := :plain + margins := (0, :mm) if !isempty(report.gof) layout --> @layout([a{0.8h}; b{0.2h}]) - margins --> (-11.5, :mm) link --> :x end @series begin @@ -643,6 +835,10 @@ end label := "Data (x-Error x$(xerrscaling), y-Error x$(yerrscaling))" end markercolor --> :black + if !isempty(report.gof) + xguide := "" + xticks := [] + end xerror := uncertainty.(report.x) .* xerrscaling yerror := uncertainty.(report.y) .* yerrscaling value.(report.x), value.(report.y) @@ -663,6 +859,10 @@ end ms --> 3 markershape --> :circle markerstrokecolor --> :black + if !isempty(report.gof) + xguide := "" + xticks := [] + end linewidth --> 0.5 markercolor --> :silver xerror := uncertainty.(additional_pts.x) .* xerrscaling @@ -709,7 +909,8 @@ end subplot --> 2 label --> "" markercolor --> :black - ylabel --> "Residuals (σ)" + yguide := "Residuals (σ)" + top_margin --> (-4, :mm) ylims --> (-5, 5) yticks --> ([-3, 0, 3]) value.(report.x), report.gof.residuals_norm @@ -729,7 +930,6 @@ end else NamedTuple() end - xlabel := "Energy (keV)" legend := :topleft framestyle := :box xlims := (0, 3000) @@ -737,6 +937,7 @@ end @series begin grid --> :all xerrscaling --> xerrscaling + xlabel := "Energy (keV)" yerrscaling --> yerrscaling additional_pts --> additional_pts (par = report.par, f_fit = report.f_fit, x = report.x, y = report.y, gof = get(report, :gof, NamedTuple())) @@ -762,36 +963,22 @@ end if report.type == :cal additional_pts = if !isempty(additional_pts) μ_cal = report.f_fit.(additional_pts.μ) .* report.e_unit - (x = additional_pts.μ, y = additional_pts.peaks, - residuals_norm = (value.(μ_cal) .- additional_pts.peaks)./ uncertainty.(μ_cal)) + (x = additional_pts.μ, y = ustrip.(report.e_unit, additional_pts.peaks), + residuals_norm = (value.(μ_cal) .- additional_pts.peaks) ./ uncertainty.(μ_cal)) else NamedTuple() end - xlabel := "Energy (ADC)" - legend := :bottomright framestyle := :box - xlims := (0, 168000) - xticks := (0:16000:176000) + xlims := (0, 1.1*maximum(value.(report.x))) @series begin + xlabel := "Energy (ADC)" + ylabel := "Energy ($(report.e_unit))" grid --> :all xerrscaling --> xerrscaling yerrscaling --> yerrscaling additional_pts := additional_pts (par = report.par, f_fit = report.f_fit, x = report.x, y = report.y, gof = report.gof) end - @series begin - seriestype := :hline - label := L"Q_{\beta \beta}" - color := :green - fillalpha := 0.2 - linewidth := 2.5 - xticks := :none - ylabel := "Energy ($(report.e_unit))" - ylims := (0, 1.2*value(maximum(report.y))) - yticks := (500:500:3000) - subplot := 1 - [2039] - end end end diff --git a/src/LegendSpecFits.jl b/src/LegendSpecFits.jl index d175f167..c8929dd7 100644 --- a/src/LegendSpecFits.jl +++ b/src/LegendSpecFits.jl @@ -14,13 +14,16 @@ using Random using ArgCheck using ArraysOfArrays using BAT +using DensityInterface using Distributions using FillArrays using Format using ForwardDiff +using GaussianMixtures using IntervalSets using InverseFunctions using IrrationalConstants +using LogExpFunctions using LsqFit using Measurements using Measurements: value @@ -31,6 +34,7 @@ using OptimizationNLopt using OptimizationOptimJL using PropDicts using RadiationSpectra +using RadiationSpectra: peakfinder using Roots using SpecialFunctions using StatsBase @@ -74,6 +78,7 @@ include("pseudo_prior.jl") include("specfit_functions.jl") include("calfunc.jl") include("sipm_simple_calibration.jl") +include("sipmfit.jl") abstract type UncertTag end ForwardDiff.:(≺)(::Type{<:ForwardDiff.Tag}, ::Type{UncertTag}) = true ForwardDiff.:(≺)(::Type{UncertTag}, ::Type{<:ForwardDiff.Tag}) = false diff --git a/src/fit_calibration.jl b/src/fit_calibration.jl index 1e359673..be5cf033 100644 --- a/src/fit_calibration.jl +++ b/src/fit_calibration.jl @@ -13,7 +13,7 @@ function fit_calibration(pol_order::Int, µ::AbstractVector{<:Union{Unitful.Real @assert length(peaks) == length(μ) "Number of calibration points does not match the number of energies" @assert pol_order >= 1 "The polynomial order must be greater than 0" - e_unit = u"keV" + e_unit = unit(first(peaks)) # make all inputs unitless with the dimension e_unit μ_nounit = if !Unitful.isunitless(unit(first(μ))) @warn "µ has a unit, it will be converted to $(e_unit) and stripped." diff --git a/src/gof.jl b/src/gof.jl index b4b7d4af..61234b22 100644 --- a/src/gof.jl +++ b/src/gof.jl @@ -16,10 +16,10 @@ end """ - _get_model_counts(f_fit::Base.Callable,v_ml::NamedTuple,bin_centers::StepRangeLen,bin_widths::StepRangeLen) + _get_model_counts(f_fit::Base.Callable,v_ml::Union{NamedTuple, AbstractVector},bin_centers::StepRangeLen,bin_widths::StepRangeLen) aux. function to get modelled peakshape based on histogram binning and best-fit parameter """ -function _get_model_counts(f_fit::Base.Callable, v_ml::NamedTuple, bin_centers::Union{StepRangeLen, Vector{<:Real}}, bin_widths::Union{StepRangeLen, Vector{<:Real}}) +function _get_model_counts(f_fit::Base.Callable, v_ml::Union{NamedTuple, AbstractVector}, bin_centers::Union{StepRangeLen, Vector{<:Real}}, bin_widths::Union{StepRangeLen, Vector{<:Real}}) model_func = Base.Fix2(f_fit, v_ml) # fix the fit parameters to ML best-estimate model_counts = bin_widths .* map(energy -> model_func(energy), bin_centers) # evaluate model at bin center (= binned measured energies) return model_counts @@ -28,7 +28,7 @@ end """ - p_value(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) + p_value(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::Union{NamedTuple, AbstractVector}) calculate p-value based on least-squares, assuming gaussian uncertainty baseline method to get goodness-of-fit (gof) # input: @@ -40,7 +40,7 @@ baseline method to get goodness-of-fit (gof) * `chi2` chi2 value * `dof` degrees of freedom """ -function p_value(fit_func::Base.Callable, h::Histogram{<:Real,1}, v_ml::NamedTuple) +function p_value(fit_func::Base.Callable, h::Histogram{<:Real,1}, v_ml::Union{NamedTuple, AbstractVector}) # prepare data counts, bin_widths, bin_centers = _prepare_data(h) @@ -67,10 +67,10 @@ export p_value """ - p_value_poissonll(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) + p_value_poissonll(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::Union{NamedTuple, AbstractVector}) p-value via poisson likelihood ratio: baseline for ML fits using Poisson statistics and bins with low number of counts """ -function p_value_poissonll(fit_func::Base.Callable, h::Histogram{<:Real,1}, v_ml::NamedTuple) +function p_value_poissonll(fit_func::Base.Callable, h::Histogram{<:Real,1}, v_ml::Union{NamedTuple, AbstractVector}) counts, bin_widths, bin_centers = _prepare_data(h) # prepare data model_func = Base.Fix2(fit_func, v_ml) # fix the fit parameters to ML best-estimate @@ -155,7 +155,7 @@ end export p_value_MC """ - residuals(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) + residuals(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::Union{NamedTuple, AbstractVector}) Calculate bin-wise residuals and normalized residuals. Calcualte bin-wise p-value based on poisson distribution for each bin. @@ -169,7 +169,7 @@ Calcualte bin-wise p-value based on poisson distribution for each bin. * `residuals_norm` normalized residuals: model - data / sqrt(model) * `p_value_binwise` p-value for each bin based on poisson distribution """ -function get_residuals(f_fit::Base.Callable, h::Histogram{<:Real,1}, v_ml::NamedTuple) +function get_residuals(f_fit::Base.Callable, h::Histogram{<:Real,1}, v_ml::Union{NamedTuple, AbstractVector}) # prepare data counts, bin_widths, bin_centers = _prepare_data(h) diff --git a/src/sipm_simple_calibration.jl b/src/sipm_simple_calibration.jl index aa68afb6..d7aa5fa6 100644 --- a/src/sipm_simple_calibration.jl +++ b/src/sipm_simple_calibration.jl @@ -31,25 +31,33 @@ function sipm_simple_calibration(pe_uncal::Vector{<:Real}; kwargs...) h_uncal, peakpos = if expect_noise_peak - find_peaks_noise_peak_exists(pe_uncal; kwargs...) + find_peaks_noise_peak_exists(pe_uncal; kwargs...) else - find_peaks(pe_uncal; kwargs...) + find_peaks(pe_uncal; kwargs...) end # simple calibration + sort!(peakpos) gain = peakpos[2] - peakpos[1] c = 1/gain offset = - (peakpos[1] * c - 1) - f_simple_calib = Base.Fix1(*, c) + f_simple_calib = x -> x .* c .+ offset + f_simple_uncal = x -> (x .- offset) ./ c pe_simple_cal = pe_uncal .* c .+ offset peakpos_cal = peakpos .* c .+ offset - h_calsimple = histogram(pe_simple_cal, bins=0.5:.01:4.5) + bin_width_cal = get_friedman_diaconis_bin_width(filter(in(0.5..1.5), pe_simple_cal)) + bin_width_uncal = get_friedman_diaconis_bin_width(filter(in( (0.5 - offset) / c .. (1.5 - offset) / c), pe_simple_cal)) + + h_calsimple = fit(Histogram, pe_simple_cal, 0.0:bin_width_cal:6.0) + h_uncal = fit(Histogram, pe_uncal, 0.0:bin_width_uncal:(6.0 - offset) / c) result = ( pe_simple_cal = pe_simple_cal, - func = f_simple_calib, + peakpos = peakpos, + f_simple_calib = f_simple_calib, + f_simple_uncal = f_simple_uncal, c = c, offset = offset ) @@ -102,19 +110,19 @@ function find_peaks_noise_peak_exists( h_uncal = fit(Histogram, amps, min_amp:bin_width:max_amp) h_decon, peakpos = peakfinder(h_uncal, σ=peakfinder_σ, backgroundRemove=true, threshold=peakfinder_threshold) - println("Current peak positions: ", peakpos) + @debug("Current peak positions: ", peakpos) num_peaks = length(peakpos) # Safety check to avoid infinite loops if min_amp >= 50 - error("Unable to exclude noise peak within reasonable min_amp range.") + @error("Unable to exclude noise peak within reasonable min_amp range.") end end # If more than two peaks are found, reduce max_quantile to find exactly two peaks if num_peaks > 2 - println("Found more than 2 peaks. Reducing max range. Currently: quantile $max_quantile") + @debug("Found more than 2 peaks. Reducing max range. Currently: quantile $max_quantile") while num_peaks != 2 max_quantile -= 0.01 @@ -127,7 +135,7 @@ function find_peaks_noise_peak_exists( # Safety check to avoid infinite loops if max_quantile <= 0.5 - error("Unable to find exactly two peaks within reasonable quantile range.") + @error("Unable to find exactly two peaks within reasonable quantile range.") end end end @@ -179,7 +187,7 @@ function find_peaks( # Safety check to avoid threshold becoming too low if peakfinder_threshold < 3.0 - error("Unable to find more than one peak within reasonable quantile range.") + @error("Unable to find more than one peak within reasonable quantile range.") end # Reset σ to its initial value after adjusting threshold @@ -188,7 +196,7 @@ function find_peaks( # Safety check to avoid infinite loops if peakfinder_σ >= 5.0 && peakfinder_threshold < 3.0 - error("Unable to find more than one peak within reasonable quantile range.") + @error("Unable to find more than one peak within reasonable quantile range.") end end @@ -204,7 +212,7 @@ function find_peaks( # If more than two peaks are found, reduce max_quantile to find exactly two peaks if num_peaks > 2 - println("Found more than 2 peaks. Reducing max range. Currently: quantile $max_quantile") + @debug("Found more than 2 peaks. Reducing max range. Currently: quantile $max_quantile") while num_peaks != 2 max_quantile -= 0.01 @@ -217,7 +225,7 @@ function find_peaks( # Safety check to avoid infinite loops if max_quantile <= 0.5 - error("Unable to find exactly two peaks within reasonable quantile range.") + @error("Unable to find exactly two peaks within reasonable quantile range.") end end end diff --git a/src/sipmfit.jl b/src/sipmfit.jl new file mode 100644 index 00000000..a943ddf1 --- /dev/null +++ b/src/sipmfit.jl @@ -0,0 +1,165 @@ + +""" + fit_sipm_spectrum(pe_cal::Vector{<:Real}, min_pe::Real=0.5, max_pe::Real=3.5; + n_mixtures::Int=ceil(Int, (max_pe - min_pe) * 4), nIter::Int=50, nInit::Int=50, + method::Symbol=:kmeans, kind=:diag, Δpe_peak_assignment::Real=0.3, f_uncal::Function=identity, uncertainty::Bool=true) + +Fit a Gaussian Mixture Model to the given pe calibration data and return the fit parameters. + +# Arguments +- `pe_cal::Vector{<:Real}`: the pe calibration data +- `min_pe::Real=0.5`: the minimum pe to consider +- `max_pe::Real=3.5`: the maximum pe to consider +- `n_mixtures::Int=ceil(Int, (max_pe - min_pe) * 4)`: the number of mixtures to fit +- `nIter::Int=50`: the number of iterations for the EM algorithm +- `nInit::Int=50`: the number of initializations for the EM algorithm +- `method::Symbol=:kmeans`: the method to use for initialization +- `kind::Symbol=:diag`: the kind of covariance matrix to use +- `Δpe_peak_assignment::Real=0.3`: the range to consider for peak assignment +- `f_uncal::Function=identity`: the function to use for uncalibration +- `uncertainty::Bool=true`: whether to calculate the uncertainty + +# Returns +- `result`: a tuple with the fit parameters +- `report`: a tuple with the fit report which can be plotted via a recipe +""" +function fit_sipm_spectrum(pe_cal::Vector{<:Real}, min_pe::Real=0.5, max_pe::Real=3.5; + n_mixtures::Int=ceil(Int, (max_pe - min_pe) * 4), nIter::Int=50, nInit::Int=50, + method::Symbol=:kmeans, kind=:diag, Δpe_peak_assignment::Real=0.3, f_uncal::Function=identity, uncertainty::Bool=true) + + # first filter peak positions out of amplitude vector + amps_fit = filter(in(min_pe..max_pe), pe_cal) + + # reshape necessary to deal with the GMM + dmat = reshape(amps_fit, length(amps_fit), 1) + + # set up mixture model with given number of mixtures + gmm = GMM(n_mixtures, dmat; method=method, nInit=nInit, nIter=nIter, kind=kind) + + # get mixture model out of EM best fit estimate + gmm_dist = MixtureModel(gmm) + + # get Gauss center and weights vector out of gmm + μ_ml = reshape(gmm.μ, n_mixtures) + σ_ml = sqrt.(reshape(gmm.Σ, n_mixtures)) + w_ml = gmm.w + + # PE positions to be determined are all integers up to the max_pe + pes = ceil(Int, min_pe):1:floor(Int, max_pe) + + # calculate bin width for histogram + bin_width = get_friedman_diaconis_bin_width(filter(in((-Δpe_peak_assignment..Δpe_peak_assignment) .+ first(pes)), pe_cal)) + + # create gof NamedTuple + gof, gof_report = NamedTuple(), NamedTuple() + + if uncertainty + # define loglikelihood function for binned data to enhance speed + h = fit(Histogram, pe_cal, minimum(amps_fit):bin_width:maximum(amps_fit)) + + # create vector of all parameters + μσw_ml = vcat(μ_ml, σ_ml, w_ml) + + # define loglikelihood function + f_loglike = let n=n_mixtures, bin_edges=only(h.edges), bin_counts=h.weights + μσw -> -_gmm_binned_loglike_func(μσw[1:n], μσw[n+1:2*n], μσw[2*n+1:end], bin_edges)(bin_counts) + end + # Calculate the Hessian matrix using ForwardDiff + H = ForwardDiff.hessian(f_loglike, μσw_ml) + + # Calculate the parameter covariance matrix + param_covariance_raw = inv(H) + param_covariance = nearestSPD(param_covariance_raw) + + # Extract the parameter uncertainties + μσw_ml_err = sqrt.(abs.(diag(param_covariance))) + μ_err, σ_err, w_err = μσw_ml_err[1:n_mixtures], μσw_ml_err[n_mixtures+1:2*n_mixtures], μσw_ml_err[2*n_mixtures+1:end] + + # create fit function + fit_function = let n=n_mixtures, total_counts=sum(h.weights) + (x, μσw) -> sum(h.weights) .* sum(μσw[2*n+1:end] .* pdf.(Normal.(μσw[1:n], μσw[n+1:2*n]), x)) + end + + # calculate p-value + pval, chi2, dof = p_value_poissonll(fit_function, h, μσw_ml) # based on likelihood ratio + + # calculate normalized residuals + residuals, residuals_norm, _, bin_centers = get_residuals(fit_function, h, μσw_ml) + + gof = (pvalue = pval, + chi2 = chi2, + dof = dof, + covmat = param_covariance, + mean_residuals = mean(residuals_norm), + median_residuals = median(residuals_norm), + std_residuals = std(residuals_norm)) + gof_report = merge(gof, (residuals = residuals, + residuals_norm = residuals_norm, + bin_centers = bin_centers)) + + μ, σ, w = measurement.(μ_ml, μ_err), measurement.(σ_ml, σ_err), measurement.(w_ml, w_err) + else + μ, σ, w = measurement.(μ_ml, Ref(NaN)), measurement.(σ_ml, Ref(NaN)), measurement.(w_ml,Ref(NaN)) + end + + # get pe_pos + get_pe_pos = pe -> dot(μ[in.(μ, (-Δpe_peak_assignment..Δpe_peak_assignment) .+ pe)], w[in.(μ, (-Δpe_peak_assignment..Δpe_peak_assignment) .+ pe)] ) / sum(w[in.(μ, (-Δpe_peak_assignment..Δpe_peak_assignment) .+ pe)]) + n_pos_mixtures = [count(in.(μ, (-Δpe_peak_assignment..Δpe_peak_assignment) .+ pe)) for pe in pes] + + pe_pos = get_pe_pos.(pes) + + # create return histogram for report + h_cal = fit(Histogram, pe_cal, ifelse(min_pe >= 0.5, min_pe-0.5, min_pe):bin_width:max_pe+0.5) + + result = ( + μ = μ, + σ = σ, + w = w, + n_pos_mixtures = n_pos_mixtures, + n_mixtures = n_mixtures, + peaks = pes, + positions_cal = pe_pos, + positions = f_uncal.(pe_pos), + gof = gof + ) + report = ( + h_cal = h_cal, + f_fit = x -> pdf(gmm_dist, x) * length(amps_fit) * bin_width, + f_fit_components = (x, i) -> length(amps_fit) * bin_width * w_ml[i] * pdf(Normal(μ_ml[i], σ_ml[i]), x), + min_pe = min_pe, + max_pe = max_pe, + bin_width = bin_width, + n_mixtures = result.n_mixtures, + n_pos_mixtures = result.n_pos_mixtures, + peaks = result.peaks, + positions = result.positions_cal, + μ = result.μ, + gof = gof_report + ) + return result, report +end +export fit_sipm_spectrum + + +function _gmm_calc_p_bin( + mix_μ::AbstractVector{<:Real}, mix_σ::AbstractVector{<:Real}, mix_w::AbstractVector{<:Real}, + bin_edges::AbstractVector{<:Real} +) + edge_cdf = vec(sum(mix_w .* cdf.(Normal.(mix_μ, mix_σ), bin_edges'), dims = 1)) + renorm_edge_cdf = (edge_cdf .- edge_cdf[begin]) .* inv(edge_cdf[end] - edge_cdf[begin]) + diff(renorm_edge_cdf) +end + +function _gmm_binned_loglike_func( + mix_μ::AbstractVector{<:Real}, mix_σ::AbstractVector{<:Real}, + mix_w::AbstractVector{<:Real}, bin_edges::AbstractVector{<:Real} +) + p_bin = _gmm_calc_p_bin(mix_μ, mix_σ, mix_w, bin_edges) + bin_widths = diff(bin_edges) + binned_density = p_bin ./ bin_widths + # Without permutation correction to get values similar to unbinned: + f_loglike(bin_counts) = sum( + xlogy.(bin_counts, binned_density) + ) + return f_loglike +end \ No newline at end of file