pvalue/gof, fwhm correlated uncertainty, iterative fit, SPD covmat, test data, norm. residuals #33

Merged
merged 23 commits into from
Feb 6, 2024
Changes from all commits
Commits
23 commits
05acef5
add p-value for fit_single_peak_th228
LisaSchlueter Jan 29, 2024
416c299
add 2 more p-value calculation alternatives
LisaSchlueter Jan 30, 2024
5815b73
implement iterative fit for cali peaks based on covmat
LisaSchlueter Jan 30, 2024
56ee493
add doc to gof
LisaSchlueter Jan 30, 2024
226d511
iterative_fit: add flag, default is false -> no iterative fit
LisaSchlueter Jan 31, 2024
f4fef01
fwhm uncertainty with covmat
LisaSchlueter Feb 2, 2024
4203bde
function to sample test data from model, run test doesn't need to read…
LisaSchlueter Feb 3, 2024
574f4b2
add distribution package
LisaSchlueter Feb 3, 2024
7c85c10
fix pull request issue, forgot a plot
LisaSchlueter Feb 3, 2024
041fd42
add fast flatten
LisaSchlueter Feb 5, 2024
2acd7a2
add norm. residuals to fit results + cosmetic improvements
LisaSchlueter Feb 5, 2024
39249e6
Cleaned docstrings
theHenks Feb 6, 2024
309605b
Changed some formatting
theHenks Feb 6, 2024
8b70401
Apply suggestions from code review
LisaSchlueter Feb 6, 2024
b4976e1
Apply suggestions from code review
LisaSchlueter Feb 6, 2024
3ae2b5c
Fixed compat
theHenks Jan 15, 2024
72630f0
Fix docstring
theHenks Feb 6, 2024
d937058
Increased LegendDataManagement package version
theHenks Feb 6, 2024
44413c4
add fast flatten
LisaSchlueter Feb 5, 2024
b4fba2a
add norm. residuals to fit results + cosmetic improvements
LisaSchlueter Feb 5, 2024
fc1f130
Fixed deps
theHenks Feb 6, 2024
42e2138
Fixed specfit test
theHenks Feb 6, 2024
cf7bcfa
Merge branch 'dev' into dev
theHenks Feb 6, 2024
3 changes: 2 additions & 1 deletion Project.toml
@@ -9,13 +9,15 @@ BAT = "c0cd4b16-88b7-57fa-983b-ab80aecada7e"
ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
LegendDataManagement = "9feedd95-f0e0-423f-a8dc-de0970eae6b3"
LegendDataTypes = "99e09c13-5545-5ee2-bfa2-77f358fb75d8"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearRegression = "92481ed7-9fb7-40fd-80f2-46fd0f076581"
LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
@@ -56,7 +58,6 @@ InverseFunctions = "0.1"
IrrationalConstants = "0.1, 0.2"
LegendDataManagement = "0.2.7"
LinearAlgebra = "1"
LinearAlgebra = "1"
LinearRegression = "0.2"
LsqFit = "0.14, 0.15"
Optim = "1"
1 change: 1 addition & 0 deletions ext/LegendSpecFitsRecipesBaseExt.jl
@@ -374,4 +374,5 @@ end

end


end # module LegendSpecFitsRecipesBaseExt
2 changes: 1 addition & 1 deletion src/LegendSpecFits.jl
@@ -51,7 +51,7 @@ include("aoe_calibration.jl")
include("specfit_combined.jl")
include("ctc.jl")
include("qc.jl")

include("gof.jl")
include("precompile.jl")

end # module
176 changes: 176 additions & 0 deletions src/gof.jl
@@ -0,0 +1,176 @@
# This file is a part of LegendSpecFits.jl, licensed under the MIT License (MIT).


"""
_prepare_data(h::Histogram{<:Real,1})
aux. function to convert histogram data into bin counts, bin widths and bin centers
"""
function _prepare_data(h::Histogram{<:Real,1})
# get bin center, width and counts from histogrammed data
bin_edges = first(h.edges)
counts = h.weights
bin_centers = (bin_edges[begin:end-1] .+ bin_edges[begin+1:end]) ./ 2
bin_widths = bin_edges[begin+1:end] .- bin_edges[begin:end-1]
return counts, bin_widths, bin_centers
end


"""
_get_model_counts(f_fit::Base.Callable,v_ml::NamedTuple,bin_centers::StepRangeLen,bin_widths::StepRangeLen)
aux. function to get the modelled peakshape based on the histogram binning and the best-fit parameters
"""
function _get_model_counts(f_fit::Base.Callable,v_ml::NamedTuple,bin_centers::StepRangeLen,bin_widths::StepRangeLen)
model_func = Base.Fix2(f_fit, v_ml) # fix the fit parameters to ML best-estimate
model_counts = bin_widths.*map(energy->model_func(energy), bin_centers) # evaluate model at bin center (= binned measured energies)
return model_counts
end



"""
p_value(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple)
calculate p-value based on least-squares
baseline method to assess goodness-of-fit (gof)
# input:
* `f_fit` function handle of fit function (peakshape)
* `h` histogram of data
* `v_ml` best-fit parameters
# returns:
* `pval` p-value of chi2 test
* `chi2` chi2 value
* `dof` degrees of freedom
"""
function p_value(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple)
# prepare data
counts, bin_widths, bin_centers = _prepare_data(h)

# get peakshape of best-fit
model_counts = _get_model_counts(f_fit, v_ml, bin_centers,bin_widths)

# calculate chi2
chi2 = sum((model_counts[model_counts.>0]-counts[model_counts.>0]).^2 ./ model_counts[model_counts.>0])
npar = length(v_ml)
dof = length(counts[model_counts.>0])-npar
pval = ccdf(Chisq(dof),chi2)
if any(model_counts.<=5)
@warn "WARNING: bin with <=$(round(minimum(model_counts),digits=0)) counts - chi2 test might be not valid"
else
@debug "p-value = $(round(pval,digits=2))"
end
return pval, chi2, dof
end
export p_value
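A minimal usage sketch (editor's addition, not part of the diff), assuming `h` is the histogram of the fitted peak region and `v_ml` is the NamedTuple of best-fit parameters obtained from `fit_single_peak_th228`:

# editor's sketch: goodness-of-fit for an already fitted peak (h and v_ml assumed to exist)
pval, chi2, dof = p_value(th228_fit_functions.f_fit, h, v_ml)
@info "p-value = $(round(pval, digits=3)), chi2/dof = $(round(chi2/dof, digits=2))"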


"""
p_value_LogLikeRatio(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple)
alternative p-value via loglikelihood ratio
"""
function p_value_LogLikeRatio(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple)
# prepare data
counts, bin_widths, bin_centers = _prepare_data(h)

# get peakshape of best-fit
model_counts = _get_model_counts(f_fit, v_ml, bin_centers,bin_widths)

# calculate chi2
chi2 = sum((model_counts[model_counts.>0]-counts[model_counts.>0]).^2 ./ model_counts[model_counts.>0])
npar = length(v_ml)
dof = length(counts[model_counts.>0])-npar
pval = ccdf(Chisq(dof),chi2)
if any(model_counts.<=5)
@warn "WARNING: bin with <=$(minimum(model_counts)) counts - chi2 test might be not valid"
else
@debug "p-value = $(round(pval,digits=2))"
end
chi2 = 2*sum(model_counts.*log.(model_counts./counts)+model_counts-counts)
pval = ccdf(Chisq(dof),chi2)
return pval, chi2, dof
end
export p_value_LogLikeRatio

"""
p_value_MC(f_fit::Base.Callable, h::Histogram{<:Real,1},ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background)},v_ml::NamedTuple,;n_samples::Int64=1000)
alternative p-value calculation via Monte Carlo sampling. **Warning**: computationally more expensive than p_value() and p_value_LogLikeRatio()
# Input:
* `f_fit` function handle of fit function (peakshape)
* `h` histogram of data
* `ps` best-fit parameters
* `v_ml` best-fit parameters
* `n_samples` number of samples

# Performed Steps:
* Create n_samples randomized histograms. For each bin, samples are drawn from a Poisson distribution with λ = model peak shape (best-fit parameter)
* Each sample histogram is fit using the model function `f_fit`
* For each sample fit, the max. loglikelihood is calculated

# Returns
* `pval` p-value --> comparison of the sample max. loglikelihoods with the max. loglikelihood of the best fit
"""
function p_value_MC(f_fit::Base.Callable, h::Histogram{<:Real,1},ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background)},v_ml::NamedTuple,;n_samples::Int64=1000)
counts, bin_widths, bin_centers = _prepare_data(h) # get data
# get peakshape of best-fit and maximum likelihood value
model_func = Base.Fix2(f_fit, v_ml) # fix the fit parameters to ML best-estimate
model_counts = bin_widths.*map(energy->model_func(energy), bin_centers) # evaluate model at bin center (= binned measured energies)
loglike_bf = -hist_loglike(model_func,h)

# draw sample for each bin
dists = Poisson.(model_counts) # create poisson distribution for each bin
counts_mc_vec = rand.(dists,n_samples) # randomized histogram counts
counts_mc = [ [] for _ in 1:n_samples ] # re-structure counts_mc_vec into an array of arrays; there is probably a better way to do this...
for i = 1:n_samples
counts_mc[i] = map(x -> x[i],counts_mc_vec)
end

# fit every sample histogram and calculate max. loglikelihood
loglike_bf_mc = NaN.*ones(n_samples)
h_mc = h # reuse the data histogram; its weights are overwritten with the MC counts below
for i=1:n_samples
h_mc.weights = counts_mc[i] # overwrite counts with MC values
result_fit_mc, report = fit_single_peak_th228(h_mc, ps ; uncertainty=false) # fit MC histogram
fit_par_mc = result_fit_mc[(:μ, :σ, :n, :step_amplitude, :skew_fraction, :skew_width, :background)]
model_func_sample = Base.Fix2(f_fit, fit_par_mc) # fix the fit parameters to ML best-estimate
loglike_bf_mc[i] = -hist_loglike(model_func_sample,h_mc) # loglikelihood for best-fit
end

# calculate p-value
pval= sum(loglike_bf_mc.<=loglike_bf)./n_samples # preliminary. could be improved e.g. with interpolation
return pval
end
export p_value_MC
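A hedged usage sketch (editor's addition): the Monte Carlo variant additionally needs the peak statistics `ps` used to seed the refits and is considerably slower, so `n_samples` should stay moderate. `h`, `ps` and `v_ml` are assumed to come from a previous `fit_single_peak_th228` call:

# editor's sketch: Monte Carlo p-value with the default number of samples
pval_mc = p_value_MC(th228_fit_functions.f_fit, h, ps, v_ml; n_samples=1000)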

"""
get_residuals(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple)
Calculate bin-wise residuals and normalized residuals.
Calculate a bin-wise p-value based on the Poisson distribution for each bin.

# Input:
* `f_fit` function handle of fit function (peakshape)
* `h` histogram of data
* `v_ml` best-fit parameters

# Returns:
* `residuals` difference: model - data (histogram bin count)
* `residuals_norm` normalized residuals: (model - data) / sqrt(model)
* `p_value_binwise` p-value for each bin based on the Poisson distribution
"""
function get_residuals(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple)
# prepare data
counts, bin_widths, bin_centers = _prepare_data(h)

# get peakshape of best-fit
model_counts = _get_model_counts(f_fit, v_ml, bin_centers,bin_widths)

# calculate bin-wise residuals
residuals = model_counts[model_counts.>0]-counts[model_counts.>0]
sigma = sqrt.(model_counts[model_counts.>0])
residuals_norm = residuals./sigma

# calculate something like a bin-wise p-value (in case that makes sense)
dist = Poisson.(model_counts) # each bin: poisson distributed
cdf_value_low = cdf.(dist, model_counts.-abs.(residuals))
cdf_value_up = 1 .-cdf.(dist, model_counts.+abs.(residuals))
p_value_binwise = cdf_value_low .+ cdf_value_up # significance of residuals -> ~probability that the residual (for a given bin) is as large as observed or larger
return residuals, residuals_norm, p_value_binwise, bin_centers
end
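A short diagnostic sketch (editor's addition) showing how the normalized residuals might be used, again assuming `h` and `v_ml` from a previous fit:

# editor's sketch: count bins deviating from the model by more than ~3 sigma
residuals, residuals_norm, p_value_binwise, bin_centers = get_residuals(th228_fit_functions.f_fit, h, v_ml)
n_outliers = count(abs.(residuals_norm) .> 3)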

66 changes: 44 additions & 22 deletions src/specfit.jl
@@ -8,7 +8,6 @@ th228_fit_functions = (
f_bck = (x, v) -> background_peakshape(x, v.μ, v.σ, v.step_amplitude, v.background),
f_sigWithTail = (x, v) -> signal_peakshape(x, v.μ, v.σ, v.n, v.skew_fraction) + lowEtail_peakshape(x, v.μ, v.σ, v.n, v.skew_fraction, v.skew_width)
)

"""
estimate_single_peak_stats(h::Histogram, calib_type::Symbol=:th228)

@@ -38,7 +37,6 @@ function estimate_single_peak_stats(h::Histogram,; calib_type::Symbol=:th228)
end
export estimate_single_peak_stats


function estimate_single_peak_stats_th228(h::Histogram{T}) where T<:Real
W = h.weights
E = first(h.edges)
@@ -88,16 +86,16 @@ Perform a fit of the peakshape to the data in `peakhists` using the initial valu
* `peak_fit_plots`: array of plots of the peak fits
* `return_vals`: dictionary of the fit results
"""
function fit_peaks(peakhists::Array, peakstats::StructArray, th228_lines::Array,; calib_type::Symbol=:th228, uncertainty::Bool=true, low_e_tail::Bool=true)
function fit_peaks(peakhists::Array, peakstats::StructArray, th228_lines::Array,; calib_type::Symbol=:th228, uncertainty::Bool=true, low_e_tail::Bool=true,iterative_fit::Bool=false)
if calib_type == :th228
return fit_peaks_th228(peakhists, peakstats, th228_lines,; uncertainty=uncertainty, low_e_tail=low_e_tail)
return fit_peaks_th228(peakhists, peakstats, th228_lines,; uncertainty=uncertainty, low_e_tail=low_e_tail,iterative_fit=iterative_fit)
else
error("Calibration type not supported")
end
end
export fit_peaks
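A minimal sketch of the new keyword (editor's addition; `peakhists`, `peakstats` and `th228_lines` as in the docstring above): with `iterative_fit=true`, any peak whose covariance matrix is not positive definite is refit without the low-energy tail.

# editor's sketch: enable the iterative refit for non-positive-definite covariance matrices
result, report = fit_peaks(peakhists, peakstats, th228_lines; uncertainty=true, iterative_fit=true)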

function fit_peaks_th228(peakhists::Array, peakstats::StructArray, th228_lines::Array{T},; uncertainty::Bool=true, low_e_tail::Bool=true) where T<:Any
function fit_peaks_th228(peakhists::Array, peakstats::StructArray, th228_lines::Array{T},; uncertainty::Bool=true, low_e_tail::Bool=true, iterative_fit::Bool=false) where T<:Any
# create return and result dicts
result = Dict{T, NamedTuple}()
report = Dict{T, NamedTuple}()
@@ -108,6 +106,17 @@ function fit_peaks_th228(peakhists::Array, peakstats::StructArray, th228_lines::
ps = peakstats[i]
# fit peak
result_peak, report_peak = fit_single_peak_th228(h, ps, ; uncertainty=uncertainty, low_e_tail=low_e_tail)

# check covariance matrix for being positive definite (no negative uncertainties)
if uncertainty
if iterative_fit && !isposdef(result_peak.covmat)
@warn "Covariance matrix not positive definite for peak $peak - repeat fit without low energy tail"
pval_save = result_peak.pval
result_peak, report_peak = fit_single_peak_th228(h, ps, ; uncertainty=uncertainty, low_e_tail=false)
@info "New covariance matrix is positive definite: $(isposdef(result_peak.covmat))"
@info "p-val with low-energy tail p=$(round(pval_save,digits=5)) , without low-energy tail: p=$(round((result_peak.pval),digits=5))"
end
end
# save results
result[peak] = result_peak
report[peak] = report_peak
@@ -124,7 +133,9 @@ Also, FWHM is calculated from the fitted peakshape with MC error propagation. Th
* `result`: NamedTuple of the fit results containing values and errors
* `report`: NamedTuple of the fit report which can be plotted
"""
function fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background), NTuple{5, T}}; uncertainty::Bool=true, low_e_tail::Bool=true, fixed_position::Bool=false, pseudo_prior::NamedTupleDist=NamedTupleDist(empty = true)) where T<:Real
function fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background), NTuple{5, T}};
uncertainty::Bool=true, low_e_tail::Bool=true, fixed_position::Bool=false, pseudo_prior::NamedTupleDist=NamedTupleDist(empty = true),
fit_fun::Symbol=:f_fit) where T<:Real
# create standard pseudo priors
standard_pseudo_prior = NamedTupleDist(
μ = ifelse(fixed_position, ConstValueDist(ps.peak_pos), Uniform(ps.peak_pos-10, ps.peak_pos+10)),
@@ -152,8 +163,8 @@ function fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fw
# start values for MLE
v_init = mean(pseudo_prior)

# create loglikelihood function
f_loglike = let f_fit=th228_fit_functions.f_fit, h=h
# create loglikelihood function: f_loglike(v) that can be evaluated for any set of fit parameters v
f_loglike = let f_fit=th228_fit_functions[fit_fun], h=h
v -> hist_loglike(Base.Fix2(f_fit, v), h)
end

@@ -163,37 +174,45 @@ function fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fw
# best fit results
v_ml = inverse(f_trafo)(Optim.minimizer(opt_r))

f_loglike_array = let f_fit=gamma_peakshape, h=h
v -> - hist_loglike(x -> f_fit(x, v...), h)
f_loglike_array = let f_fit=th228_fit_functions[fit_fun], h=h, v_keys = keys(standard_pseudo_prior) # same loglikelihood function as f_loglike, but takes an array as input instead of a NamedTuple
v -> - hist_loglike( x -> f_fit(x,NamedTuple{v_keys}(v)), h)
end

if uncertainty
# Calculate the Hessian matrix using ForwardDiff
H = ForwardDiff.hessian(f_loglike_array, tuple_to_array(v_ml))

# Calculate the parameter covariance matrix
param_covariance = inv(H)

param_covariance_raw = inv(H)
param_covariance = nearestSPD(param_covariance_raw)

# Extract the parameter uncertainties
v_ml_err = array_to_tuple(sqrt.(abs.(diag(param_covariance))), v_ml)

# calculate p-value
pval, chi2, dof = p_value(th228_fit_functions.f_fit, h, v_ml)

# calculate normalized residuals
residuals, residuals_norm, p_value_binwise, bin_centers = get_residuals(th228_fit_functions.f_fit, h, v_ml)

# get fwhm of peak
fwhm, fwhm_err = get_peak_fwhm_th228(v_ml, v_ml_err)
fwhm, fwhm_err = get_peak_fwhm_th228(v_ml, param_covariance)

@debug "Best Fit values"
@debug "μ: $(v_ml.μ) ± $(v_ml_err.μ)"
@debug "σ: $(v_ml.σ) ± $(v_ml_err.σ)"
@debug "n: $(v_ml.n) ± $(v_ml_err.n)"
@debug "p: $pval , chi2 = $(chi2) with $(dof) dof"
@debug "FWHM: $(fwhm) ± $(fwhm_err)"

result = merge(v_ml, (fwhm = fwhm, err = merge(v_ml_err, (fwhm = fwhm_err,))))
result = merge(v_ml, (pval = pval, chi2 = chi2, dof = dof, fwhm = fwhm,covmat = param_covariance, covmat_raw = param_covariance_raw,residuals = residuals, residuals_norm = residuals_norm, p_value_binwise= p_value_binwise,bin_centers = bin_centers,),(err = merge(v_ml_err, (fwhm = fwhm_err,)),))
report = (
v = v_ml,
h = h,
f_fit = x -> Base.Fix2(th228_fit_functions.f_fit, v_ml)(x),
f_sig = x -> Base.Fix2(th228_fit_functions.f_sig, v_ml)(x),
f_lowEtail = x -> Base.Fix2(th228_fit_functions.f_lowEtail, v_ml)(x),
f_bck = x -> Base.Fix2(th228_fit_functions.f_bck, v_ml)(x)
f_bck = x -> Base.Fix2(th228_fit_functions.f_bck, v_ml)(x),
)
else
# get fwhm of peak
@@ -240,8 +259,6 @@ function estimate_fwhm(v::NamedTuple)
return NaN
end
end


"""
get_peak_fwhm_th228(v_ml::NamedTuple, v_ml_err::NamedTuple)
Get the FWHM of a peak from the fit parameters while performing an MC error propagation.
@@ -250,22 +267,27 @@ Get the FWHM of a peak from the fit parameters while performing a MC error propa
* `fwhm`: the FWHM of the peak
* `fwhm_err`: the uncertainty of the FWHM of the peak
"""
function get_peak_fwhm_th228(v_ml::NamedTuple, v_ml_err::NamedTuple, uncertainty::Bool=true)
function get_peak_fwhm_th228(v_ml::NamedTuple, v_ml_err::Union{Matrix,NamedTuple},uncertainty::Bool=true)
# get fwhm for peak fit
fwhm = estimate_fwhm(v_ml)
if !uncertainty
return fwhm, NaN
end

# get MC for FWHM err
v_mc = get_mc_value_shapes(v_ml, v_ml_err, 1000)
if isa(v_ml_err,Matrix)# use correlated fit parameter uncertainties
v_mc = get_mc_value_shapes(v_ml, v_ml_err, 10000)
elseif isa(v_ml_err,NamedTuple) # use uncorrelated fit parameter uncertainties
v_mc = get_mc_value_shapes(v_ml, v_ml_err, 1000)
end
fwhm_mc = estimate_fwhm.(v_mc)
fwhm_err = std(fwhm_mc[isfinite.(fwhm_mc)])
return fwhm, fwhm_err
end

export get_peak_fwhm_th228
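A hedged sketch of the two error-propagation modes (editor's addition; `v_ml`, `v_ml_err` and `param_covariance` are the names used in `fit_single_peak_th228` above):

# editor's sketch: correlated MC error propagation using the (SPD) covariance matrix ...
fwhm, fwhm_err = get_peak_fwhm_th228(v_ml, param_covariance)
# ... or uncorrelated propagation using the per-parameter uncertainties
fwhm, fwhm_err = get_peak_fwhm_th228(v_ml, v_ml_err)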

"""
fitCalibration
fitCalibration(peaks::Array, μ::Array)
Fit the calibration lines to a linear function.
# Returns
* `slope`: the slope of the linear fit